Bug Summary

File: /build/source/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
Warning: line 6466, column 63
The result of the left shift is undefined due to shifting by '4294967295', which is greater or equal to the width of type 'int'

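For context, the diagnostic means the shift count evaluates to 4294967295 (UINT_MAX, i.e. an unsigned value that has wrapped around from -1); shifting a 32-bit int by a count of 32 or more is undefined behavior in C++. A minimal hypothetical sketch of the kind of pattern being flagged (illustrative names only, not the code at line 6466):

    // Illustration of the diagnostic, not the analyzer's reduced test case.
    // 'Amount' stands in for a computed shift count that can wrap to UINT_MAX.
    unsigned computeAmount() { return 0u - 1u; }  // wraps to 4294967295
    int shiftExample() {
      unsigned Amount = computeAmount();
      return 1 << Amount;  // undefined: shift count >= width of 'int' (32 bits)
    }
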
Annotated Source Code


clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name AArch64InstructionSelector.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-17/lib/clang/17 -D _DEBUG -D _GLIBCXX_ASSERTIONS -D _GNU_SOURCE -D _LIBCPP_ENABLE_ASSERTIONS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/Target/AArch64 -I /build/source/llvm/lib/Target/AArch64 -I include -I /build/source/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-17/lib/clang/17/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/source/= -fcoverage-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/source/= -source-date-epoch 1683717183 -O2 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/= -ferror-limit 19 -fvisibility=hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2023-05-10-133810-16478-1 -x c++ /build/source/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

/build/source/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

1//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the InstructionSelector class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
14#include "AArch64GlobalISelUtils.h"
15#include "AArch64InstrInfo.h"
16#include "AArch64MachineFunctionInfo.h"
17#include "AArch64RegisterBankInfo.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
20#include "AArch64TargetMachine.h"
21#include "MCTargetDesc/AArch64AddressingModes.h"
22#include "MCTargetDesc/AArch64MCTargetDesc.h"
23#include "llvm/BinaryFormat/Dwarf.h"
24#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
25#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
26#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
27#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
28#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
29#include "llvm/CodeGen/GlobalISel/Utils.h"
30#include "llvm/CodeGen/MachineBasicBlock.h"
31#include "llvm/CodeGen/MachineConstantPool.h"
32#include "llvm/CodeGen/MachineFrameInfo.h"
33#include "llvm/CodeGen/MachineFunction.h"
34#include "llvm/CodeGen/MachineInstr.h"
35#include "llvm/CodeGen/MachineInstrBuilder.h"
36#include "llvm/CodeGen/MachineMemOperand.h"
37#include "llvm/CodeGen/MachineOperand.h"
38#include "llvm/CodeGen/MachineRegisterInfo.h"
39#include "llvm/CodeGen/TargetOpcodes.h"
40#include "llvm/IR/Constants.h"
41#include "llvm/IR/DerivedTypes.h"
42#include "llvm/IR/Instructions.h"
43#include "llvm/IR/IntrinsicsAArch64.h"
44#include "llvm/IR/PatternMatch.h"
45#include "llvm/IR/Type.h"
46#include "llvm/Pass.h"
47#include "llvm/Support/Debug.h"
48#include "llvm/Support/raw_ostream.h"
49#include <optional>
50
51#define DEBUG_TYPE "aarch64-isel"
52
53using namespace llvm;
54using namespace MIPatternMatch;
55using namespace AArch64GISelUtils;
56
57namespace llvm {
58class BlockFrequencyInfo;
59class ProfileSummaryInfo;
60}
61
62namespace {
63
64#define GET_GLOBALISEL_PREDICATE_BITSET
65#include "AArch64GenGlobalISel.inc"
66#undef GET_GLOBALISEL_PREDICATE_BITSET
67
68
69class AArch64InstructionSelector : public InstructionSelector {
70public:
71 AArch64InstructionSelector(const AArch64TargetMachine &TM,
72 const AArch64Subtarget &STI,
73 const AArch64RegisterBankInfo &RBI);
74
75 bool select(MachineInstr &I) override;
76 static const char *getName() { return DEBUG_TYPE; }
77
78 void setupMF(MachineFunction &MF, GISelKnownBits *KB,
79 CodeGenCoverage &CoverageInfo, ProfileSummaryInfo *PSI,
80 BlockFrequencyInfo *BFI) override {
81 InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
82 MIB.setMF(MF);
83
84 // hasFnAttribute() is expensive to call on every BRCOND selection, so
85 // cache it here for each run of the selector.
86 ProduceNonFlagSettingCondBr =
87 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
88 MFReturnAddr = Register();
89
90 processPHIs(MF);
91 }
92
93private:
94 /// tblgen-erated 'select' implementation, used as the initial selector for
95 /// the patterns that don't require complex C++.
96 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
97
98 // A lowering phase that runs before any selection attempts.
99 // Returns true if the instruction was modified.
100 bool preISelLower(MachineInstr &I);
101
102 // An early selection function that runs before the selectImpl() call.
103 bool earlySelect(MachineInstr &I);
104
105 // Do some preprocessing of G_PHIs before we begin selection.
106 void processPHIs(MachineFunction &MF);
107
108 bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);
109
110 /// Eliminate same-sized cross-bank copies into stores before selectImpl().
111 bool contractCrossBankCopyIntoStore(MachineInstr &I,
112 MachineRegisterInfo &MRI);
113
114 bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
115
116 bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
117 MachineRegisterInfo &MRI) const;
118 bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
119 MachineRegisterInfo &MRI) const;
120
121 ///@{
122 /// Helper functions for selectCompareBranch.
123 bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
124 MachineIRBuilder &MIB) const;
125 bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
126 MachineIRBuilder &MIB) const;
127 bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
128 MachineIRBuilder &MIB) const;
129 bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
130 MachineBasicBlock *DstMBB,
131 MachineIRBuilder &MIB) const;
132 ///@}
133
134 bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
135 MachineRegisterInfo &MRI);
136
137 bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
138 bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);
139
140 // Helper to generate an equivalent of scalar_to_vector into a new register,
141 // returned via 'Dst'.
142 MachineInstr *emitScalarToVector(unsigned EltSize,
143 const TargetRegisterClass *DstRC,
144 Register Scalar,
145 MachineIRBuilder &MIRBuilder) const;
146
147 /// Emit a lane insert into \p DstReg, or a new vector register if
148 /// std::nullopt is provided.
149 ///
150 /// The lane inserted into is defined by \p LaneIdx. The vector source
151 /// register is given by \p SrcReg. The register containing the element is
152 /// given by \p EltReg.
153 MachineInstr *emitLaneInsert(std::optional<Register> DstReg, Register SrcReg,
154 Register EltReg, unsigned LaneIdx,
155 const RegisterBank &RB,
156 MachineIRBuilder &MIRBuilder) const;
157
158 /// Emit a sequence of instructions representing a constant \p CV for a
159 /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
160 ///
161 /// \returns the last instruction in the sequence on success, and nullptr
162 /// otherwise.
163 MachineInstr *emitConstantVector(Register Dst, Constant *CV,
164 MachineIRBuilder &MIRBuilder,
165 MachineRegisterInfo &MRI);
166
167 bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI);
168 bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
169 MachineRegisterInfo &MRI);
170 /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
171 /// SUBREG_TO_REG.
172 bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
173 bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
174 bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
175 bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
176
177 bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
178 bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
179 bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
180 bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);
181
182 /// Helper function to select vector load intrinsics like
183 /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
184 /// \p Opc is the opcode that the selected instruction should use.
185 /// \p NumVecs is the number of vector destinations for the instruction.
186 /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
187 bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
188 MachineInstr &I);
189 bool selectIntrinsicWithSideEffects(MachineInstr &I,
190 MachineRegisterInfo &MRI);
191 bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
192 bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI);
193 bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
194 bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
195 bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
196 bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
197 bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
198 bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
199 bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
200 bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
201
202 unsigned emitConstantPoolEntry(const Constant *CPVal,
203 MachineFunction &MF) const;
204 MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
205 MachineIRBuilder &MIRBuilder) const;
206
207 // Emit a vector concat operation.
208 MachineInstr *emitVectorConcat(std::optional<Register> Dst, Register Op1,
209 Register Op2,
210 MachineIRBuilder &MIRBuilder) const;
211
212 // Emit an integer compare between LHS and RHS, which checks for Predicate.
213 MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
214 MachineOperand &Predicate,
215 MachineIRBuilder &MIRBuilder) const;
216
217 /// Emit a floating point comparison between \p LHS and \p RHS.
218 /// \p Pred if given is the intended predicate to use.
219 MachineInstr *
220 emitFPCompare(Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
221 std::optional<CmpInst::Predicate> = std::nullopt) const;
222
223 MachineInstr *
224 emitInstr(unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
225 std::initializer_list<llvm::SrcOp> SrcOps,
226 MachineIRBuilder &MIRBuilder,
227 const ComplexRendererFns &RenderFns = std::nullopt) const;
228 /// Helper function to emit an add or sub instruction.
229 ///
230 /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
231 /// in a specific order.
232 ///
233 /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
234 ///
235 /// \code
236 /// const std::array<std::array<unsigned, 2>, 4> Table {
237 /// {{AArch64::ADDXri, AArch64::ADDWri},
238 /// {AArch64::ADDXrs, AArch64::ADDWrs},
239 /// {AArch64::ADDXrr, AArch64::ADDWrr},
240 /// {AArch64::SUBXri, AArch64::SUBWri},
241 /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
242 /// \endcode
243 ///
244 /// Each row in the table corresponds to a different addressing mode. Each
245 /// column corresponds to a different register size.
246 ///
247 /// \attention Rows must be structured as follows:
248 /// - Row 0: The ri opcode variants
249 /// - Row 1: The rs opcode variants
250 /// - Row 2: The rr opcode variants
251 /// - Row 3: The ri opcode variants for negative immediates
252 /// - Row 4: The rx opcode variants
253 ///
254 /// \attention Columns must be structured as follows:
255 /// - Column 0: The 64-bit opcode variants
256 /// - Column 1: The 32-bit opcode variants
257 ///
258 /// \p Dst is the destination register of the binop to emit.
259 /// \p LHS is the left-hand operand of the binop to emit.
260 /// \p RHS is the right-hand operand of the binop to emit.
261 MachineInstr *emitAddSub(
262 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
263 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
264 MachineIRBuilder &MIRBuilder) const;
265 MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
266 MachineOperand &RHS,
267 MachineIRBuilder &MIRBuilder) const;
268 MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
269 MachineIRBuilder &MIRBuilder) const;
270 MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
271 MachineIRBuilder &MIRBuilder) const;
272 MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
273 MachineIRBuilder &MIRBuilder) const;
274 MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
275 MachineIRBuilder &MIRBuilder) const;
276 MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
277 AArch64CC::CondCode CC,
278 MachineIRBuilder &MIRBuilder) const;
279 MachineInstr *emitExtractVectorElt(std::optional<Register> DstReg,
280 const RegisterBank &DstRB, LLT ScalarTy,
281 Register VecReg, unsigned LaneIdx,
282 MachineIRBuilder &MIRBuilder) const;
283 MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
284 AArch64CC::CondCode Pred,
285 MachineIRBuilder &MIRBuilder) const;
286 /// Emit a CSet for a FP compare.
287 ///
288 /// \p Dst is expected to be a 32-bit scalar register.
289 MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
290 MachineIRBuilder &MIRBuilder) const;
291
292 /// Emit the overflow op for \p Opcode.
293 ///
294 /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
295 /// G_USUBO, etc.
296 std::pair<MachineInstr *, AArch64CC::CondCode>
297 emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
298 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
299
300 /// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
301 /// In some cases this is even possible with OR operations in the expression.
302 MachineInstr *emitConjunction(Register Val, AArch64CC::CondCode &OutCC,
303 MachineIRBuilder &MIB) const;
304 MachineInstr *emitConditionalComparison(Register LHS, Register RHS,
305 CmpInst::Predicate CC,
306 AArch64CC::CondCode Predicate,
307 AArch64CC::CondCode OutCC,
308 MachineIRBuilder &MIB) const;
309 MachineInstr *emitConjunctionRec(Register Val, AArch64CC::CondCode &OutCC,
310 bool Negate, Register CCOp,
311 AArch64CC::CondCode Predicate,
312 MachineIRBuilder &MIB) const;
313
314 /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
315 /// \p IsNegative is true if the test should be "not zero".
316 /// This will also optimize the test bit instruction when possible.
317 MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
318 MachineBasicBlock *DstMBB,
319 MachineIRBuilder &MIB) const;
320
321 /// Emit a CB(N)Z instruction which branches to \p DestMBB.
322 MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
323 MachineBasicBlock *DestMBB,
324 MachineIRBuilder &MIB) const;
325
326 // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
327 // We use these manually instead of using the importer since it doesn't
328 // support SDNodeXForm.
329 ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
330 ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
331 ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
332 ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
333
334 ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
335 ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
336 ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
337
338 ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
339 unsigned Size) const;
340
341 ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
342 return selectAddrModeUnscaled(Root, 1);
343 }
344 ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
345 return selectAddrModeUnscaled(Root, 2);
346 }
347 ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
348 return selectAddrModeUnscaled(Root, 4);
349 }
350 ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
351 return selectAddrModeUnscaled(Root, 8);
352 }
353 ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
354 return selectAddrModeUnscaled(Root, 16);
355 }
356
357 /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
358 /// from complex pattern matchers like selectAddrModeIndexed().
359 ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
360 MachineRegisterInfo &MRI) const;
361
362 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
363 unsigned Size) const;
364 template <int Width>
365 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
366 return selectAddrModeIndexed(Root, Width / 8);
367 }
368
369 bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
370 const MachineRegisterInfo &MRI) const;
371 ComplexRendererFns
372 selectAddrModeShiftedExtendXReg(MachineOperand &Root,
373 unsigned SizeInBytes) const;
374
375 /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
376 /// or not a shift + extend should be folded into an addressing mode. Returns
377 /// None when this is not profitable or possible.
378 ComplexRendererFns
379 selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
380 MachineOperand &Offset, unsigned SizeInBytes,
381 bool WantsExt) const;
382 ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
383 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
384 unsigned SizeInBytes) const;
385 template <int Width>
386 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
387 return selectAddrModeXRO(Root, Width / 8);
388 }
389
390 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
391 unsigned SizeInBytes) const;
392 template <int Width>
393 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
394 return selectAddrModeWRO(Root, Width / 8);
395 }
396
397 ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
398 bool AllowROR = false) const;
399
400 ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
401 return selectShiftedRegister(Root);
402 }
403
404 ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
405 return selectShiftedRegister(Root, true);
406 }
407
408 /// Given an extend instruction, determine the correct shift-extend type for
409 /// that instruction.
410 ///
411 /// If the instruction is going to be used in a load or store, pass
412 /// \p IsLoadStore = true.
413 AArch64_AM::ShiftExtendType
414 getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
415 bool IsLoadStore = false) const;
416
417 /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
418 ///
419 /// \returns Either \p Reg if no change was necessary, or the new register
420 /// created by moving \p Reg.
421 ///
422 /// Note: This uses emitCopy right now.
423 Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
424 MachineIRBuilder &MIB) const;
425
426 ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
427
428 void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
429 int OpIdx = -1) const;
430 void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
431 int OpIdx = -1) const;
432 void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
433 int OpIdx = -1) const;
434 void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
435 int OpIdx = -1) const;
436 void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
437 int OpIdx = -1) const;
438 void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
439 int OpIdx = -1) const;
440 void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB,
441 const MachineInstr &MI,
442 int OpIdx = -1) const;
443
444 // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
445 void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
446
447 // Optimization methods.
448 bool tryOptSelect(GSelect &Sel);
449 bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI);
450 MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
451 MachineOperand &Predicate,
452 MachineIRBuilder &MIRBuilder) const;
453
454 /// Return true if \p MI is a load or store of \p NumBytes bytes.
455 bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
456
457 /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
458 /// register zeroed out. In other words, the result of MI has been explicitly
459 /// zero extended.
460 bool isDef32(const MachineInstr &MI) const;
461
462 const AArch64TargetMachine &TM;
463 const AArch64Subtarget &STI;
464 const AArch64InstrInfo &TII;
465 const AArch64RegisterInfo &TRI;
466 const AArch64RegisterBankInfo &RBI;
467
468 bool ProduceNonFlagSettingCondBr = false;
469
470 // Some cached values used during selection.
471 // We use LR as a live-in register, and we keep track of it here as it can be
472 // clobbered by calls.
473 Register MFReturnAddr;
474
475 MachineIRBuilder MIB;
476
477#define GET_GLOBALISEL_PREDICATES_DECL
478#include "AArch64GenGlobalISel.inc"
479#undef GET_GLOBALISEL_PREDICATES_DECL
480
481// We declare the temporaries used by selectImpl() in the class to minimize the
482// cost of constructing placeholder values.
483#define GET_GLOBALISEL_TEMPORARIES_DECL
484#include "AArch64GenGlobalISel.inc"
485#undef GET_GLOBALISEL_TEMPORARIES_DECL
486};
487
488} // end anonymous namespace
489
490#define GET_GLOBALISEL_IMPL
491#include "AArch64GenGlobalISel.inc"
492#undef GET_GLOBALISEL_IMPL
493
494AArch64InstructionSelector::AArch64InstructionSelector(
495 const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
496 const AArch64RegisterBankInfo &RBI)
497 : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
498 RBI(RBI),
499#define GET_GLOBALISEL_PREDICATES_INIT
500#include "AArch64GenGlobalISel.inc"
501#undef GET_GLOBALISEL_PREDICATES_INIT
502#define GET_GLOBALISEL_TEMPORARIES_INIT
503#include "AArch64GenGlobalISel.inc"
504#undef GET_GLOBALISEL_TEMPORARIES_INIT
505{
506}
507
508// FIXME: This should be target-independent, inferred from the types declared
509// for each class in the bank.
510//
511/// Given a register bank, and a type, return the smallest register class that
512/// can represent that combination.
513static const TargetRegisterClass *
514getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
515 bool GetAllRegSet = false) {
516 if (RB.getID() == AArch64::GPRRegBankID) {
517 if (Ty.getSizeInBits() <= 32)
518 return GetAllRegSet ? &AArch64::GPR32allRegClass
519 : &AArch64::GPR32RegClass;
520 if (Ty.getSizeInBits() == 64)
521 return GetAllRegSet ? &AArch64::GPR64allRegClass
522 : &AArch64::GPR64RegClass;
523 if (Ty.getSizeInBits() == 128)
524 return &AArch64::XSeqPairsClassRegClass;
525 return nullptr;
526 }
527
528 if (RB.getID() == AArch64::FPRRegBankID) {
529 switch (Ty.getSizeInBits()) {
530 case 8:
531 return &AArch64::FPR8RegClass;
532 case 16:
533 return &AArch64::FPR16RegClass;
534 case 32:
535 return &AArch64::FPR32RegClass;
536 case 64:
537 return &AArch64::FPR64RegClass;
538 case 128:
539 return &AArch64::FPR128RegClass;
540 }
541 return nullptr;
542 }
543
544 return nullptr;
545}
546
547/// Given a register bank, and size in bits, return the smallest register class
548/// that can represent that combination.
549static const TargetRegisterClass *
550getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
551 bool GetAllRegSet = false) {
552 unsigned RegBankID = RB.getID();
553
554 if (RegBankID == AArch64::GPRRegBankID) {
555 if (SizeInBits <= 32)
556 return GetAllRegSet ? &AArch64::GPR32allRegClass
557 : &AArch64::GPR32RegClass;
558 if (SizeInBits == 64)
559 return GetAllRegSet ? &AArch64::GPR64allRegClass
560 : &AArch64::GPR64RegClass;
561 if (SizeInBits == 128)
562 return &AArch64::XSeqPairsClassRegClass;
563 }
564
565 if (RegBankID == AArch64::FPRRegBankID) {
566 switch (SizeInBits) {
567 default:
568 return nullptr;
569 case 8:
570 return &AArch64::FPR8RegClass;
571 case 16:
572 return &AArch64::FPR16RegClass;
573 case 32:
574 return &AArch64::FPR32RegClass;
575 case 64:
576 return &AArch64::FPR64RegClass;
577 case 128:
578 return &AArch64::FPR128RegClass;
579 }
580 }
581
582 return nullptr;
583}
584
585/// Returns the correct subregister to use for a given register class.
586static bool getSubRegForClass(const TargetRegisterClass *RC,
587 const TargetRegisterInfo &TRI, unsigned &SubReg) {
588 switch (TRI.getRegSizeInBits(*RC)) {
589 case 8:
590 SubReg = AArch64::bsub;
591 break;
592 case 16:
593 SubReg = AArch64::hsub;
594 break;
595 case 32:
596 if (RC != &AArch64::FPR32RegClass)
597 SubReg = AArch64::sub_32;
598 else
599 SubReg = AArch64::ssub;
600 break;
601 case 64:
602 SubReg = AArch64::dsub;
603 break;
604 default:
605 LLVM_DEBUG(
606 dbgs() << "Couldn't find appropriate subregister for register class.");
607 return false;
608 }
609
610 return true;
611}
612
613/// Returns the minimum size the given register bank can hold.
614static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
615 switch (RB.getID()) {
616 case AArch64::GPRRegBankID:
617 return 32;
618 case AArch64::FPRRegBankID:
619 return 8;
620 default:
621 llvm_unreachable("Tried to get minimum size for unknown register bank.")::llvm::llvm_unreachable_internal("Tried to get minimum size for unknown register bank."
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 621)
;
622 }
623}
624
625/// Create a REG_SEQUENCE instruction using the registers in \p Regs.
626/// Helper function for functions like createDTuple and createQTuple.
627///
628/// \p RegClassIDs - The list of register class IDs available for some tuple of
629/// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
630/// expected to contain between 2 and 4 tuple classes.
631///
632/// \p SubRegs - The list of subregister classes associated with each register
633/// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
634/// subregister class. The index of each subregister class is expected to
635/// correspond with the index of each register class.
636///
637/// \returns Either the destination register of REG_SEQUENCE instruction that
638/// was created, or the 0th element of \p Regs if \p Regs contains a single
639/// element.
640static Register createTuple(ArrayRef<Register> Regs,
641 const unsigned RegClassIDs[],
642 const unsigned SubRegs[], MachineIRBuilder &MIB) {
643 unsigned NumRegs = Regs.size();
644 if (NumRegs == 1)
645 return Regs[0];
646 assert(NumRegs >= 2 && NumRegs <= 4 &&
647 "Only support between two and 4 registers in a tuple!");
648 const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();
649 auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
650 auto RegSequence =
651 MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
652 for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
653 RegSequence.addUse(Regs[I]);
654 RegSequence.addImm(SubRegs[I]);
655 }
656 return RegSequence.getReg(0);
657}
658
659/// Create a tuple of D-registers using the registers in \p Regs.
660static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
661 static const unsigned RegClassIDs[] = {
662 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
663 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
664 AArch64::dsub2, AArch64::dsub3};
665 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
666}
667
668/// Create a tuple of Q-registers using the registers in \p Regs.
669static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
670 static const unsigned RegClassIDs[] = {
671 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
672 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
673 AArch64::qsub2, AArch64::qsub3};
674 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
675}
676
677static std::optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
678 auto &MI = *Root.getParent();
679 auto &MBB = *MI.getParent();
680 auto &MF = *MBB.getParent();
681 auto &MRI = MF.getRegInfo();
682 uint64_t Immed;
683 if (Root.isImm())
684 Immed = Root.getImm();
685 else if (Root.isCImm())
686 Immed = Root.getCImm()->getZExtValue();
687 else if (Root.isReg()) {
688 auto ValAndVReg =
689 getIConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
690 if (!ValAndVReg)
691 return std::nullopt;
692 Immed = ValAndVReg->Value.getSExtValue();
693 } else
694 return std::nullopt;
695 return Immed;
696}
697
698/// Check whether \p I is a currently unsupported binary operation:
699/// - it has an unsized type
700/// - an operand is not a vreg
701/// - all operands are not in the same bank
702/// These are checks that should someday live in the verifier, but right now,
703/// these are mostly limitations of the aarch64 selector.
704static bool unsupportedBinOp(const MachineInstr &I,
705 const AArch64RegisterBankInfo &RBI,
706 const MachineRegisterInfo &MRI,
707 const AArch64RegisterInfo &TRI) {
708 LLT Ty = MRI.getType(I.getOperand(0).getReg());
709 if (!Ty.isValid()) {
710 LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Generic binop register should be typed\n"
; } } while (false)
;
711 return true;
712 }
713
714 const RegisterBank *PrevOpBank = nullptr;
715 for (auto &MO : I.operands()) {
716 // FIXME: Support non-register operands.
717 if (!MO.isReg()) {
718 LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Generic inst non-reg operands are unsupported\n"
; } } while (false)
;
719 return true;
720 }
721
722 // FIXME: Can generic operations have physical registers operands? If
723 // so, this will need to be taught about that, and we'll need to get the
724 // bank out of the minimal class for the register.
725 // Either way, this needs to be documented (and possibly verified).
726 if (!MO.getReg().isVirtual()) {
727 LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Generic inst has physical register operand\n"
; } } while (false)
;
728 return true;
729 }
730
731 const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
732 if (!OpBank) {
733 LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Generic register has no bank or class\n"
; } } while (false)
;
734 return true;
735 }
736
737 if (PrevOpBank && OpBank != PrevOpBank) {
738 LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Generic inst operands have different banks\n"
; } } while (false)
;
739 return true;
740 }
741 PrevOpBank = OpBank;
742 }
743 return false;
744}
745
746/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
747/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
748/// and of size \p OpSize.
749/// \returns \p GenericOpc if the combination is unsupported.
750static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
751 unsigned OpSize) {
752 switch (RegBankID) {
753 case AArch64::GPRRegBankID:
754 if (OpSize == 32) {
755 switch (GenericOpc) {
756 case TargetOpcode::G_SHL:
757 return AArch64::LSLVWr;
758 case TargetOpcode::G_LSHR:
759 return AArch64::LSRVWr;
760 case TargetOpcode::G_ASHR:
761 return AArch64::ASRVWr;
762 default:
763 return GenericOpc;
764 }
765 } else if (OpSize == 64) {
766 switch (GenericOpc) {
767 case TargetOpcode::G_PTR_ADD:
768 return AArch64::ADDXrr;
769 case TargetOpcode::G_SHL:
770 return AArch64::LSLVXr;
771 case TargetOpcode::G_LSHR:
772 return AArch64::LSRVXr;
773 case TargetOpcode::G_ASHR:
774 return AArch64::ASRVXr;
775 default:
776 return GenericOpc;
777 }
778 }
779 break;
780 case AArch64::FPRRegBankID:
781 switch (OpSize) {
782 case 32:
783 switch (GenericOpc) {
784 case TargetOpcode::G_FADD:
785 return AArch64::FADDSrr;
786 case TargetOpcode::G_FSUB:
787 return AArch64::FSUBSrr;
788 case TargetOpcode::G_FMUL:
789 return AArch64::FMULSrr;
790 case TargetOpcode::G_FDIV:
791 return AArch64::FDIVSrr;
792 default:
793 return GenericOpc;
794 }
795 case 64:
796 switch (GenericOpc) {
797 case TargetOpcode::G_FADD:
798 return AArch64::FADDDrr;
799 case TargetOpcode::G_FSUB:
800 return AArch64::FSUBDrr;
801 case TargetOpcode::G_FMUL:
802 return AArch64::FMULDrr;
803 case TargetOpcode::G_FDIV:
804 return AArch64::FDIVDrr;
805 case TargetOpcode::G_OR:
806 return AArch64::ORRv8i8;
807 default:
808 return GenericOpc;
809 }
810 }
811 break;
812 }
813 return GenericOpc;
814}
815
816/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
817/// appropriate for the (value) register bank \p RegBankID and of memory access
818/// size \p OpSize. This returns the variant with the base+unsigned-immediate
819/// addressing mode (e.g., LDRXui).
820/// \returns \p GenericOpc if the combination is unsupported.
821static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
822 unsigned OpSize) {
823 const bool isStore = GenericOpc == TargetOpcode::G_STORE;
824 switch (RegBankID) {
825 case AArch64::GPRRegBankID:
826 switch (OpSize) {
827 case 8:
828 return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
829 case 16:
830 return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
831 case 32:
832 return isStore ? AArch64::STRWui : AArch64::LDRWui;
833 case 64:
834 return isStore ? AArch64::STRXui : AArch64::LDRXui;
835 }
836 break;
837 case AArch64::FPRRegBankID:
838 switch (OpSize) {
839 case 8:
840 return isStore ? AArch64::STRBui : AArch64::LDRBui;
841 case 16:
842 return isStore ? AArch64::STRHui : AArch64::LDRHui;
843 case 32:
844 return isStore ? AArch64::STRSui : AArch64::LDRSui;
845 case 64:
846 return isStore ? AArch64::STRDui : AArch64::LDRDui;
847 case 128:
848 return isStore ? AArch64::STRQui : AArch64::LDRQui;
849 }
850 break;
851 }
852 return GenericOpc;
853}
854
855/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
856/// to \p *To.
857///
858/// E.g "To = COPY SrcReg:SubReg"
859static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
860 const RegisterBankInfo &RBI, Register SrcReg,
861 const TargetRegisterClass *To, unsigned SubReg) {
862 assert(SrcReg.isValid() && "Expected a valid source register?");
863 assert(To && "Destination register class cannot be null");
864 assert(SubReg && "Expected a valid subregister");
865
866 MachineIRBuilder MIB(I);
867 auto SubRegCopy =
868 MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
869 MachineOperand &RegOp = I.getOperand(1);
870 RegOp.setReg(SubRegCopy.getReg(0));
871
872 // It's possible that the destination register won't be constrained. Make
873 // sure that happens.
874 if (!I.getOperand(0).getReg().isPhysical())
875 RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
876
877 return true;
878}
879
880/// Helper function to get the source and destination register classes for a
881/// copy. Returns a std::pair containing the source register class for the
882/// copy, and the destination register class for the copy. If a register class
883/// cannot be determined, then it will be nullptr.
884static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
885getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
886 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
887 const RegisterBankInfo &RBI) {
888 Register DstReg = I.getOperand(0).getReg();
889 Register SrcReg = I.getOperand(1).getReg();
890 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
891 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
892 unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
893 unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
894
895 // Special casing for cross-bank copies of s1s. We can technically represent
896 // a 1-bit value with any size of register. The minimum size for a GPR is 32
897 // bits. So, we need to put the FPR on 32 bits as well.
898 //
899 // FIXME: I'm not sure if this case holds true outside of copies. If it does,
900 // then we can pull it into the helpers that get the appropriate class for a
901 // register bank. Or make a new helper that carries along some constraint
902 // information.
903 if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
904 SrcSize = DstSize = 32;
905
906 return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
907 getMinClassForRegBank(DstRegBank, DstSize, true)};
908}
909
910// FIXME: We need some sort of API in RBI/TRI to allow generic code to
911// constrain operands of simple instructions given a TargetRegisterClass
912// and LLT
913static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI,
914 const RegisterBankInfo &RBI) {
915 for (MachineOperand &MO : I.operands()) {
916 if (!MO.isReg())
917 continue;
918 Register Reg = MO.getReg();
919 if (!Reg)
920 continue;
921 if (Reg.isPhysical())
922 continue;
923 LLT Ty = MRI.getType(Reg);
924 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
925 const TargetRegisterClass *RC =
926 RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
927 if (!RC) {
928 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
929 RC = getRegClassForTypeOnBank(Ty, RB);
930 if (!RC) {
931 LLVM_DEBUG(
932 dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n");
933 break;
934 }
935 }
936 RBI.constrainGenericRegister(Reg, *RC, MRI);
937 }
938
939 return true;
940}
941
942static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
943 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
944 const RegisterBankInfo &RBI) {
945 Register DstReg = I.getOperand(0).getReg();
946 Register SrcReg = I.getOperand(1).getReg();
947 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
948 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
949
950 // Find the correct register classes for the source and destination registers.
951 const TargetRegisterClass *SrcRC;
952 const TargetRegisterClass *DstRC;
953 std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
954
955 if (!DstRC) {
956 LLVM_DEBUG(dbgs() << "Unexpected dest size "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unexpected dest size " <<
RBI.getSizeInBits(DstReg, MRI, TRI) << '\n'; } } while
(false)
957 << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unexpected dest size " <<
RBI.getSizeInBits(DstReg, MRI, TRI) << '\n'; } } while
(false)
;
958 return false;
959 }
960
961 // Is this a copy? If so, then we may need to insert a subregister copy.
962 if (I.isCopy()) {
963 // Yes. Check if there's anything to fix up.
964 if (!SrcRC) {
965 LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Couldn't determine source register class\n"
; } } while (false)
;
966 return false;
967 }
968
969 unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
970 unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
971 unsigned SubReg;
972
973 // If the source bank doesn't support a subregister copy small enough,
974 // then we first need to copy to the destination bank.
975 if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
976 const TargetRegisterClass *DstTempRC =
977 getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
978 getSubRegForClass(DstRC, TRI, SubReg);
979
980 MachineIRBuilder MIB(I);
981 auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
982 copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
983 } else if (SrcSize > DstSize) {
984 // If the source register is bigger than the destination we need to
985 // perform a subregister copy.
986 const TargetRegisterClass *SubRegRC =
987 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
988 getSubRegForClass(SubRegRC, TRI, SubReg);
989 copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
990 } else if (DstSize > SrcSize) {
991 // If the destination register is bigger than the source we need to do
992 // a promotion using SUBREG_TO_REG.
993 const TargetRegisterClass *PromotionRC =
994 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
995 getSubRegForClass(SrcRC, TRI, SubReg);
996
997 Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
998 BuildMI(*I.getParent(), I, I.getDebugLoc(),
999 TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
1000 .addImm(0)
1001 .addUse(SrcReg)
1002 .addImm(SubReg);
1003 MachineOperand &RegOp = I.getOperand(1);
1004 RegOp.setReg(PromoteReg);
1005 }
1006
1007 // If the destination is a physical register, then there's nothing to
1008 // change, so we're done.
1009 if (DstReg.isPhysical())
1010 return true;
1011 }
1012
1013 // No need to constrain SrcReg. It will get constrained when we hit another
1014 // of its use or its defs. Copies do not have constraints.
1015 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
1016 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Failed to constrain " <<
TII.getName(I.getOpcode()) << " operand\n"; } } while (
false)
1017 << " operand\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Failed to constrain " <<
TII.getName(I.getOpcode()) << " operand\n"; } } while (
false)
;
1018 return false;
1019 }
1020
1021 // If this a GPR ZEXT that we want to just reduce down into a copy.
1022 // The sizes will be mismatched with the source < 32b but that's ok.
1023 if (I.getOpcode() == TargetOpcode::G_ZEXT) {
1024 I.setDesc(TII.get(AArch64::COPY));
1025 assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
1026 return selectCopy(I, TII, MRI, TRI, RBI);
1027 }
1028
1029 I.setDesc(TII.get(AArch64::COPY));
1030 return true;
1031}
1032
1033static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
1034 if (!DstTy.isScalar() || !SrcTy.isScalar())
1035 return GenericOpc;
1036
1037 const unsigned DstSize = DstTy.getSizeInBits();
1038 const unsigned SrcSize = SrcTy.getSizeInBits();
1039
1040 switch (DstSize) {
1041 case 32:
1042 switch (SrcSize) {
1043 case 32:
1044 switch (GenericOpc) {
1045 case TargetOpcode::G_SITOFP:
1046 return AArch64::SCVTFUWSri;
1047 case TargetOpcode::G_UITOFP:
1048 return AArch64::UCVTFUWSri;
1049 case TargetOpcode::G_FPTOSI:
1050 return AArch64::FCVTZSUWSr;
1051 case TargetOpcode::G_FPTOUI:
1052 return AArch64::FCVTZUUWSr;
1053 default:
1054 return GenericOpc;
1055 }
1056 case 64:
1057 switch (GenericOpc) {
1058 case TargetOpcode::G_SITOFP:
1059 return AArch64::SCVTFUXSri;
1060 case TargetOpcode::G_UITOFP:
1061 return AArch64::UCVTFUXSri;
1062 case TargetOpcode::G_FPTOSI:
1063 return AArch64::FCVTZSUWDr;
1064 case TargetOpcode::G_FPTOUI:
1065 return AArch64::FCVTZUUWDr;
1066 default:
1067 return GenericOpc;
1068 }
1069 default:
1070 return GenericOpc;
1071 }
1072 case 64:
1073 switch (SrcSize) {
1074 case 32:
1075 switch (GenericOpc) {
1076 case TargetOpcode::G_SITOFP:
1077 return AArch64::SCVTFUWDri;
1078 case TargetOpcode::G_UITOFP:
1079 return AArch64::UCVTFUWDri;
1080 case TargetOpcode::G_FPTOSI:
1081 return AArch64::FCVTZSUXSr;
1082 case TargetOpcode::G_FPTOUI:
1083 return AArch64::FCVTZUUXSr;
1084 default:
1085 return GenericOpc;
1086 }
1087 case 64:
1088 switch (GenericOpc) {
1089 case TargetOpcode::G_SITOFP:
1090 return AArch64::SCVTFUXDri;
1091 case TargetOpcode::G_UITOFP:
1092 return AArch64::UCVTFUXDri;
1093 case TargetOpcode::G_FPTOSI:
1094 return AArch64::FCVTZSUXDr;
1095 case TargetOpcode::G_FPTOUI:
1096 return AArch64::FCVTZUUXDr;
1097 default:
1098 return GenericOpc;
1099 }
1100 default:
1101 return GenericOpc;
1102 }
1103 default:
1104 return GenericOpc;
1105 };
1106 return GenericOpc;
1107}
1108
1109MachineInstr *
1110AArch64InstructionSelector::emitSelect(Register Dst, Register True,
1111 Register False, AArch64CC::CondCode CC,
1112 MachineIRBuilder &MIB) const {
1113 MachineRegisterInfo &MRI = *MIB.getMRI();
1114 assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
1115 RBI.getRegBank(True, MRI, TRI)->getID() &&
1116 "Expected both select operands to have the same regbank?");
1117 LLT Ty = MRI.getType(True);
1118 if (Ty.isVector())
1119 return nullptr;
1120 const unsigned Size = Ty.getSizeInBits();
1121 assert((Size == 32 || Size == 64) &&
1122 "Expected 32 bit or 64 bit select only?");
1123 const bool Is32Bit = Size == 32;
1124 if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
1125 unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1126 auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1127 constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
1128 return &*FCSel;
1129 }
1130
1131 // By default, we'll try and emit a CSEL.
1132 unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1133 bool Optimized = false;
1134 auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
1135 &Optimized](Register &Reg, Register &OtherReg,
1136 bool Invert) {
1137 if (Optimized)
1138 return false;
1139
1140 // Attempt to fold:
1141 //
1142 // %sub = G_SUB 0, %x
1143 // %select = G_SELECT cc, %reg, %sub
1144 //
1145 // Into:
1146 // %select = CSNEG %reg, %x, cc
1147 Register MatchReg;
1148 if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
1149 Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1150 Reg = MatchReg;
1151 if (Invert) {
1152 CC = AArch64CC::getInvertedCondCode(CC);
1153 std::swap(Reg, OtherReg);
1154 }
1155 return true;
1156 }
1157
1158 // Attempt to fold:
1159 //
1160 // %xor = G_XOR %x, -1
1161 // %select = G_SELECT cc, %reg, %xor
1162 //
1163 // Into:
1164 // %select = CSINV %reg, %x, cc
1165 if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
1166 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1167 Reg = MatchReg;
1168 if (Invert) {
1169 CC = AArch64CC::getInvertedCondCode(CC);
1170 std::swap(Reg, OtherReg);
1171 }
1172 return true;
1173 }
1174
1175 // Attempt to fold:
1176 //
1177 // %add = G_ADD %x, 1
1178 // %select = G_SELECT cc, %reg, %add
1179 //
1180 // Into:
1181 // %select = CSINC %reg, %x, cc
1182 if (mi_match(Reg, MRI,
1183 m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),
1184 m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {
1185 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1186 Reg = MatchReg;
1187 if (Invert) {
1188 CC = AArch64CC::getInvertedCondCode(CC);
1189 std::swap(Reg, OtherReg);
1190 }
1191 return true;
1192 }
1193
1194 return false;
1195 };
1196
1197 // Helper lambda which tries to use CSINC/CSINV for the instruction when its
1198 // true/false values are constants.
1199 // FIXME: All of these patterns already exist in tablegen. We should be
1200 // able to import these.
1201 auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
1202 &Optimized]() {
1203 if (Optimized)
1204 return false;
1205 auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI);
1206 auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI);
1207 if (!TrueCst && !FalseCst)
1208 return false;
1209
1210 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1211 if (TrueCst && FalseCst) {
1212 int64_t T = TrueCst->Value.getSExtValue();
1213 int64_t F = FalseCst->Value.getSExtValue();
1214
1215 if (T == 0 && F == 1) {
1216 // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
1217 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1218 True = ZReg;
1219 False = ZReg;
1220 return true;
1221 }
1222
1223 if (T == 0 && F == -1) {
1224 // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
1225 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1226 True = ZReg;
1227 False = ZReg;
1228 return true;
1229 }
1230 }
1231
1232 if (TrueCst) {
1233 int64_t T = TrueCst->Value.getSExtValue();
1234 if (T == 1) {
1235 // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
1236 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1237 True = False;
1238 False = ZReg;
1239 CC = AArch64CC::getInvertedCondCode(CC);
1240 return true;
1241 }
1242
1243 if (T == -1) {
1244 // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
1245 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1246 True = False;
1247 False = ZReg;
1248 CC = AArch64CC::getInvertedCondCode(CC);
1249 return true;
1250 }
1251 }
1252
1253 if (FalseCst) {
1254 int64_t F = FalseCst->Value.getSExtValue();
1255 if (F == 1) {
1256 // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
1257 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1258 False = ZReg;
1259 return true;
1260 }
1261
1262 if (F == -1) {
1263 // G_SELECT cc, t, -1 -> CSINC t, zreg, cc
1264 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1265 False = ZReg;
1266 return true;
1267 }
1268 }
1269 return false;
1270 };
1271
1272 Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
1273 Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
1274 Optimized |= TryOptSelectCst();
1275 auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1276 constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
1277 return &*SelectInst;
1278}
1279
1280static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
1281 switch (P) {
1282 default:
1283     llvm_unreachable("Unknown condition code!");
1284 case CmpInst::ICMP_NE:
1285 return AArch64CC::NE;
1286 case CmpInst::ICMP_EQ:
1287 return AArch64CC::EQ;
1288 case CmpInst::ICMP_SGT:
1289 return AArch64CC::GT;
1290 case CmpInst::ICMP_SGE:
1291 return AArch64CC::GE;
1292 case CmpInst::ICMP_SLT:
1293 return AArch64CC::LT;
1294 case CmpInst::ICMP_SLE:
1295 return AArch64CC::LE;
1296 case CmpInst::ICMP_UGT:
1297 return AArch64CC::HI;
1298 case CmpInst::ICMP_UGE:
1299 return AArch64CC::HS;
1300 case CmpInst::ICMP_ULT:
1301 return AArch64CC::LO;
1302 case CmpInst::ICMP_ULE:
1303 return AArch64CC::LS;
1304 }
1305}
1306
1307/// changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
1308static void changeFPCCToORAArch64CC(CmpInst::Predicate CC,
1309 AArch64CC::CondCode &CondCode,
1310 AArch64CC::CondCode &CondCode2) {
1311 CondCode2 = AArch64CC::AL;
1312 switch (CC) {
1313 default:
1314     llvm_unreachable("Unknown FP condition!");
1315 case CmpInst::FCMP_OEQ:
1316 CondCode = AArch64CC::EQ;
1317 break;
1318 case CmpInst::FCMP_OGT:
1319 CondCode = AArch64CC::GT;
1320 break;
1321 case CmpInst::FCMP_OGE:
1322 CondCode = AArch64CC::GE;
1323 break;
1324 case CmpInst::FCMP_OLT:
1325 CondCode = AArch64CC::MI;
1326 break;
1327 case CmpInst::FCMP_OLE:
1328 CondCode = AArch64CC::LS;
1329 break;
1330 case CmpInst::FCMP_ONE:
1331 CondCode = AArch64CC::MI;
1332 CondCode2 = AArch64CC::GT;
1333 break;
1334 case CmpInst::FCMP_ORD:
1335 CondCode = AArch64CC::VC;
1336 break;
1337 case CmpInst::FCMP_UNO:
1338 CondCode = AArch64CC::VS;
1339 break;
1340 case CmpInst::FCMP_UEQ:
1341 CondCode = AArch64CC::EQ;
1342 CondCode2 = AArch64CC::VS;
1343 break;
1344 case CmpInst::FCMP_UGT:
1345 CondCode = AArch64CC::HI;
1346 break;
1347 case CmpInst::FCMP_UGE:
1348 CondCode = AArch64CC::PL;
1349 break;
1350 case CmpInst::FCMP_ULT:
1351 CondCode = AArch64CC::LT;
1352 break;
1353 case CmpInst::FCMP_ULE:
1354 CondCode = AArch64CC::LE;
1355 break;
1356 case CmpInst::FCMP_UNE:
1357 CondCode = AArch64CC::NE;
1358 break;
1359 }
1360}
1361
1362/// Convert an IR fp condition code to an AArch64 CC.
1363/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
1364/// should be AND'ed instead of OR'ed.
1365static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC,
1366 AArch64CC::CondCode &CondCode,
1367 AArch64CC::CondCode &CondCode2) {
1368 CondCode2 = AArch64CC::AL;
1369 switch (CC) {
1370 default:
1371 changeFPCCToORAArch64CC(CC, CondCode, CondCode2);
1372     assert(CondCode2 == AArch64CC::AL);
1373 break;
1374 case CmpInst::FCMP_ONE:
1375 // (a one b)
1376 // == ((a olt b) || (a ogt b))
1377 // == ((a ord b) && (a une b))
1378 CondCode = AArch64CC::VC;
1379 CondCode2 = AArch64CC::NE;
1380 break;
1381 case CmpInst::FCMP_UEQ:
1382 // (a ueq b)
1383 // == ((a uno b) || (a oeq b))
1384 // == ((a ule b) && (a uge b))
1385 CondCode = AArch64CC::PL;
1386 CondCode2 = AArch64CC::LE;
1387 break;
1388 }
1389}
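
// [Illustrative sketch; not part of AArch64InstructionSelector.cpp.] Why the
// AND'ed pair for FCMP_ONE is (VC, NE), i.e. (ordered, not-equal): "less than
// or greater than" is exactly "ordered and not equal". A quick check over the
// four possible outcomes of a floating-point compare:
#include <cassert>

int main() {
  enum Rel { LT, EQ, GT, UN }; // the four FCMP outcomes
  for (Rel R : {LT, EQ, GT, UN}) {
    const bool One = (R == LT || R == GT); // FCMP_ONE
    const bool Ord = (R != UN);            // FCMP_ORD, cond code VC above
    const bool Une = (R != EQ);            // FCMP_UNE, cond code NE above
    assert(One == (Ord && Une));
  }
  return 0;
}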
1390
1391/// Return a register which can be used as a bit to test in a TB(N)Z.
1392static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1393 MachineRegisterInfo &MRI) {
1394   assert(Reg.isValid() && "Expected valid register!");
1395 bool HasZext = false;
1396 while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1397 unsigned Opc = MI->getOpcode();
1398
1399 if (!MI->getOperand(0).isReg() ||
1400 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1401 break;
1402
1403 // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
1404 //
1405 // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1406 // on the truncated x is the same as the bit number on x.
1407 if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1408 Opc == TargetOpcode::G_TRUNC) {
1409 if (Opc == TargetOpcode::G_ZEXT)
1410 HasZext = true;
1411
1412 Register NextReg = MI->getOperand(1).getReg();
1413 // Did we find something worth folding?
1414 if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
1415 break;
1416
1417 // NextReg is worth folding. Keep looking.
1418 Reg = NextReg;
1419 continue;
1420 }
1421
1422 // Attempt to find a suitable operation with a constant on one side.
1423 std::optional<uint64_t> C;
1424 Register TestReg;
1425 switch (Opc) {
1426 default:
1427 break;
1428 case TargetOpcode::G_AND:
1429 case TargetOpcode::G_XOR: {
1430 TestReg = MI->getOperand(1).getReg();
1431 Register ConstantReg = MI->getOperand(2).getReg();
1432 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1433 if (!VRegAndVal) {
1434 // AND commutes, check the other side for a constant.
1435 // FIXME: Can we canonicalize the constant so that it's always on the
1436 // same side at some point earlier?
1437 std::swap(ConstantReg, TestReg);
1438 VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1439 }
1440 if (VRegAndVal) {
1441 if (HasZext)
1442 C = VRegAndVal->Value.getZExtValue();
1443 else
1444 C = VRegAndVal->Value.getSExtValue();
1445 }
1446 break;
1447 }
1448 case TargetOpcode::G_ASHR:
1449 case TargetOpcode::G_LSHR:
1450 case TargetOpcode::G_SHL: {
1451 TestReg = MI->getOperand(1).getReg();
1452 auto VRegAndVal =
1453 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1454 if (VRegAndVal)
1455 C = VRegAndVal->Value.getSExtValue();
1456 break;
1457 }
1458 }
1459
1460 // Didn't find a constant or viable register. Bail out of the loop.
1461 if (!C || !TestReg.isValid())
1462 break;
1463
1464 // We found a suitable instruction with a constant. Check to see if we can
1465 // walk through the instruction.
1466 Register NextReg;
1467 unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
1468 switch (Opc) {
1469 default:
1470 break;
1471 case TargetOpcode::G_AND:
1472 // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1473 if ((*C >> Bit) & 1)
1474 NextReg = TestReg;
1475 break;
1476 case TargetOpcode::G_SHL:
1477 // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
1478 // the type of the register.
1479 if (*C <= Bit && (Bit - *C) < TestRegSize) {
1480 NextReg = TestReg;
1481 Bit = Bit - *C;
1482 }
1483 break;
1484 case TargetOpcode::G_ASHR:
1485 // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
1486 // in x
1487 NextReg = TestReg;
1488 Bit = Bit + *C;
1489 if (Bit >= TestRegSize)
1490 Bit = TestRegSize - 1;
1491 break;
1492 case TargetOpcode::G_LSHR:
1493 // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1494 if ((Bit + *C) < TestRegSize) {
1495 NextReg = TestReg;
1496 Bit = Bit + *C;
1497 }
1498 break;
1499 case TargetOpcode::G_XOR:
1500 // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1501 // appropriate.
1502 //
1503 // e.g. If x' = xor x, c, and the b-th bit is set in c then
1504 //
1505 // tbz x', b -> tbnz x, b
1506 //
1507 // Because x' only has the b-th bit set if x does not.
1508 if ((*C >> Bit) & 1)
1509 Invert = !Invert;
1510 NextReg = TestReg;
1511 break;
1512 }
1513
1514 // Check if we found anything worth folding.
1515 if (!NextReg.isValid())
1516 return Reg;
1517 Reg = NextReg;
1518 }
1519
1520 return Reg;
1521}
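
// [Illustrative sketch; not part of AArch64InstructionSelector.cpp.] The bit
// renumbering getTestBitReg performs for shifts can be checked with ordinary
// 64-bit arithmetic (helper names invented; the last case assumes >> on a
// negative int64_t is an arithmetic shift, as on the compilers LLVM supports):
#include <cassert>
#include <cstdint>

static bool bit(uint64_t V, unsigned B) { return (V >> B) & 1; }

int main() {
  const uint64_t X = 0x00F0F0F0F0F0F0F0ULL;
  // (tbz (shl x, c), b) -> (tbz x, b - c), valid when c <= b.
  assert(bit(X << 3, 7) == bit(X, 7 - 3));
  // (tbz (lshr x, c), b) -> (tbz x, b + c), valid when b + c < 64.
  assert(bit(X >> 3, 4) == bit(X, 4 + 3));
  // (tbz (ashr x, c), b) -> (tbz x, min(b + c, 63)): arithmetic shifts
  // replicate the sign bit, so positions past the msb all test the msb.
  const int64_t N = static_cast<int64_t>(0x8000000000000000ULL);
  assert(bit(static_cast<uint64_t>(N >> 60), 10) ==
         bit(static_cast<uint64_t>(N), 63));
  return 0;
}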
1522
1523MachineInstr *AArch64InstructionSelector::emitTestBit(
1524 Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1525 MachineIRBuilder &MIB) const {
1526   assert(TestReg.isValid());
1527   assert(ProduceNonFlagSettingCondBr &&
1528          "Cannot emit TB(N)Z with speculation tracking!");
1529 MachineRegisterInfo &MRI = *MIB.getMRI();
1530
1531 // Attempt to optimize the test bit by walking over instructions.
1532 TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1533 LLT Ty = MRI.getType(TestReg);
1534 unsigned Size = Ty.getSizeInBits();
1535   assert(!Ty.isVector() && "Expected a scalar!");
1536   assert(Bit < 64 && "Bit is too large!");
1537
1538 // When the test register is a 64-bit register, we have to narrow to make
1539 // TBNZW work.
1540 bool UseWReg = Bit < 32;
1541 unsigned NecessarySize = UseWReg ? 32 : 64;
1542 if (Size != NecessarySize)
1543 TestReg = moveScalarRegClass(
1544 TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1545 MIB);
1546
1547 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1548 {AArch64::TBZW, AArch64::TBNZW}};
1549 unsigned Opc = OpcTable[UseWReg][IsNegative];
1550 auto TestBitMI =
1551 MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1552 constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1553 return &*TestBitMI;
1554}
1555
1556bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1557 MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1558 MachineIRBuilder &MIB) const {
1559   assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
1560 // Given something like this:
1561 //
1562 // %x = ...Something...
1563 // %one = G_CONSTANT i64 1
1564 // %zero = G_CONSTANT i64 0
1565 // %and = G_AND %x, %one
1566 // %cmp = G_ICMP intpred(ne), %and, %zero
1567 // %cmp_trunc = G_TRUNC %cmp
1568 // G_BRCOND %cmp_trunc, %bb.3
1569 //
1570 // We want to try and fold the AND into the G_BRCOND and produce either a
1571 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1572 //
1573 // In this case, we'd get
1574 //
1575 // TBNZ %x %bb.3
1576 //
1577
1578 // Check if the AND has a constant on its RHS which we can use as a mask.
1579 // If it's a power of 2, then it's the same as checking a specific bit.
1580 // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1581 auto MaybeBit = getIConstantVRegValWithLookThrough(
1582 AndInst.getOperand(2).getReg(), *MIB.getMRI());
1583 if (!MaybeBit)
1584 return false;
1585
1586 int32_t Bit = MaybeBit->Value.exactLogBase2();
1587 if (Bit < 0)
1588 return false;
1589
1590 Register TestReg = AndInst.getOperand(1).getReg();
1591
1592 // Emit a TB(N)Z.
1593 emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1594 return true;
1595}
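
// [Illustrative sketch; not part of AArch64InstructionSelector.cpp.] The
// mask-to-bit-index step above: a power-of-two AND mask selects exactly one
// bit, and exactLogBase2() recovers its index (ANDing with 8 tests bit 3).
// A plain-integer stand-in for the APInt call (name invented):
#include <cstdint>

static int maskToTestBit(uint64_t Mask) {
  if (Mask == 0 || (Mask & (Mask - 1)) != 0)
    return -1; // Not a power of two: no single test bit.
  int Bit = 0;
  while (!(Mask & 1)) {
    Mask >>= 1;
    ++Bit;
  }
  return Bit; // e.g. 8 -> 3, 1 -> 0, 0x8000 -> 15
}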
1596
1597MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1598 bool IsNegative,
1599 MachineBasicBlock *DestMBB,
1600 MachineIRBuilder &MIB) const {
1601   assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
1602 MachineRegisterInfo &MRI = *MIB.getMRI();
1603   assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
1604              AArch64::GPRRegBankID &&
1605          "Expected GPRs only?");
1606 auto Ty = MRI.getType(CompareReg);
1607 unsigned Width = Ty.getSizeInBits();
1608   assert(!Ty.isVector() && "Expected scalar only?");
1609   assert(Width <= 64 && "Expected width to be at most 64?");
1610 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1611 {AArch64::CBNZW, AArch64::CBNZX}};
1612 unsigned Opc = OpcTable[IsNegative][Width == 64];
1613 auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
1614 constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
1615 return &*BranchMI;
1616}
1617
1618bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1619 MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1620   assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
1621   assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1622 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1623 // totally clean. Some of them require two branches to implement.
1624 auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
1625 emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
1626 Pred);
1627 AArch64CC::CondCode CC1, CC2;
1628 changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2);
1629 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1630 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
1631 if (CC2 != AArch64CC::AL)
1632 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
1633 I.eraseFromParent();
1634 return true;
1635}
1636
1637bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1638 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1639   assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1640   assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1641 // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
1642 //
1643 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1644 // instructions will not be produced, as they are conditional branch
1645 // instructions that do not set flags.
1646 if (!ProduceNonFlagSettingCondBr)
1647 return false;
1648
1649 MachineRegisterInfo &MRI = *MIB.getMRI();
1650 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1651 auto Pred =
1652 static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
1653 Register LHS = ICmp.getOperand(2).getReg();
1654 Register RHS = ICmp.getOperand(3).getReg();
1655
1656 // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
1657 auto VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1658 MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1659
1660 // When we can emit a TB(N)Z, prefer that.
1661 //
1662 // Handle non-commutative condition codes first.
1663 // Note that we don't want to do this when we have a G_AND because it can
1664 // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1665 if (VRegAndVal && !AndInst) {
1666 int64_t C = VRegAndVal->Value.getSExtValue();
1667
1668 // When we have a greater-than comparison, we can just test if the msb is
1669 // zero.
1670 if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1671 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1672 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1673 I.eraseFromParent();
1674 return true;
1675 }
1676
1677 // When we have a less than comparison, we can just test if the msb is not
1678 // zero.
1679 if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1680 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1681 emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
1682 I.eraseFromParent();
1683 return true;
1684 }
1685
1686 // Inversely, if we have a signed greater-than-or-equal comparison to zero,
1687 // we can test if the msb is zero.
1688 if (C == 0 && Pred == CmpInst::ICMP_SGE) {
1689 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1690 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1691 I.eraseFromParent();
1692 return true;
1693 }
1694 }
1695
1696 // Attempt to handle commutative condition codes. Right now, that's only
1697 // eq/ne.
1698 if (ICmpInst::isEquality(Pred)) {
1699 if (!VRegAndVal) {
1700 std::swap(RHS, LHS);
1701 VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1702 AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1703 }
1704
1705 if (VRegAndVal && VRegAndVal->Value == 0) {
1706 // If there's a G_AND feeding into this branch, try to fold it away by
1707 // emitting a TB(N)Z instead.
1708 //
1709 // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
1710 // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
1711 // would be redundant.
1712 if (AndInst &&
1713 tryOptAndIntoCompareBranch(
1714 *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
1715 I.eraseFromParent();
1716 return true;
1717 }
1718
1719 // Otherwise, try to emit a CB(N)Z instead.
1720 auto LHSTy = MRI.getType(LHS);
1721 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1722 emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
1723 I.eraseFromParent();
1724 return true;
1725 }
1726 }
1727 }
1728
1729 return false;
1730}
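
// [Illustrative sketch; not part of AArch64InstructionSelector.cpp.] The
// three constant-compare special cases above collapse to a single sign-bit
// test, which is why one TB(N)Z on the msb is enough:
//   x >  -1 (signed)  <=>  msb(x) == 0   -> TBZ  x, #msb
//   x <   0 (signed)  <=>  msb(x) == 1   -> TBNZ x, #msb
//   x >=  0 (signed)  <=>  msb(x) == 0   -> TBZ  x, #msb
#include <cassert>
#include <cstdint>

static bool msbSet(int64_t X) { return (static_cast<uint64_t>(X) >> 63) & 1; }

int main() {
  for (int64_t X : {INT64_MIN, int64_t(-2), int64_t(-1), int64_t(0),
                    int64_t(1), INT64_MAX}) {
    assert((X > -1) == !msbSet(X));
    assert((X < 0) == msbSet(X));
    assert((X >= 0) == !msbSet(X));
  }
  return 0;
}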
1731
1732bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1733 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1734   assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1735   assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1736 if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
1737 return true;
1738
1739 // Couldn't optimize. Emit a compare + a Bcc.
1740 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1741 auto PredOp = ICmp.getOperand(1);
1742 emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
1743 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
1744 static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
1745 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1746 I.eraseFromParent();
1747 return true;
1748}
1749
1750bool AArch64InstructionSelector::selectCompareBranch(
1751 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
1752 Register CondReg = I.getOperand(0).getReg();
1753 MachineInstr *CCMI = MRI.getVRegDef(CondReg);
1754 // Try to select the G_BRCOND using whatever is feeding the condition if
1755 // possible.
1756 unsigned CCMIOpc = CCMI->getOpcode();
1757 if (CCMIOpc == TargetOpcode::G_FCMP)
1758 return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
1759 if (CCMIOpc == TargetOpcode::G_ICMP)
1760 return selectCompareBranchFedByICmp(I, *CCMI, MIB);
1761
1762 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1763 // instructions will not be produced, as they are conditional branch
1764 // instructions that do not set flags.
1765 if (ProduceNonFlagSettingCondBr) {
1766 emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
1767 I.getOperand(1).getMBB(), MIB);
1768 I.eraseFromParent();
1769 return true;
1770 }
1771
1772 // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
1773 auto TstMI =
1774 MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
1775 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
1776 auto Bcc = MIB.buildInstr(AArch64::Bcc)
1777 .addImm(AArch64CC::EQ)
1778 .addMBB(I.getOperand(1).getMBB());
1779 I.eraseFromParent();
1780 return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
1781}
1782
1783/// Returns the element immediate value of a vector shift operand if found.
1784/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1785static std::optional<int64_t> getVectorShiftImm(Register Reg,
1786 MachineRegisterInfo &MRI) {
1787   assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1788 MachineInstr *OpMI = MRI.getVRegDef(Reg);
1789 return getAArch64VectorSplatScalar(*OpMI, MRI);
1790}
1791
1792/// Matches and returns the shift immediate value for a SHL instruction given
1793/// a shift operand.
1794static std::optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg,
1795 MachineRegisterInfo &MRI) {
1796 std::optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1797 if (!ShiftImm)
1798 return std::nullopt;
1799 // Check the immediate is in range for a SHL.
1800 int64_t Imm = *ShiftImm;
1801 if (Imm < 0)
1802 return std::nullopt;
1803 switch (SrcTy.getElementType().getSizeInBits()) {
1804 default:
1805     LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
1806 return std::nullopt;
1807 case 8:
1808 if (Imm > 7)
1809 return std::nullopt;
1810 break;
1811 case 16:
1812 if (Imm > 15)
1813 return std::nullopt;
1814 break;
1815 case 32:
1816 if (Imm > 31)
1817 return std::nullopt;
1818 break;
1819 case 64:
1820 if (Imm > 63)
1821 return std::nullopt;
1822 break;
1823 }
1824 return Imm;
1825}
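
// [Illustrative sketch; not part of AArch64InstructionSelector.cpp.] The
// per-element-size checks above all enforce one rule: an immediate vector SHL
// amount must lie in [0, EltBits - 1]. A compact equivalent (name invented):
#include <cstdint>
#include <optional>

static std::optional<int64_t> clampVectorShlImm(int64_t Imm, unsigned EltBits) {
  // EltBits is assumed to be 8, 16, 32 or 64, mirroring getVectorSHLImm.
  if (Imm < 0 || Imm >= static_cast<int64_t>(EltBits))
    return std::nullopt;
  return Imm;
}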
1826
1827bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
1828 MachineRegisterInfo &MRI) {
1829   assert(I.getOpcode() == TargetOpcode::G_SHL);
1830 Register DstReg = I.getOperand(0).getReg();
1831 const LLT Ty = MRI.getType(DstReg);
1832 Register Src1Reg = I.getOperand(1).getReg();
1833 Register Src2Reg = I.getOperand(2).getReg();
1834
1835 if (!Ty.isVector())
1836 return false;
1837
1838 // Check if we have a vector of constants on RHS that we can select as the
1839 // immediate form.
1840 std::optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1841
1842 unsigned Opc = 0;
1843 if (Ty == LLT::fixed_vector(2, 64)) {
1844 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1845 } else if (Ty == LLT::fixed_vector(4, 32)) {
1846 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1847 } else if (Ty == LLT::fixed_vector(2, 32)) {
1848 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1849 } else if (Ty == LLT::fixed_vector(4, 16)) {
1850 Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1851 } else if (Ty == LLT::fixed_vector(8, 16)) {
1852 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1853 } else if (Ty == LLT::fixed_vector(16, 8)) {
1854 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1855 } else if (Ty == LLT::fixed_vector(8, 8)) {
1856 Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1857 } else {
1858     LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1859 return false;
1860 }
1861
1862 auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1863 if (ImmVal)
1864 Shl.addImm(*ImmVal);
1865 else
1866 Shl.addUse(Src2Reg);
1867 constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
1868 I.eraseFromParent();
1869 return true;
1870}
1871
1872bool AArch64InstructionSelector::selectVectorAshrLshr(
1873 MachineInstr &I, MachineRegisterInfo &MRI) {
1874   assert(I.getOpcode() == TargetOpcode::G_ASHR ||
1875          I.getOpcode() == TargetOpcode::G_LSHR);
1876 Register DstReg = I.getOperand(0).getReg();
1877 const LLT Ty = MRI.getType(DstReg);
1878 Register Src1Reg = I.getOperand(1).getReg();
1879 Register Src2Reg = I.getOperand(2).getReg();
1880
1881 if (!Ty.isVector())
1882 return false;
1883
1884 bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1885
1886 // We expect the immediate case to be lowered in the PostLegalCombiner to
1887 // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1888
1889   // There is no shift-right-by-register instruction, but the shift-left-by-
1890   // register instruction takes a signed value, where negative amounts specify
1891   // a right shift.
1892
1893 unsigned Opc = 0;
1894 unsigned NegOpc = 0;
1895 const TargetRegisterClass *RC =
1896 getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID));
1897 if (Ty == LLT::fixed_vector(2, 64)) {
1898 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1899 NegOpc = AArch64::NEGv2i64;
1900 } else if (Ty == LLT::fixed_vector(4, 32)) {
1901 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1902 NegOpc = AArch64::NEGv4i32;
1903 } else if (Ty == LLT::fixed_vector(2, 32)) {
1904 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1905 NegOpc = AArch64::NEGv2i32;
1906 } else if (Ty == LLT::fixed_vector(4, 16)) {
1907 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1908 NegOpc = AArch64::NEGv4i16;
1909 } else if (Ty == LLT::fixed_vector(8, 16)) {
1910 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1911 NegOpc = AArch64::NEGv8i16;
1912 } else if (Ty == LLT::fixed_vector(16, 8)) {
1913 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1914 NegOpc = AArch64::NEGv16i8;
1915 } else if (Ty == LLT::fixed_vector(8, 8)) {
1916 Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1917 NegOpc = AArch64::NEGv8i8;
1918 } else {
1919     LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1920 return false;
1921 }
1922
1923 auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1924 constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1925 auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1926 constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1927 I.eraseFromParent();
1928 return true;
1929}
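
// [Illustrative sketch; not part of AArch64InstructionSelector.cpp.] Why the
// NEG + (S|U)SHL pair works: SSHL/USHL shift left for positive per-lane
// amounts and right for negative ones, so "x >> s" is emitted as a left shift
// by -s. Per lane, for 0 < s < 64 (assuming >> on signed values behaves
// arithmetically, as on the compilers LLVM supports):
#include <cassert>
#include <cstdint>

static int64_t sshlLane(int64_t X, int64_t Amt) {
  return Amt >= 0 ? X << Amt : X >> -Amt; // a negative amount shifts right
}

int main() {
  const int64_t X = -1000, S = 5;
  assert(sshlLane(X, -S) == (X >> S)); // G_ASHR x, 5 becomes SSHL x, NEG(5)
  return 0;
}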
1930
1931bool AArch64InstructionSelector::selectVaStartAAPCS(
1932 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1933 return false;
1934}
1935
1936bool AArch64InstructionSelector::selectVaStartDarwin(
1937 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1938 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1939 Register ListReg = I.getOperand(0).getReg();
1940
1941 Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1942
1943 int FrameIdx = FuncInfo->getVarArgsStackIndex();
1944 if (MF.getSubtarget<AArch64Subtarget>().isCallingConvWin64(
1945 MF.getFunction().getCallingConv())) {
1946 FrameIdx = FuncInfo->getVarArgsGPRSize() > 0
1947 ? FuncInfo->getVarArgsGPRIndex()
1948 : FuncInfo->getVarArgsStackIndex();
1949 }
1950
1951 auto MIB =
1952 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
1953 .addDef(ArgsAddrReg)
1954 .addFrameIndex(FrameIdx)
1955 .addImm(0)
1956 .addImm(0);
1957
1958 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1959
1960 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
1961 .addUse(ArgsAddrReg)
1962 .addUse(ListReg)
1963 .addImm(0)
1964 .addMemOperand(*I.memoperands_begin());
1965
1966 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1967 I.eraseFromParent();
1968 return true;
1969}
1970
1971void AArch64InstructionSelector::materializeLargeCMVal(
1972 MachineInstr &I, const Value *V, unsigned OpFlags) {
1973 MachineBasicBlock &MBB = *I.getParent();
1974 MachineFunction &MF = *MBB.getParent();
1975 MachineRegisterInfo &MRI = MF.getRegInfo();
1976
1977 auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
1978 MovZ->addOperand(MF, I.getOperand(1));
1979 MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
1980 AArch64II::MO_NC);
1981 MovZ->addOperand(MF, MachineOperand::CreateImm(0));
1982 constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
1983
1984 auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
1985 Register ForceDstReg) {
1986 Register DstReg = ForceDstReg
1987 ? ForceDstReg
1988 : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1989 auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
1990 if (auto *GV = dyn_cast<GlobalValue>(V)) {
1991 MovI->addOperand(MF, MachineOperand::CreateGA(
1992 GV, MovZ->getOperand(1).getOffset(), Flags));
1993 } else {
1994 MovI->addOperand(
1995 MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
1996 MovZ->getOperand(1).getOffset(), Flags));
1997 }
1998 MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
1999 constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
2000 return DstReg;
2001 };
2002 Register DstReg = BuildMovK(MovZ.getReg(0),
2003 AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
2004 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
2005 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
2006}
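
// [Illustrative sketch; not part of AArch64InstructionSelector.cpp.] The
// MOVZ + 3x MOVK sequence built above materializes a 64-bit value 16 bits at
// a time: MOVZ writes bits [15:0] and zeroes the rest, then each MOVK inserts
// one 16-bit chunk at offset 16, 32 and 48. The same composition in plain C++
// (helper name invented):
#include <cassert>
#include <cstdint>

static uint64_t movk(uint64_t Reg, uint16_t Chunk, unsigned Shift) {
  Reg &= ~(0xFFFFULL << Shift); // clear the destination half-word
  return Reg | (static_cast<uint64_t>(Chunk) << Shift);
}

int main() {
  const uint64_t V = 0x1122334455667788ULL;
  uint64_t R = static_cast<uint16_t>(V);           // MOVZ r, #chunk0
  R = movk(R, static_cast<uint16_t>(V >> 16), 16); // MOVK r, #chunk1, lsl #16
  R = movk(R, static_cast<uint16_t>(V >> 32), 32); // MOVK r, #chunk2, lsl #32
  R = movk(R, static_cast<uint16_t>(V >> 48), 48); // MOVK r, #chunk3, lsl #48
  assert(R == V);
  return 0;
}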
2007
2008bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
2009 MachineBasicBlock &MBB = *I.getParent();
2010 MachineFunction &MF = *MBB.getParent();
2011 MachineRegisterInfo &MRI = MF.getRegInfo();
2012
2013 switch (I.getOpcode()) {
2014 case TargetOpcode::G_STORE: {
2015 bool Changed = contractCrossBankCopyIntoStore(I, MRI);
2016 MachineOperand &SrcOp = I.getOperand(0);
2017 if (MRI.getType(SrcOp.getReg()).isPointer()) {
2018 // Allow matching with imported patterns for stores of pointers. Unlike
2019 // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
2020 // and constrain.
2021 auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp);
2022 Register NewSrc = Copy.getReg(0);
2023 SrcOp.setReg(NewSrc);
2024 RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
2025 Changed = true;
2026 }
2027 return Changed;
2028 }
2029 case TargetOpcode::G_PTR_ADD:
2030 return convertPtrAddToAdd(I, MRI);
2031 case TargetOpcode::G_LOAD: {
2032 // For scalar loads of pointers, we try to convert the dest type from p0
2033 // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
2034 // conversion, this should be ok because all users should have been
2035 // selected already, so the type doesn't matter for them.
2036 Register DstReg = I.getOperand(0).getReg();
2037 const LLT DstTy = MRI.getType(DstReg);
2038 if (!DstTy.isPointer())
2039 return false;
2040 MRI.setType(DstReg, LLT::scalar(64));
2041 return true;
2042 }
2043 case AArch64::G_DUP: {
2044 // Convert the type from p0 to s64 to help selection.
2045 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2046 if (!DstTy.getElementType().isPointer())
2047 return false;
2048 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
2049 MRI.setType(I.getOperand(0).getReg(),
2050 DstTy.changeElementType(LLT::scalar(64)));
2051 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2052 I.getOperand(1).setReg(NewSrc.getReg(0));
2053 return true;
2054 }
2055 case TargetOpcode::G_UITOFP:
2056 case TargetOpcode::G_SITOFP: {
2057 // If both source and destination regbanks are FPR, then convert the opcode
2058 // to G_SITOF so that the importer can select it to an fpr variant.
2059 // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
2060 // copy.
2061 Register SrcReg = I.getOperand(1).getReg();
2062 LLT SrcTy = MRI.getType(SrcReg);
2063 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2064 if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
2065 return false;
2066
2067 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
2068 if (I.getOpcode() == TargetOpcode::G_SITOFP)
2069 I.setDesc(TII.get(AArch64::G_SITOF));
2070 else
2071 I.setDesc(TII.get(AArch64::G_UITOF));
2072 return true;
2073 }
2074 return false;
2075 }
2076 default:
2077 return false;
2078 }
2079}
2080
2081/// This lowering tries to look for G_PTR_ADD instructions and then converts
2082/// them to a standard G_ADD with a COPY on the source.
2083///
2084/// The motivation behind this is to expose the add semantics to the imported
2085/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
2086/// because the selector works bottom up, uses before defs. By the time we
2087/// end up trying to select a G_PTR_ADD, we should have already attempted to
2088/// fold this into addressing modes and were therefore unsuccessful.
2089bool AArch64InstructionSelector::convertPtrAddToAdd(
2090 MachineInstr &I, MachineRegisterInfo &MRI) {
2091   assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
2092 Register DstReg = I.getOperand(0).getReg();
2093 Register AddOp1Reg = I.getOperand(1).getReg();
2094 const LLT PtrTy = MRI.getType(DstReg);
2095 if (PtrTy.getAddressSpace() != 0)
2096 return false;
2097
2098 const LLT CastPtrTy =
2099 PtrTy.isVector() ? LLT::fixed_vector(2, 64) : LLT::scalar(64);
2100 auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
2101 // Set regbanks on the registers.
2102 if (PtrTy.isVector())
2103 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
2104 else
2105 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
2106
2107 // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
2108 // %dst(intty) = G_ADD %intbase, off
2109 I.setDesc(TII.get(TargetOpcode::G_ADD));
2110 MRI.setType(DstReg, CastPtrTy);
2111 I.getOperand(1).setReg(PtrToInt.getReg(0));
2112 if (!select(*PtrToInt)) {
2113     LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
2114 return false;
2115 }
2116
2117 // Also take the opportunity here to try to do some optimization.
2118 // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
2119 Register NegatedReg;
2120 if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
2121 return true;
2122 I.getOperand(2).setReg(NegatedReg);
2123 I.setDesc(TII.get(TargetOpcode::G_SUB));
2124 return true;
2125}
2126
2127bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
2128 MachineRegisterInfo &MRI) {
2129 // We try to match the immediate variant of LSL, which is actually an alias
2130 // for a special case of UBFM. Otherwise, we fall back to the imported
2131 // selector which will match the register variant.
2132   assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
2133 const auto &MO = I.getOperand(2);
2134 auto VRegAndVal = getIConstantVRegVal(MO.getReg(), MRI);
2135 if (!VRegAndVal)
2136 return false;
2137
2138 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2139 if (DstTy.isVector())
2140 return false;
2141 bool Is64Bit = DstTy.getSizeInBits() == 64;
2142 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2143 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2144
2145 if (!Imm1Fn || !Imm2Fn)
2146 return false;
2147
2148 auto NewI =
2149 MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2150 {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
2151
2152 for (auto &RenderFn : *Imm1Fn)
2153 RenderFn(NewI);
2154 for (auto &RenderFn : *Imm2Fn)
2155 RenderFn(NewI);
2156
2157 I.eraseFromParent();
2158 return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
2159}
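
// [Illustrative sketch; not part of AArch64InstructionSelector.cpp.] The
// immediate LSL matched above is an alias of UBFM:
//   LSL d, n, #sh  ==  UBFM d, n, #((size - sh) % size), #(size - 1 - sh)
// so the two immediate renderers are expected to supply a pair of this shape
// (an assumption about selectShiftA_*/selectShiftB_*, stated here only for
// illustration). Computing the pair directly:
#include <utility>

static std::pair<unsigned, unsigned> lslToUbfm(unsigned Shift, unsigned Size) {
  // Size is 32 or 64 and 0 <= Shift < Size.
  const unsigned Immr = (Size - Shift) % Size; // rotate-right amount
  const unsigned Imms = Size - 1 - Shift;      // index of the last bit copied
  return {Immr, Imms};
}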
2160
2161bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2162 MachineInstr &I, MachineRegisterInfo &MRI) {
2163   assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
2164 // If we're storing a scalar, it doesn't matter what register bank that
2165 // scalar is on. All that matters is the size.
2166 //
2167 // So, if we see something like this (with a 32-bit scalar as an example):
2168 //
2169 // %x:gpr(s32) = ... something ...
2170 // %y:fpr(s32) = COPY %x:gpr(s32)
2171 // G_STORE %y:fpr(s32)
2172 //
2173 // We can fix this up into something like this:
2174 //
2175 // G_STORE %x:gpr(s32)
2176 //
2177 // And then continue the selection process normally.
2178 Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
2179 if (!DefDstReg.isValid())
2180 return false;
2181 LLT DefDstTy = MRI.getType(DefDstReg);
2182 Register StoreSrcReg = I.getOperand(0).getReg();
2183 LLT StoreSrcTy = MRI.getType(StoreSrcReg);
2184
2185 // If we get something strange like a physical register, then we shouldn't
2186 // go any further.
2187 if (!DefDstTy.isValid())
2188 return false;
2189
2190 // Are the source and dst types the same size?
2191 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2192 return false;
2193
2194 if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
2195 RBI.getRegBank(DefDstReg, MRI, TRI))
2196 return false;
2197
2198 // We have a cross-bank copy, which is entering a store. Let's fold it.
2199 I.getOperand(0).setReg(DefDstReg);
2200 return true;
2201}
2202
2203bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
2204   assert(I.getParent() && "Instruction should be in a basic block!");
2205   assert(I.getParent()->getParent() && "Instruction should be in a function!");
2206
2207 MachineBasicBlock &MBB = *I.getParent();
2208 MachineFunction &MF = *MBB.getParent();
2209 MachineRegisterInfo &MRI = MF.getRegInfo();
2210
2211 switch (I.getOpcode()) {
2212 case AArch64::G_DUP: {
2213 // Before selecting a DUP instruction, check if it is better selected as a
2214 // MOV or load from a constant pool.
2215 Register Src = I.getOperand(1).getReg();
2216 auto ValAndVReg = getIConstantVRegValWithLookThrough(Src, MRI);
2217 if (!ValAndVReg)
2218 return false;
2219 LLVMContext &Ctx = MF.getFunction().getContext();
2220 Register Dst = I.getOperand(0).getReg();
2221 auto *CV = ConstantDataVector::getSplat(
2222 MRI.getType(Dst).getNumElements(),
2223 ConstantInt::get(Type::getIntNTy(Ctx, MRI.getType(Src).getSizeInBits()),
2224 ValAndVReg->Value));
2225 if (!emitConstantVector(Dst, CV, MIB, MRI))
2226 return false;
2227 I.eraseFromParent();
2228 return true;
2229 }
2230 case TargetOpcode::G_SEXT:
2231 // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
2232 // over a normal extend.
2233 if (selectUSMovFromExtend(I, MRI))
2234 return true;
2235 return false;
2236 case TargetOpcode::G_BR:
2237 return false;
2238 case TargetOpcode::G_SHL:
2239 return earlySelectSHL(I, MRI);
2240 case TargetOpcode::G_CONSTANT: {
2241 bool IsZero = false;
2242 if (I.getOperand(1).isCImm())
2243 IsZero = I.getOperand(1).getCImm()->isZero();
2244 else if (I.getOperand(1).isImm())
2245 IsZero = I.getOperand(1).getImm() == 0;
2246
2247 if (!IsZero)
2248 return false;
2249
2250 Register DefReg = I.getOperand(0).getReg();
2251 LLT Ty = MRI.getType(DefReg);
2252 if (Ty.getSizeInBits() == 64) {
2253 I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
2254 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
2255 } else if (Ty.getSizeInBits() == 32) {
2256 I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
2257 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
2258 } else
2259 return false;
2260
2261 I.setDesc(TII.get(TargetOpcode::COPY));
2262 return true;
2263 }
2264
2265 case TargetOpcode::G_ADD: {
2266 // Check if this is being fed by a G_ICMP on either side.
2267 //
2268 // (cmp pred, x, y) + z
2269 //
2270 // In the above case, when the cmp is true, we increment z by 1. So, we can
2271 // fold the add into the cset for the cmp by using cinc.
2272 //
2273 // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
2274 Register AddDst = I.getOperand(0).getReg();
2275 Register AddLHS = I.getOperand(1).getReg();
2276 Register AddRHS = I.getOperand(2).getReg();
2277 // Only handle scalars.
2278 LLT Ty = MRI.getType(AddLHS);
2279 if (Ty.isVector())
2280 return false;
2281 // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
2282 // bits.
2283 unsigned Size = Ty.getSizeInBits();
2284 if (Size != 32 && Size != 64)
2285 return false;
2286 auto MatchCmp = [&](Register Reg) -> MachineInstr * {
2287 if (!MRI.hasOneNonDBGUse(Reg))
2288 return nullptr;
2289 // If the LHS of the add is 32 bits, then we want to fold a 32-bit
2290 // compare.
2291 if (Size == 32)
2292 return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI);
2293 // We model scalar compares using 32-bit destinations right now.
2294 // If it's a 64-bit compare, it'll have 64-bit sources.
2295 Register ZExt;
2296 if (!mi_match(Reg, MRI,
2297 m_OneNonDBGUse(m_GZExt(m_OneNonDBGUse(m_Reg(ZExt))))))
2298 return nullptr;
2299 auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI);
2300 if (!Cmp ||
2301 MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
2302 return nullptr;
2303 return Cmp;
2304 };
2305 // Try to match
2306 // z + (cmp pred, x, y)
2307 MachineInstr *Cmp = MatchCmp(AddRHS);
2308 if (!Cmp) {
2309 // (cmp pred, x, y) + z
2310 std::swap(AddLHS, AddRHS);
2311 Cmp = MatchCmp(AddRHS);
2312 if (!Cmp)
2313 return false;
2314 }
2315 auto &PredOp = Cmp->getOperand(1);
2316 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
2317 const AArch64CC::CondCode InvCC =
2318 changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
2319 MIB.setInstrAndDebugLoc(I);
2320 emitIntegerCompare(/*LHS=*/Cmp->getOperand(2),
2321 /*RHS=*/Cmp->getOperand(3), PredOp, MIB);
2322 emitCSINC(/*Dst=*/AddDst, /*Src =*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB);
2323 I.eraseFromParent();
2324 return true;
2325 }
2326 case TargetOpcode::G_OR: {
2327 // Look for operations that take the lower `Width=Size-ShiftImm` bits of
2328 // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
2329 // shifting and masking that we can replace with a BFI (encoded as a BFM).
2330 Register Dst = I.getOperand(0).getReg();
2331 LLT Ty = MRI.getType(Dst);
2332
2333 if (!Ty.isScalar())
2334 return false;
2335
2336 unsigned Size = Ty.getSizeInBits();
2337 if (Size != 32 && Size != 64)
2338 return false;
2339
2340 Register ShiftSrc;
2341 int64_t ShiftImm;
2342 Register MaskSrc;
2343 int64_t MaskImm;
2344 if (!mi_match(
2345 Dst, MRI,
2346 m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))),
2347 m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm))))))
2348 return false;
2349
2350 if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
2351 return false;
2352
2353 int64_t Immr = Size - ShiftImm;
2354 int64_t Imms = Size - ShiftImm - 1;
2355 unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2356 emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
2357 I.eraseFromParent();
2358 return true;
2359 }
2360 case TargetOpcode::G_FENCE: {
2361 if (I.getOperand(1).getImm() == 0)
2362 BuildMI(MBB, I, MIMetadata(I), TII.get(TargetOpcode::MEMBARRIER));
2363 else
2364 BuildMI(MBB, I, MIMetadata(I), TII.get(AArch64::DMB))
2365 .addImm(I.getOperand(0).getImm() == 4 ? 0x9 : 0xb);
2366 I.eraseFromParent();
2367 return true;
2368 }
2369 default:
2370 return false;
2371 }
2372}
2373
2374bool AArch64InstructionSelector::select(MachineInstr &I) {
2375   assert(I.getParent() && "Instruction should be in a basic block!");
2376   assert(I.getParent()->getParent() && "Instruction should be in a function!");
2377
2378 MachineBasicBlock &MBB = *I.getParent();
2379 MachineFunction &MF = *MBB.getParent();
2380 MachineRegisterInfo &MRI = MF.getRegInfo();
2381
2382 const AArch64Subtarget *Subtarget = &MF.getSubtarget<AArch64Subtarget>();
2383 if (Subtarget->requiresStrictAlign()) {
2384 // We don't support this feature yet.
2385     LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
2386 return false;
2387 }
2388
2389 MIB.setInstrAndDebugLoc(I);
2390
2391 unsigned Opcode = I.getOpcode();
2392 // G_PHI requires same handling as PHI
2393 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2394 // Certain non-generic instructions also need some special handling.
2395
2396 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2397 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2398
2399 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2400 const Register DefReg = I.getOperand(0).getReg();
2401 const LLT DefTy = MRI.getType(DefReg);
2402
2403 const RegClassOrRegBank &RegClassOrBank =
2404 MRI.getRegClassOrRegBank(DefReg);
2405
2406 const TargetRegisterClass *DefRC
2407 = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
2408 if (!DefRC) {
2409 if (!DefTy.isValid()) {
2410           LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
2411 return false;
2412 }
2413 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
2414 DefRC = getRegClassForTypeOnBank(DefTy, RB);
2415 if (!DefRC) {
2416           LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
2417 return false;
2418 }
2419 }
2420
2421 I.setDesc(TII.get(TargetOpcode::PHI));
2422
2423 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
2424 }
2425
2426 if (I.isCopy())
2427 return selectCopy(I, TII, MRI, TRI, RBI);
2428
2429 if (I.isDebugInstr())
2430 return selectDebugInstr(I, MRI, RBI);
2431
2432 return true;
2433 }
2434
2435
2436 if (I.getNumOperands() != I.getNumExplicitOperands()) {
2437     LLVM_DEBUG(
2438         dbgs() << "Generic instruction has unexpected implicit operands\n");
2439 return false;
2440 }
2441
2442 // Try to do some lowering before we start instruction selecting. These
2443 // lowerings are purely transformations on the input G_MIR and so selection
2444 // must continue after any modification of the instruction.
2445 if (preISelLower(I)) {
2446 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2447 }
2448
2449 // There may be patterns where the importer can't deal with them optimally,
2450 // but does select it to a suboptimal sequence so our custom C++ selection
2451 // code later never has a chance to work on it. Therefore, we have an early
2452 // selection attempt here to give priority to certain selection routines
2453 // over the imported ones.
2454 if (earlySelect(I))
2455 return true;
2456
2457 if (selectImpl(I, *CoverageInfo))
2458 return true;
2459
2460 LLT Ty =
2461 I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
2462
2463 switch (Opcode) {
2464 case TargetOpcode::G_SBFX:
2465 case TargetOpcode::G_UBFX: {
2466 static const unsigned OpcTable[2][2] = {
2467 {AArch64::UBFMWri, AArch64::UBFMXri},
2468 {AArch64::SBFMWri, AArch64::SBFMXri}};
2469 bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2470 unsigned Size = Ty.getSizeInBits();
2471 unsigned Opc = OpcTable[IsSigned][Size == 64];
2472 auto Cst1 =
2473 getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
2474     assert(Cst1 && "Should have gotten a constant for src 1?");
2475 auto Cst2 =
2476 getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
2477     assert(Cst2 && "Should have gotten a constant for src 2?");
2478 auto LSB = Cst1->Value.getZExtValue();
2479 auto Width = Cst2->Value.getZExtValue();
2480 auto BitfieldInst =
2481 MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
2482 .addImm(LSB)
2483 .addImm(LSB + Width - 1);
2484 I.eraseFromParent();
2485 return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
2486 }
2487 case TargetOpcode::G_BRCOND:
2488 return selectCompareBranch(I, MF, MRI);
2489
2490 case TargetOpcode::G_BRINDIRECT: {
2491 I.setDesc(TII.get(AArch64::BR));
2492 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2493 }
2494
2495 case TargetOpcode::G_BRJT:
2496 return selectBrJT(I, MRI);
2497
2498 case AArch64::G_ADD_LOW: {
2499     // This op may have been separated from its ADRP companion by the localizer
2500 // or some other code motion pass. Given that many CPUs will try to
2501 // macro fuse these operations anyway, select this into a MOVaddr pseudo
2502 // which will later be expanded into an ADRP+ADD pair after scheduling.
2503 MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
2504 if (BaseMI->getOpcode() != AArch64::ADRP) {
2505 I.setDesc(TII.get(AArch64::ADDXri));
2506 I.addOperand(MachineOperand::CreateImm(0));
2507 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2508 }
2509     assert(TM.getCodeModel() == CodeModel::Small &&
2510            "Expected small code model");
2511 auto Op1 = BaseMI->getOperand(1);
2512 auto Op2 = I.getOperand(2);
2513 auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
2514 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2515 Op1.getTargetFlags())
2516 .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
2517 Op2.getTargetFlags());
2518 I.eraseFromParent();
2519 return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
2520 }
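// Rough sketch of the intent (assumed from the comment above, hypothetical
// operands):
//   %page = ADRP @g
//   %addr = G_ADD_LOW %page, @g
// is folded into a single
//   %addr = MOVaddr @g (page), @g (pageoff)
// pseudo, which is later expanded back into an adjacent adrp + add :lo12:
// pair that the CPU can macro-fuse.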
2521
2522 case TargetOpcode::G_BSWAP: {
2523 // Handle vector types for G_BSWAP directly.
2524 Register DstReg = I.getOperand(0).getReg();
2525 LLT DstTy = MRI.getType(DstReg);
2526
2527 // We should only get vector types here; everything else is handled by the
2528 // importer right now.
2529 if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
2530 LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
2531 return false;
2532 }
2533
2534 // Only handle 4 and 2 element vectors for now.
2535 // TODO: 16-bit elements.
2536 unsigned NumElts = DstTy.getNumElements();
2537 if (NumElts != 4 && NumElts != 2) {
2538 LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
2539 return false;
2540 }
2541
2542 // Choose the correct opcode for the supported types. Right now, that's
2543 // v2s32, v4s32, and v2s64.
2544 unsigned Opc = 0;
2545 unsigned EltSize = DstTy.getElementType().getSizeInBits();
2546 if (EltSize == 32)
2547 Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
2548 : AArch64::REV32v16i8;
2549 else if (EltSize == 64)
2550 Opc = AArch64::REV64v16i8;
2551
2552 // We should always get something by the time we get here...
2553 assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
2554
2555 I.setDesc(TII.get(Opc));
2556 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2557 }
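// Illustrative mapping (hypothetical example): a byte swap of every lane of
// a <4 x s32> vector, i.e. %d(<4 x s32>) = G_BSWAP %v, is selected as
// REV32v16i8, which reverses the bytes within each 32-bit chunk of the
// 128-bit register.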
2558
2559 case TargetOpcode::G_FCONSTANT:
2560 case TargetOpcode::G_CONSTANT: {
2561 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2562
2563 const LLT s8 = LLT::scalar(8);
2564 const LLT s16 = LLT::scalar(16);
2565 const LLT s32 = LLT::scalar(32);
2566 const LLT s64 = LLT::scalar(64);
2567 const LLT s128 = LLT::scalar(128);
2568 const LLT p0 = LLT::pointer(0, 64);
2569
2570 const Register DefReg = I.getOperand(0).getReg();
2571 const LLT DefTy = MRI.getType(DefReg);
2572 const unsigned DefSize = DefTy.getSizeInBits();
2573 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2574
2575 // FIXME: Redundant check, but even less readable when factored out.
2576 if (isFP) {
2577 if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
2578 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2579 << " constant, expected: " << s16 << " or " << s32
2580 << " or " << s64 << " or " << s128 << '\n');
2581 return false;
2582 }
2583
2584 if (RB.getID() != AArch64::FPRRegBankID) {
2585 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2586 << " constant on bank: " << RB
2587 << ", expected: FPR\n");
2588 return false;
2589 }
2590
2591 // The case when we have 0.0 is covered by tablegen. Reject it here so we
2592 // can be sure tablegen works correctly and isn't rescued by this code.
2593 // 0.0 is not covered by tablegen for FP128. So we will handle this
2594 // scenario in the code here.
2595 if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
2596 return false;
2597 } else {
2598 // s32 and s64 are covered by tablegen.
2599 if (Ty != p0 && Ty != s8 && Ty != s16) {
2600 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2601 << " constant, expected: " << s32 << ", " << s64
2602 << ", or " << p0 << '\n');
2603 return false;
2604 }
2605
2606 if (RB.getID() != AArch64::GPRRegBankID) {
2607 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2608 << " constant on bank: " << RB
2609 << ", expected: GPR\n");
2610 return false;
2611 }
2612 }
2613
2614 if (isFP) {
2615 const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(DefTy, RB);
2616 // For 16, 64, and 128b values, emit a constant pool load.
2617 switch (DefSize) {
2618 default:
2619 llvm_unreachable("Unexpected destination size for G_FCONSTANT?")::llvm::llvm_unreachable_internal("Unexpected destination size for G_FCONSTANT?"
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2619)
;
2620 case 32:
2621 // For s32, use a cp load if we have optsize/minsize.
2622 if (!shouldOptForSize(&MF))
2623 break;
2624 [[fallthrough]];
2625 case 16:
2626 case 64:
2627 case 128: {
2628 auto *FPImm = I.getOperand(1).getFPImm();
2629 auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2630 if (!LoadMI) {
2631 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2632 return false;
2633 }
2634 MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2635 I.eraseFromParent();
2636 return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2637 }
2638 }
2639
2640 // Either emit a FMOV, or emit a copy to emit a normal mov.
2641 assert(DefSize == 32 &&
2642 "Expected constant pool loads for all sizes other than 32!");
2643 const Register DefGPRReg =
2644 MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2645 MachineOperand &RegOp = I.getOperand(0);
2646 RegOp.setReg(DefGPRReg);
2647 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2648 MIB.buildCopy({DefReg}, {DefGPRReg});
2649
2650 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2651 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2652 return false;
2653 }
2654
2655 MachineOperand &ImmOp = I.getOperand(1);
2656 // FIXME: Is going through int64_t always correct?
2657 ImmOp.ChangeToImmediate(
2658 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2659 } else if (I.getOperand(1).isCImm()) {
2660 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
2661 I.getOperand(1).ChangeToImmediate(Val);
2662 } else if (I.getOperand(1).isImm()) {
2663 uint64_t Val = I.getOperand(1).getImm();
2664 I.getOperand(1).ChangeToImmediate(Val);
2665 }
2666
2667 const unsigned MovOpc =
2668 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2669 I.setDesc(TII.get(MovOpc));
2670 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2671 return true;
2672 }
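// Minimal sketch of the non-optsize s32 FP path above (hypothetical vregs):
//   %d:fpr(s32) = G_FCONSTANT float 1.0
// becomes roughly
//   %g:gpr32 = MOVi32imm 1065353216   ; 0x3f800000, the bit pattern of 1.0f
//   %d:fpr32 = COPY %g
// i.e. the constant is materialized in a GPR and then copied to the FPR bank.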
2673 case TargetOpcode::G_EXTRACT: {
2674 Register DstReg = I.getOperand(0).getReg();
2675 Register SrcReg = I.getOperand(1).getReg();
2676 LLT SrcTy = MRI.getType(SrcReg);
2677 LLT DstTy = MRI.getType(DstReg);
2678 (void)DstTy;
2679 unsigned SrcSize = SrcTy.getSizeInBits();
2680
2681 if (SrcTy.getSizeInBits() > 64) {
2682 // This should be an extract of an s128, which is like a vector extract.
2683 if (SrcTy.getSizeInBits() != 128)
2684 return false;
2685 // Only support extracting 64 bits from an s128 at the moment.
2686 if (DstTy.getSizeInBits() != 64)
2687 return false;
2688
2689 unsigned Offset = I.getOperand(2).getImm();
2690 if (Offset % 64 != 0)
2691 return false;
2692
2693 // Check we have the right regbank always.
2694 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2695 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2696 assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!");
2697
2698 if (SrcRB.getID() == AArch64::GPRRegBankID) {
2699 auto NewI =
2700 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
2701 .addUse(SrcReg, 0,
2702 Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2703 constrainOperandRegClass(MF, TRI, MRI, TII, RBI, *NewI,
2704 AArch64::GPR64RegClass, NewI->getOperand(0));
2705 I.eraseFromParent();
2706 return true;
2707 }
2708
2709 // Emit the same code as a vector extract.
2710 // Offset must be a multiple of 64.
2711 unsigned LaneIdx = Offset / 64;
2712 MachineInstr *Extract = emitExtractVectorElt(
2713 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2714 if (!Extract)
2715 return false;
2716 I.eraseFromParent();
2717 return true;
2718 }
2719
2720 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2721 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2722 Ty.getSizeInBits() - 1);
2723
2724 if (SrcSize < 64) {
2725 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2726 "unexpected G_EXTRACT types");
2727 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2728 }
2729
2730 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2731 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2732 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2733 .addReg(DstReg, 0, AArch64::sub_32);
2734 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2735 AArch64::GPR32RegClass, MRI);
2736 I.getOperand(0).setReg(DstReg);
2737
2738 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2739 }
2740
2741 case TargetOpcode::G_INSERT: {
2742 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2743 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2744 unsigned DstSize = DstTy.getSizeInBits();
2745 // Larger inserts are vectors, same-size ones should be something else by
2746 // now (split up or turned into COPYs).
2747 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2748 return false;
2749
2750 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2751 unsigned LSB = I.getOperand(3).getImm();
2752 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
2753 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2754 MachineInstrBuilder(MF, I).addImm(Width - 1);
2755
2756 if (DstSize < 64) {
2757 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2758 "unexpected G_INSERT types");
2759 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2760 }
2761
2762 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2763 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2764 TII.get(AArch64::SUBREG_TO_REG))
2765 .addDef(SrcReg)
2766 .addImm(0)
2767 .addUse(I.getOperand(2).getReg())
2768 .addImm(AArch64::sub_32);
2769 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2770 AArch64::GPR32RegClass, MRI);
2771 I.getOperand(2).setReg(SrcReg);
2772
2773 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2774 }
2775 case TargetOpcode::G_FRAME_INDEX: {
2776 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2777 if (Ty != LLT::pointer(0, 64)) {
2778 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2779 << ", expected: " << LLT::pointer(0, 64) << '\n');
2780 return false;
2781 }
2782 I.setDesc(TII.get(AArch64::ADDXri));
2783
2784 // MOs for a #0 shifted immediate.
2785 I.addOperand(MachineOperand::CreateImm(0));
2786 I.addOperand(MachineOperand::CreateImm(0));
2787
2788 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2789 }
2790
2791 case TargetOpcode::G_GLOBAL_VALUE: {
2792 auto GV = I.getOperand(1).getGlobal();
2793 if (GV->isThreadLocal())
2794 return selectTLSGlobalValue(I, MRI);
2795
2796 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
2797 if (OpFlags & AArch64II::MO_GOT) {
2798 I.setDesc(TII.get(AArch64::LOADgot));
2799 I.getOperand(1).setTargetFlags(OpFlags);
2800 } else if (TM.getCodeModel() == CodeModel::Large) {
2801 // Materialize the global using movz/movk instructions.
2802 materializeLargeCMVal(I, GV, OpFlags);
2803 I.eraseFromParent();
2804 return true;
2805 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2806 I.setDesc(TII.get(AArch64::ADR));
2807 I.getOperand(1).setTargetFlags(OpFlags);
2808 } else {
2809 I.setDesc(TII.get(AArch64::MOVaddr));
2810 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2811 MachineInstrBuilder MIB(MF, I);
2812 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2813 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2814 }
2815 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2816 }
2817
2818 case TargetOpcode::G_ZEXTLOAD:
2819 case TargetOpcode::G_LOAD:
2820 case TargetOpcode::G_STORE: {
2821 GLoadStore &LdSt = cast<GLoadStore>(I);
2822 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2823 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
2824
2825 if (PtrTy != LLT::pointer(0, 64)) {
2826 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
2827 << ", expected: " << LLT::pointer(0, 64) << '\n');
2828 return false;
2829 }
2830
2831 uint64_t MemSizeInBytes = LdSt.getMemSize();
2832 unsigned MemSizeInBits = LdSt.getMemSizeInBits();
2833 AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
2834
2835 // Need special instructions for atomics that affect ordering.
2836 if (Order != AtomicOrdering::NotAtomic &&
2837 Order != AtomicOrdering::Unordered &&
2838 Order != AtomicOrdering::Monotonic) {
2839 assert(!isa<GZExtLoad>(LdSt))(static_cast <bool> (!isa<GZExtLoad>(LdSt)) ? void
(0) : __assert_fail ("!isa<GZExtLoad>(LdSt)", "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2839, __extension__ __PRETTY_FUNCTION__))
;
2840 if (MemSizeInBytes > 64)
2841 return false;
2842
2843 if (isa<GLoad>(LdSt)) {
2844 static constexpr unsigned LDAPROpcodes[] = {
2845 AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
2846 static constexpr unsigned LDAROpcodes[] = {
2847 AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
2848 ArrayRef<unsigned> Opcodes =
2849 STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent
2850 ? LDAPROpcodes
2851 : LDAROpcodes;
2852 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2853 } else {
2854 static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
2855 AArch64::STLRW, AArch64::STLRX};
2856 Register ValReg = LdSt.getReg(0);
2857 if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
2858 // Emit a subreg copy of 32 bits.
2859 Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2860 MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
2861 .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32);
2862 I.getOperand(0).setReg(NewVal);
2863 }
2864 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2865 }
2866 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2867 return true;
2868 }
2869
2870#ifndef NDEBUG
2871 const Register PtrReg = LdSt.getPointerReg();
2872 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2873 // Check that the pointer register is valid.
2874 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
2875 "Load/Store pointer operand isn't a GPR");
2876 assert(MRI.getType(PtrReg).isPointer() &&
2877 "Load/Store pointer operand isn't a pointer");
2878#endif
2879
2880 const Register ValReg = LdSt.getReg(0);
2881 const LLT ValTy = MRI.getType(ValReg);
2882 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
2883
2884 // The code below doesn't support truncating stores, so we need to split it
2885 // again.
2886 if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2887 unsigned SubReg;
2888 LLT MemTy = LdSt.getMMO().getMemoryType();
2889 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
2890 if (!getSubRegForClass(RC, TRI, SubReg))
2891 return false;
2892
2893 // Generate a subreg copy.
2894 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
2895 .addReg(ValReg, 0, SubReg)
2896 .getReg(0);
2897 RBI.constrainGenericRegister(Copy, *RC, MRI);
2898 LdSt.getOperand(0).setReg(Copy);
2899 } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2900 // If this is an any-extending load from the FPR bank, split it into a regular
2901 // load + extend.
2902 if (RB.getID() == AArch64::FPRRegBankID) {
2903 unsigned SubReg;
2904 LLT MemTy = LdSt.getMMO().getMemoryType();
2905 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
2906 if (!getSubRegForClass(RC, TRI, SubReg))
2907 return false;
2908 Register OldDst = LdSt.getReg(0);
2909 Register NewDst =
2910 MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType());
2911 LdSt.getOperand(0).setReg(NewDst);
2912 MRI.setRegBank(NewDst, RB);
2913 // Generate a SUBREG_TO_REG to extend it.
2914 MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));
2915 MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
2916 .addImm(0)
2917 .addUse(NewDst)
2918 .addImm(SubReg);
2919 auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB);
2920 RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
2921 MIB.setInstr(LdSt);
2922 }
2923 }
2924
2925 // Helper lambda for partially selecting I. Either returns the original
2926 // instruction with an updated opcode, or a new instruction.
2927 auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
2928 bool IsStore = isa<GStore>(I);
1. Assuming 'I' is not a 'GStore'
2929 const unsigned NewOpc =
2930 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
2931 if (NewOpc == I.getOpcode())
2. Taking false branch
2932 return nullptr;
2933 // Check if we can fold anything into the addressing mode.
2934 auto AddrModeFns =
2935 selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
3. Calling 'AArch64InstructionSelector::selectAddrModeIndexed'
2936 if (!AddrModeFns) {
2937 // Can't fold anything. Use the original instruction.
2938 I.setDesc(TII.get(NewOpc));
2939 I.addOperand(MachineOperand::CreateImm(0));
2940 return &I;
2941 }
2942
2943 // Folded something. Create a new instruction and return it.
2944 auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
2945 Register CurValReg = I.getOperand(0).getReg();
2946 IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
2947 NewInst.cloneMemRefs(I);
2948 for (auto &Fn : *AddrModeFns)
2949 Fn(NewInst);
2950 I.eraseFromParent();
2951 return &*NewInst;
2952 };
2953
2954 MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
2955 if (!LoadStore)
2956 return false;
2957
2958 // If we're storing a 0, use WZR/XZR.
2959 if (Opcode == TargetOpcode::G_STORE) {
2960 auto CVal = getIConstantVRegValWithLookThrough(
2961 LoadStore->getOperand(0).getReg(), MRI);
2962 if (CVal && CVal->Value == 0) {
2963 switch (LoadStore->getOpcode()) {
2964 case AArch64::STRWui:
2965 case AArch64::STRHHui:
2966 case AArch64::STRBBui:
2967 LoadStore->getOperand(0).setReg(AArch64::WZR);
2968 break;
2969 case AArch64::STRXui:
2970 LoadStore->getOperand(0).setReg(AArch64::XZR);
2971 break;
2972 }
2973 }
2974 }
2975
2976 if (IsZExtLoad) {
2977 // The zextload from a smaller type to i32 should be handled by the
2978 // importer.
2979 if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
2980 return false;
2981 // If we have a ZEXTLOAD then change the load's type to be a narrower reg
2982 // and zero_extend with SUBREG_TO_REG.
2983 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2984 Register DstReg = LoadStore->getOperand(0).getReg();
2985 LoadStore->getOperand(0).setReg(LdReg);
2986
2987 MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
2988 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
2989 .addImm(0)
2990 .addUse(LdReg)
2991 .addImm(AArch64::sub_32);
2992 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2993 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
2994 MRI);
2995 }
2996 return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2997 }
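// Small illustration of the zero-store peephole above (hypothetical MIR):
//   G_STORE 0(s32), %p   ->   STRWui wzr, %p, 0
// The constant-zero value register is simply replaced with WZR (or XZR for
// 64-bit stores), so no materializing move is needed.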
2998
2999 case TargetOpcode::G_SMULH:
3000 case TargetOpcode::G_UMULH: {
3001 // Reject the various things we don't support yet.
3002 if (unsupportedBinOp(I, RBI, MRI, TRI))
3003 return false;
3004
3005 const Register DefReg = I.getOperand(0).getReg();
3006 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
3007
3008 if (RB.getID() != AArch64::GPRRegBankID) {
3009 LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
3010 return false;
3011 }
3012
3013 if (Ty != LLT::scalar(64)) {
3014 LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
3015 << ", expected: " << LLT::scalar(64) << '\n');
3016 return false;
3017 }
3018
3019 unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
3020 : AArch64::UMULHrr;
3021 I.setDesc(TII.get(NewOpc));
3022
3023 // Now that we selected an opcode, we need to constrain the register
3024 // operands to use appropriate classes.
3025 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3026 }
3027 case TargetOpcode::G_LSHR:
3028 case TargetOpcode::G_ASHR:
3029 if (MRI.getType(I.getOperand(0).getReg()).isVector())
3030 return selectVectorAshrLshr(I, MRI);
3031 [[fallthrough]];
3032 case TargetOpcode::G_SHL:
3033 if (Opcode == TargetOpcode::G_SHL &&
3034 MRI.getType(I.getOperand(0).getReg()).isVector())
3035 return selectVectorSHL(I, MRI);
3036
3037 // These shifts were legalized to have 64 bit shift amounts because we
3038 // want to take advantage of the selection patterns that assume the
3039 // immediates are s64s, however, selectBinaryOp will assume both operands
3040 // will have the same bit size.
3041 {
3042 Register SrcReg = I.getOperand(1).getReg();
3043 Register ShiftReg = I.getOperand(2).getReg();
3044 const LLT ShiftTy = MRI.getType(ShiftReg);
3045 const LLT SrcTy = MRI.getType(SrcReg);
3046 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
3047 ShiftTy.getSizeInBits() == 64) {
3048 assert(!ShiftTy.isVector() && "unexpected vector shift ty");
3049 // Insert a subregister copy to implement a 64->32 trunc
3050 auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
3051 .addReg(ShiftReg, 0, AArch64::sub_32);
3052 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
3053 I.getOperand(2).setReg(Trunc.getReg(0));
3054 }
3055 }
3056 [[fallthrough]];
3057 case TargetOpcode::G_OR: {
3058 // Reject the various things we don't support yet.
3059 if (unsupportedBinOp(I, RBI, MRI, TRI))
3060 return false;
3061
3062 const unsigned OpSize = Ty.getSizeInBits();
3063
3064 const Register DefReg = I.getOperand(0).getReg();
3065 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
3066
3067 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
3068 if (NewOpc == I.getOpcode())
3069 return false;
3070
3071 I.setDesc(TII.get(NewOpc));
3072 // FIXME: Should the type be always reset in setDesc?
3073
3074 // Now that we selected an opcode, we need to constrain the register
3075 // operands to use appropriate classes.
3076 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3077 }
3078
3079 case TargetOpcode::G_PTR_ADD: {
3080 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
3081 I.eraseFromParent();
3082 return true;
3083 }
3084 case TargetOpcode::G_SADDO:
3085 case TargetOpcode::G_UADDO:
3086 case TargetOpcode::G_SSUBO:
3087 case TargetOpcode::G_USUBO: {
3088 // Emit the operation and get the correct condition code.
3089 auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(),
3090 I.getOperand(2), I.getOperand(3), MIB);
3091
3092 // Now, put the overflow result in the register given by the first operand
3093 // to the overflow op. CSINC increments the result when the predicate is
3094 // false, so to get the increment when it's true, we need to use the
3095 // inverse. In this case, we want to increment when carry is set.
3096 Register ZReg = AArch64::WZR;
3097 emitCSINC(/*Dst=*/I.getOperand(1).getReg(), /*Src1=*/ZReg, /*Src2=*/ZReg,
3098 getInvertedCondCode(OpAndCC.second), MIB);
3099 I.eraseFromParent();
3100 return true;
3101 }
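// Sketch of the expected output for G_UADDO (assumed, following the CSINC
// comment above; hypothetical vregs):
//   %res = ADDSWrr %lhs, %rhs     ; sets NZCV, carry (HS) on unsigned wrap
//   %ovf = CSINCWr wzr, wzr, lo   ; lo = inverse of hs, so %ovf = 1 iff carry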
3102
3103 case TargetOpcode::G_PTRMASK: {
3104 Register MaskReg = I.getOperand(2).getReg();
3105 std::optional<int64_t> MaskVal = getIConstantVRegSExtVal(MaskReg, MRI);
3106 // TODO: Implement arbitrary cases
3107 if (!MaskVal || !isShiftedMask_64(*MaskVal))
3108 return false;
3109
3110 uint64_t Mask = *MaskVal;
3111 I.setDesc(TII.get(AArch64::ANDXri));
3112 I.getOperand(2).ChangeToImmediate(
3113 AArch64_AM::encodeLogicalImmediate(Mask, 64));
3114
3115 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3116 }
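// Hypothetical example for the shifted-mask case above: an alignment mask of
// -16, i.e. G_PTRMASK %p, 0xFFFFFFFFFFFFFFF0, is rewritten in place to
// ANDXri %p, <logical-imm encoding of 0xFFFFFFFFFFFFFFF0>, clearing the low
// four address bits.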
3117 case TargetOpcode::G_PTRTOINT:
3118 case TargetOpcode::G_TRUNC: {
3119 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3120 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3121
3122 const Register DstReg = I.getOperand(0).getReg();
3123 const Register SrcReg = I.getOperand(1).getReg();
3124
3125 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3126 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3127
3128 if (DstRB.getID() != SrcRB.getID()) {
3129 LLVM_DEBUG(
3130 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
3131 return false;
3132 }
3133
3134 if (DstRB.getID() == AArch64::GPRRegBankID) {
3135 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3136 if (!DstRC)
3137 return false;
3138
3139 const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(SrcTy, SrcRB);
3140 if (!SrcRC)
3141 return false;
3142
3143 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
3144 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
3145 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
3146 return false;
3147 }
3148
3149 if (DstRC == SrcRC) {
3150 // Nothing to be done
3151 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
3152 SrcTy == LLT::scalar(64)) {
3153 llvm_unreachable("TableGen can import this case")::llvm::llvm_unreachable_internal("TableGen can import this case"
, "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 3153)
;
3154 return false;
3155 } else if (DstRC == &AArch64::GPR32RegClass &&
3156 SrcRC == &AArch64::GPR64RegClass) {
3157 I.getOperand(1).setSubReg(AArch64::sub_32);
3158 } else {
3159 LLVM_DEBUG(
3160 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
3161 return false;
3162 }
3163
3164 I.setDesc(TII.get(TargetOpcode::COPY));
3165 return true;
3166 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
3167 if (DstTy == LLT::fixed_vector(4, 16) &&
3168 SrcTy == LLT::fixed_vector(4, 32)) {
3169 I.setDesc(TII.get(AArch64::XTNv4i16));
3170 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3171 return true;
3172 }
3173
3174 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
3175 MachineInstr *Extract = emitExtractVectorElt(
3176 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
3177 if (!Extract)
3178 return false;
3179 I.eraseFromParent();
3180 return true;
3181 }
3182
3183 // We might have a vector G_PTRTOINT, in which case just emit a COPY.
3184 if (Opcode == TargetOpcode::G_PTRTOINT) {
3185 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
3186 I.setDesc(TII.get(TargetOpcode::COPY));
3187 return selectCopy(I, TII, MRI, TRI, RBI);
3188 }
3189 }
3190
3191 return false;
3192 }
3193
3194 case TargetOpcode::G_ANYEXT: {
3195 if (selectUSMovFromExtend(I, MRI))
3196 return true;
3197
3198 const Register DstReg = I.getOperand(0).getReg();
3199 const Register SrcReg = I.getOperand(1).getReg();
3200
3201 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
3202 if (RBDst.getID() != AArch64::GPRRegBankID) {
3203 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
3204 << ", expected: GPR\n");
3205 return false;
3206 }
3207
3208 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
3209 if (RBSrc.getID() != AArch64::GPRRegBankID) {
3210 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
3211 << ", expected: GPR\n");
3212 return false;
3213 }
3214
3215 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
3216
3217 if (DstSize == 0) {
3218 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
3219 return false;
3220 }
3221
3222 if (DstSize != 64 && DstSize > 32) {
3223 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
3224 << ", expected: 32 or 64\n");
3225 return false;
3226 }
3227 // At this point G_ANYEXT is just like a plain COPY, but we need
3228 // to explicitly form the 64-bit value if any.
3229 if (DstSize > 32) {
3230 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
3231 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
3232 .addDef(ExtSrc)
3233 .addImm(0)
3234 .addUse(SrcReg)
3235 .addImm(AArch64::sub_32);
3236 I.getOperand(1).setReg(ExtSrc);
3237 }
3238 return selectCopy(I, TII, MRI, TRI, RBI);
3239 }
3240
3241 case TargetOpcode::G_ZEXT:
3242 case TargetOpcode::G_SEXT_INREG:
3243 case TargetOpcode::G_SEXT: {
3244 if (selectUSMovFromExtend(I, MRI))
3245 return true;
3246
3247 unsigned Opcode = I.getOpcode();
3248 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3249 const Register DefReg = I.getOperand(0).getReg();
3250 Register SrcReg = I.getOperand(1).getReg();
3251 const LLT DstTy = MRI.getType(DefReg);
3252 const LLT SrcTy = MRI.getType(SrcReg);
3253 unsigned DstSize = DstTy.getSizeInBits();
3254 unsigned SrcSize = SrcTy.getSizeInBits();
3255
3256 // SEXT_INREG has the same src reg size as dst, the size of the value to be
3257 // extended is encoded in the imm.
3258 if (Opcode == TargetOpcode::G_SEXT_INREG)
3259 SrcSize = I.getOperand(2).getImm();
3260
3261 if (DstTy.isVector())
3262 return false; // Should be handled by imported patterns.
3263
3264 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
3265 AArch64::GPRRegBankID &&
3266 "Unexpected ext regbank");
3267
3268 MachineInstr *ExtI;
3269
3270 // First check if we're extending the result of a load which has a dest type
3271 // smaller than 32 bits, then this zext is redundant. GPR32 is the smallest
3272 // GPR register on AArch64 and all loads which are smaller automatically
3273 // zero-extend the upper bits. E.g.
3274 // %v(s8) = G_LOAD %p, :: (load 1)
3275 // %v2(s32) = G_ZEXT %v(s8)
3276 if (!IsSigned) {
3277 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
3278 bool IsGPR =
3279 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
3280 if (LoadMI && IsGPR) {
3281 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
3282 unsigned BytesLoaded = MemOp->getSize();
3283 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
3284 return selectCopy(I, TII, MRI, TRI, RBI);
3285 }
3286
3287 // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
3288 // + SUBREG_TO_REG.
3289 if (IsGPR && SrcSize == 32 && DstSize == 64) {
3290 Register SubregToRegSrc =
3291 MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3292 const Register ZReg = AArch64::WZR;
3293 MIB.buildInstr(AArch64::ORRWrs, {SubregToRegSrc}, {ZReg, SrcReg})
3294 .addImm(0);
3295
3296 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
3297 .addImm(0)
3298 .addUse(SubregToRegSrc)
3299 .addImm(AArch64::sub_32);
3300
3301 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
3302 MRI)) {
3303 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
3304 return false;
3305 }
3306
3307 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3308 MRI)) {
3309 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
3310 return false;
3311 }
3312
3313 I.eraseFromParent();
3314 return true;
3315 }
3316 }
3317
3318 if (DstSize == 64) {
3319 if (Opcode != TargetOpcode::G_SEXT_INREG) {
3320 // FIXME: Can we avoid manually doing this?
3321 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3322 MRI)) {
3323 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
3324 << " operand\n");
3325 return false;
3326 }
3327 SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
3328 {&AArch64::GPR64RegClass}, {})
3329 .addImm(0)
3330 .addUse(SrcReg)
3331 .addImm(AArch64::sub_32)
3332 .getReg(0);
3333 }
3334
3335 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3336 {DefReg}, {SrcReg})
3337 .addImm(0)
3338 .addImm(SrcSize - 1);
3339 } else if (DstSize <= 32) {
3340 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3341 {DefReg}, {SrcReg})
3342 .addImm(0)
3343 .addImm(SrcSize - 1);
3344 } else {
3345 return false;
3346 }
3347
3348 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
3349 I.eraseFromParent();
3350 return true;
3351 }
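// Sketch of the 32-bit -> 64-bit zero-extend path above (hypothetical vregs):
//   %d(s64) = G_ZEXT %s(s32)
// becomes
//   %t:gpr32 = ORRWrs wzr, %s, 0          ; plain 32-bit mov, clears bits 63:32
//   %d:gpr64 = SUBREG_TO_REG 0, %t, %subreg.sub_32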
3352
3353 case TargetOpcode::G_SITOFP:
3354 case TargetOpcode::G_UITOFP:
3355 case TargetOpcode::G_FPTOSI:
3356 case TargetOpcode::G_FPTOUI: {
3357 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
3358 SrcTy = MRI.getType(I.getOperand(1).getReg());
3359 const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
3360 if (NewOpc == Opcode)
3361 return false;
3362
3363 I.setDesc(TII.get(NewOpc));
3364 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3365 I.setFlags(MachineInstr::NoFPExcept);
3366
3367 return true;
3368 }
3369
3370 case TargetOpcode::G_FREEZE:
3371 return selectCopy(I, TII, MRI, TRI, RBI);
3372
3373 case TargetOpcode::G_INTTOPTR:
3374 // The importer is currently unable to import pointer types since they
3375 // didn't exist in SelectionDAG.
3376 return selectCopy(I, TII, MRI, TRI, RBI);
3377
3378 case TargetOpcode::G_BITCAST:
3379 // Imported SelectionDAG rules can handle every bitcast except those that
3380 // bitcast from a type to the same type. Ideally, these shouldn't occur
3381 // but we might not run an optimizer that deletes them. The other exception
3382 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3383 // of them.
3384 return selectCopy(I, TII, MRI, TRI, RBI);
3385
3386 case TargetOpcode::G_SELECT: {
3387 auto &Sel = cast<GSelect>(I);
3388 const Register CondReg = Sel.getCondReg();
3389 const Register TReg = Sel.getTrueReg();
3390 const Register FReg = Sel.getFalseReg();
3391
3392 if (tryOptSelect(Sel))
3393 return true;
3394
3395 // Make sure to use an unused vreg instead of wzr, so that the peephole
3396 // optimizations will be able to optimize these.
3397 Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3398 auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3399 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
3400 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3401 if (!emitSelect(Sel.getReg(0), TReg, FReg, AArch64CC::NE, MIB))
3402 return false;
3403 Sel.eraseFromParent();
3404 return true;
3405 }
3406 case TargetOpcode::G_ICMP: {
3407 if (Ty.isVector())
3408 return selectVectorICmp(I, MRI);
3409
3410 if (Ty != LLT::scalar(32)) {
3411 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
3412 << ", expected: " << LLT::scalar(32) << '\n');
3413 return false;
3414 }
3415
3416 auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3417 const AArch64CC::CondCode InvCC =
3418 changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
3419 emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), MIB);
3420 emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR,
3421 /*Src2=*/AArch64::WZR, InvCC, MIB);
3422 I.eraseFromParent();
3423 return true;
3424 }
3425
3426 case TargetOpcode::G_FCMP: {
3427 CmpInst::Predicate Pred =
3428 static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3429 if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
3430 Pred) ||
3431 !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
3432 return false;
3433 I.eraseFromParent();
3434 return true;
3435 }
3436 case TargetOpcode::G_VASTART:
3437 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3438 : selectVaStartAAPCS(I, MF, MRI);
3439 case TargetOpcode::G_INTRINSIC:
3440 return selectIntrinsic(I, MRI);
3441 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3442 return selectIntrinsicWithSideEffects(I, MRI);
3443 case TargetOpcode::G_IMPLICIT_DEF: {
3444 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
3445 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3446 const Register DstReg = I.getOperand(0).getReg();
3447 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3448 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3449 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
3450 return true;
3451 }
3452 case TargetOpcode::G_BLOCK_ADDR: {
3453 if (TM.getCodeModel() == CodeModel::Large) {
3454 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
3455 I.eraseFromParent();
3456 return true;
3457 } else {
3458 I.setDesc(TII.get(AArch64::MOVaddrBA));
3459 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
3460 I.getOperand(0).getReg())
3461 .addBlockAddress(I.getOperand(1).getBlockAddress(),
3462 /* Offset */ 0, AArch64II::MO_PAGE)
3463 .addBlockAddress(
3464 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
3465 AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3466 I.eraseFromParent();
3467 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3468 }
3469 }
3470 case AArch64::G_DUP: {
3471 // When the scalar of G_DUP is an s8/s16 gpr, they can't be selected by
3472 // imported patterns. Do it manually here. Avoiding generating s16 gpr is
3473 // difficult because at RBS we may end up pessimizing the fpr case if we
3474 // decided to add an anyextend to fix this. Manual selection is the most
3475 // robust solution for now.
3476 if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3477 AArch64::GPRRegBankID)
3478 return false; // We expect the fpr regbank case to be imported.
3479 LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3480 if (VecTy == LLT::fixed_vector(8, 8))
3481 I.setDesc(TII.get(AArch64::DUPv8i8gpr));
3482 else if (VecTy == LLT::fixed_vector(16, 8))
3483 I.setDesc(TII.get(AArch64::DUPv16i8gpr));
3484 else if (VecTy == LLT::fixed_vector(4, 16))
3485 I.setDesc(TII.get(AArch64::DUPv4i16gpr));
3486 else if (VecTy == LLT::fixed_vector(8, 16))
3487 I.setDesc(TII.get(AArch64::DUPv8i16gpr));
3488 else
3489 return false;
3490 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3491 }
3492 case TargetOpcode::G_INTRINSIC_TRUNC:
3493 return selectIntrinsicTrunc(I, MRI);
3494 case TargetOpcode::G_INTRINSIC_ROUND:
3495 return selectIntrinsicRound(I, MRI);
3496 case TargetOpcode::G_BUILD_VECTOR:
3497 return selectBuildVector(I, MRI);
3498 case TargetOpcode::G_MERGE_VALUES:
3499 return selectMergeValues(I, MRI);
3500 case TargetOpcode::G_UNMERGE_VALUES:
3501 return selectUnmergeValues(I, MRI);
3502 case TargetOpcode::G_SHUFFLE_VECTOR:
3503 return selectShuffleVector(I, MRI);
3504 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3505 return selectExtractElt(I, MRI);
3506 case TargetOpcode::G_INSERT_VECTOR_ELT:
3507 return selectInsertElt(I, MRI);
3508 case TargetOpcode::G_CONCAT_VECTORS:
3509 return selectConcatVectors(I, MRI);
3510 case TargetOpcode::G_JUMP_TABLE:
3511 return selectJumpTable(I, MRI);
3512 case TargetOpcode::G_VECREDUCE_FADD:
3513 case TargetOpcode::G_VECREDUCE_ADD:
3514 return selectReduction(I, MRI);
3515 case TargetOpcode::G_MEMCPY:
3516 case TargetOpcode::G_MEMCPY_INLINE:
3517 case TargetOpcode::G_MEMMOVE:
3518 case TargetOpcode::G_MEMSET:
3519 assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
3520 return selectMOPS(I, MRI);
3521 }
3522
3523 return false;
3524}
3525
3526bool AArch64InstructionSelector::selectReduction(MachineInstr &I,
3527 MachineRegisterInfo &MRI) {
3528 Register VecReg = I.getOperand(1).getReg();
3529 LLT VecTy = MRI.getType(VecReg);
3530 if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) {
3531 // For <2 x i32> ADDPv2i32 generates an FPR64 value, so we need to emit
3532 // a subregister copy afterwards.
3533 if (VecTy == LLT::fixed_vector(2, 32)) {
3534 Register DstReg = I.getOperand(0).getReg();
3535 auto AddP = MIB.buildInstr(AArch64::ADDPv2i32, {&AArch64::FPR64RegClass},
3536 {VecReg, VecReg});
3537 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3538 .addReg(AddP.getReg(0), 0, AArch64::ssub)
3539 .getReg(0);
3540 RBI.constrainGenericRegister(Copy, AArch64::FPR32RegClass, MRI);
3541 I.eraseFromParent();
3542 return constrainSelectedInstRegOperands(*AddP, TII, TRI, RBI);
3543 }
3544
3545 unsigned Opc = 0;
3546 if (VecTy == LLT::fixed_vector(16, 8))
3547 Opc = AArch64::ADDVv16i8v;
3548 else if (VecTy == LLT::fixed_vector(8, 16))
3549 Opc = AArch64::ADDVv8i16v;
3550 else if (VecTy == LLT::fixed_vector(4, 32))
3551 Opc = AArch64::ADDVv4i32v;
3552 else if (VecTy == LLT::fixed_vector(2, 64))
3553 Opc = AArch64::ADDPv2i64p;
3554 else {
3555 LLVM_DEBUG(dbgs() << "Unhandled type for add reduction");
3556 return false;
3557 }
3558 I.setDesc(TII.get(Opc));
3559 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3560 }
3561
3562 if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) {
3563 unsigned Opc = 0;
3564 if (VecTy == LLT::fixed_vector(2, 32))
3565 Opc = AArch64::FADDPv2i32p;
3566 else if (VecTy == LLT::fixed_vector(2, 64))
3567 Opc = AArch64::FADDPv2i64p;
3568 else {
3569 LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction");
3570 return false;
3571 }
3572 I.setDesc(TII.get(Opc));
3573 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3574 }
3575 return false;
3576}
3577
3578bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
3579 MachineRegisterInfo &MRI) {
3580 unsigned Mopcode;
3581 switch (GI.getOpcode()) {
3582 case TargetOpcode::G_MEMCPY:
3583 case TargetOpcode::G_MEMCPY_INLINE:
3584 Mopcode = AArch64::MOPSMemoryCopyPseudo;
3585 break;
3586 case TargetOpcode::G_MEMMOVE:
3587 Mopcode = AArch64::MOPSMemoryMovePseudo;
3588 break;
3589 case TargetOpcode::G_MEMSET:
3590 // For tagged memset see llvm.aarch64.mops.memset.tag
3591 Mopcode = AArch64::MOPSMemorySetPseudo;
3592 break;
3593 }
3594
3595 auto &DstPtr = GI.getOperand(0);
3596 auto &SrcOrVal = GI.getOperand(1);
3597 auto &Size = GI.getOperand(2);
3598
3599 // Create copies of the registers that can be clobbered.
3600 const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg());
3601 const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg());
3602 const Register SizeCopy = MRI.cloneVirtualRegister(Size.getReg());
3603
3604 const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
3605 const auto &SrcValRegClass =
3606 IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
3607
3608 // Constrain to specific registers
3609 RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI);
3610 RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI);
3611 RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI);
3612
3613 MIB.buildCopy(DstPtrCopy, DstPtr);
3614 MIB.buildCopy(SrcValCopy, SrcOrVal);
3615 MIB.buildCopy(SizeCopy, Size);
3616
3617 // New instruction uses the copied registers because it must update them.
3618 // The defs are not used since they don't exist in G_MEM*. They are still
3619 // tied.
3620 // Note: order of operands is different from G_MEMSET, G_MEMCPY, G_MEMMOVE
3621 Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
3622 Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3623 if (IsSet) {
3624 MIB.buildInstr(Mopcode, {DefDstPtr, DefSize},
3625 {DstPtrCopy, SizeCopy, SrcValCopy});
3626 } else {
3627 Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass);
3628 MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
3629 {DstPtrCopy, SrcValCopy, SizeCopy});
3630 }
3631
3632 GI.eraseFromParent();
3633 return true;
3634}
3635
3636bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3637 MachineRegisterInfo &MRI) {
3638 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
3639 Register JTAddr = I.getOperand(0).getReg();
3640 unsigned JTI = I.getOperand(1).getIndex();
3641 Register Index = I.getOperand(2).getReg();
3642
3643 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3644 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3645
3646 MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
3647 auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
3648 {TargetReg, ScratchReg}, {JTAddr, Index})
3649 .addJumpTableIndex(JTI);
3650 // Build the indirect branch.
3651 MIB.buildInstr(AArch64::BR, {}, {TargetReg});
3652 I.eraseFromParent();
3653 return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
3654}
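// [Annotation, not part of the analyzed source] For a G_BRJT this roughly
// becomes (names illustrative):
//   %target:gpr64, %scratch:gpr64sp = JumpTableDest32 %jtaddr, %index, %jump-table.N
//   BR %target
// JumpTableDest32 is a pseudo that loads the 4-byte table entry selected by
// the index and adds it to the table base; the setJumpTableEntryInfo call
// above records that 4-byte entry size.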
3655
3656bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
3657 MachineRegisterInfo &MRI) {
3658  assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
3659  assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
3660
3661 Register DstReg = I.getOperand(0).getReg();
3662 unsigned JTI = I.getOperand(1).getIndex();
3663 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
3664 auto MovMI =
3665 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3666 .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
3667 .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3668 I.eraseFromParent();
3669 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3670}
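// [Annotation, not part of the analyzed source] The two jump-table-index
// operands carry the MO_PAGE and MO_PAGEOFF|MO_NC target flags, so after
// pseudo expansion this becomes the usual address-materialization pair, e.g.:
//   adrp x8, .LJTI0_0
//   add  x8, x8, :lo12:.LJTI0_0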
3671
3672bool AArch64InstructionSelector::selectTLSGlobalValue(
3673 MachineInstr &I, MachineRegisterInfo &MRI) {
3674 if (!STI.isTargetMachO())
3675 return false;
3676 MachineFunction &MF = *I.getParent()->getParent();
3677 MF.getFrameInfo().setAdjustsStack(true);
3678
3679 const auto &GlobalOp = I.getOperand(1);
3680  assert(GlobalOp.getOffset() == 0 &&
3681         "Shouldn't have an offset on TLS globals!");
3682 const GlobalValue &GV = *GlobalOp.getGlobal();
3683
3684 auto LoadGOT =
3685 MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3686 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
3687
3688 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3689 {LoadGOT.getReg(0)})
3690 .addImm(0);
3691
3692 MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
3693 // TLS calls preserve all registers except those that absolutely must be
3694 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3695 // silly).
3696 MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load})
3697 .addUse(AArch64::X0, RegState::Implicit)
3698 .addDef(AArch64::X0, RegState::Implicit)
3699 .addRegMask(TRI.getTLSCallPreservedMask());
3700
3701 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
3702 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3703 MRI);
3704 I.eraseFromParent();
3705 return true;
3706}
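// [Annotation, not part of the analyzed source] This follows the MachO TLV
// convention: LOADgot fetches the address of the thread-local descriptor, the
// LDRXui loads the accessor function pointer from the descriptor's first
// word, the descriptor address is passed in x0, and the call returns the
// variable's address in x0, which is then copied into the destination.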
3707
3708bool AArch64InstructionSelector::selectIntrinsicTrunc(
3709 MachineInstr &I, MachineRegisterInfo &MRI) const {
3710 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3711
3712 // Select the correct opcode.
3713 unsigned Opc = 0;
3714 if (!SrcTy.isVector()) {
3715 switch (SrcTy.getSizeInBits()) {
3716 default:
3717 case 16:
3718 Opc = AArch64::FRINTZHr;
3719 break;
3720 case 32:
3721 Opc = AArch64::FRINTZSr;
3722 break;
3723 case 64:
3724 Opc = AArch64::FRINTZDr;
3725 break;
3726 }
3727 } else {
3728 unsigned NumElts = SrcTy.getNumElements();
3729 switch (SrcTy.getElementType().getSizeInBits()) {
3730 default:
3731 break;
3732 case 16:
3733 if (NumElts == 4)
3734 Opc = AArch64::FRINTZv4f16;
3735 else if (NumElts == 8)
3736 Opc = AArch64::FRINTZv8f16;
3737 break;
3738 case 32:
3739 if (NumElts == 2)
3740 Opc = AArch64::FRINTZv2f32;
3741 else if (NumElts == 4)
3742 Opc = AArch64::FRINTZv4f32;
3743 break;
3744 case 64:
3745 if (NumElts == 2)
3746 Opc = AArch64::FRINTZv2f64;
3747 break;
3748 }
3749 }
3750
3751 if (!Opc) {
3752 // Didn't get an opcode above, bail.
3753    LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
3754 return false;
3755 }
3756
3757 // Legalization would have set us up perfectly for this; we just need to
3758 // set the opcode and move on.
3759 I.setDesc(TII.get(Opc));
3760 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3761}
3762
3763bool AArch64InstructionSelector::selectIntrinsicRound(
3764 MachineInstr &I, MachineRegisterInfo &MRI) const {
3765 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3766
3767 // Select the correct opcode.
3768 unsigned Opc = 0;
3769 if (!SrcTy.isVector()) {
3770 switch (SrcTy.getSizeInBits()) {
3771 default:
3772 case 16:
3773 Opc = AArch64::FRINTAHr;
3774 break;
3775 case 32:
3776 Opc = AArch64::FRINTASr;
3777 break;
3778 case 64:
3779 Opc = AArch64::FRINTADr;
3780 break;
3781 }
3782 } else {
3783 unsigned NumElts = SrcTy.getNumElements();
3784 switch (SrcTy.getElementType().getSizeInBits()) {
3785 default:
3786 break;
3787 case 16:
3788 if (NumElts == 4)
3789 Opc = AArch64::FRINTAv4f16;
3790 else if (NumElts == 8)
3791 Opc = AArch64::FRINTAv8f16;
3792 break;
3793 case 32:
3794 if (NumElts == 2)
3795 Opc = AArch64::FRINTAv2f32;
3796 else if (NumElts == 4)
3797 Opc = AArch64::FRINTAv4f32;
3798 break;
3799 case 64:
3800 if (NumElts == 2)
3801 Opc = AArch64::FRINTAv2f64;
3802 break;
3803 }
3804 }
3805
3806 if (!Opc) {
3807 // Didn't get an opcode above, bail.
3808    LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
3809 return false;
3810 }
3811
3812 // Legalization would have set us up perfectly for this; we just need to
3813 // set the opcode and move on.
3814 I.setDesc(TII.get(Opc));
3815 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3816}
3817
3818bool AArch64InstructionSelector::selectVectorICmp(
3819 MachineInstr &I, MachineRegisterInfo &MRI) {
3820 Register DstReg = I.getOperand(0).getReg();
3821 LLT DstTy = MRI.getType(DstReg);
3822 Register SrcReg = I.getOperand(2).getReg();
3823 Register Src2Reg = I.getOperand(3).getReg();
3824 LLT SrcTy = MRI.getType(SrcReg);
3825
3826 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
3827 unsigned NumElts = DstTy.getNumElements();
3828
3829 // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
3830 // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
3831 // Third index is cc opcode:
3832 // 0 == eq
3833 // 1 == ugt
3834 // 2 == uge
3835 // 3 == ult
3836 // 4 == ule
3837 // 5 == sgt
3838 // 6 == sge
3839 // 7 == slt
3840 // 8 == sle
3841 // ne is done by negating 'eq' result.
3842
3843 // This table below assumes that for some comparisons the operands will be
3844 // commuted.
3845 // ult op == commute + ugt op
3846 // ule op == commute + uge op
3847 // slt op == commute + sgt op
3848 // sle op == commute + sge op
3849 unsigned PredIdx = 0;
3850 bool SwapOperands = false;
3851 CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
3852 switch (Pred) {
3853 case CmpInst::ICMP_NE:
3854 case CmpInst::ICMP_EQ:
3855 PredIdx = 0;
3856 break;
3857 case CmpInst::ICMP_UGT:
3858 PredIdx = 1;
3859 break;
3860 case CmpInst::ICMP_UGE:
3861 PredIdx = 2;
3862 break;
3863 case CmpInst::ICMP_ULT:
3864 PredIdx = 3;
3865 SwapOperands = true;
3866 break;
3867 case CmpInst::ICMP_ULE:
3868 PredIdx = 4;
3869 SwapOperands = true;
3870 break;
3871 case CmpInst::ICMP_SGT:
3872 PredIdx = 5;
3873 break;
3874 case CmpInst::ICMP_SGE:
3875 PredIdx = 6;
3876 break;
3877 case CmpInst::ICMP_SLT:
3878 PredIdx = 7;
3879 SwapOperands = true;
3880 break;
3881 case CmpInst::ICMP_SLE:
3882 PredIdx = 8;
3883 SwapOperands = true;
3884 break;
3885 default:
3886    llvm_unreachable("Unhandled icmp predicate");
3887 return false;
3888 }
3889
3890 // This table obviously should be tablegen'd when we have our GISel native
3891 // tablegen selector.
3892
3893 static const unsigned OpcTable[4][4][9] = {
3894 {
3895 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3896 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3897 0 /* invalid */},
3898 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3899 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3900 0 /* invalid */},
3901 {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
3902 AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
3903 AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
3904 {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
3905 AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
3906 AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
3907 },
3908 {
3909 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3910 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3911 0 /* invalid */},
3912 {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
3913 AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
3914 AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
3915 {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
3916 AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
3917 AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
3918 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3919 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3920 0 /* invalid */}
3921 },
3922 {
3923 {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
3924 AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
3925 AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
3926 {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
3927 AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
3928 AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
3929 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3930 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3931 0 /* invalid */},
3932 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3933 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3934 0 /* invalid */}
3935 },
3936 {
3937 {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
3938 AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
3939 AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
3940 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3941 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3942 0 /* invalid */},
3943 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3944 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3945 0 /* invalid */},
3946 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3947 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3948 0 /* invalid */}
3949 },
3950 };
3951 unsigned EltIdx = Log2_32(SrcEltSize / 8);
3952 unsigned NumEltsIdx = Log2_32(NumElts / 2);
3953 unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
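// [Annotation, not part of the analyzed source] Worked example of the lookup
// above: for a <4 x s32> compare with predicate ICMP_SGE,
//   EltIdx     = Log2_32(32 / 8) = 2
//   NumEltsIdx = Log2_32(4 / 2)  = 1
//   PredIdx    = 6
// so OpcTable[2][1][6] selects AArch64::CMGEv4i32.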
3954 if (!Opc) {
3955    LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
3956 return false;
3957 }
3958
3959 const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3960 const TargetRegisterClass *SrcRC =
3961 getRegClassForTypeOnBank(SrcTy, VecRB, true);
3962 if (!SrcRC) {
3963    LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3964 return false;
3965 }
3966
3967 unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
3968 if (SrcTy.getSizeInBits() == 128)
3969 NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
3970
3971 if (SwapOperands)
3972 std::swap(SrcReg, Src2Reg);
3973
3974 auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
3975 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3976
3977 // Invert if we had a 'ne' cc.
3978 if (NotOpc) {
3979 Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
3980 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3981 } else {
3982 MIB.buildCopy(DstReg, Cmp.getReg(0));
3983 }
3984 RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
3985 I.eraseFromParent();
3986 return true;
3987}
3988
3989MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3990 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3991 MachineIRBuilder &MIRBuilder) const {
3992 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3993
3994 auto BuildFn = [&](unsigned SubregIndex) {
3995 auto Ins =
3996 MIRBuilder
3997 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
3998 .addImm(SubregIndex);
3999 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
4000 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
4001 return &*Ins;
4002 };
4003
4004 switch (EltSize) {
4005 case 8:
4006 return BuildFn(AArch64::bsub);
4007 case 16:
4008 return BuildFn(AArch64::hsub);
4009 case 32:
4010 return BuildFn(AArch64::ssub);
4011 case 64:
4012 return BuildFn(AArch64::dsub);
4013 default:
4014 return nullptr;
4015 }
4016}
4017
4018bool AArch64InstructionSelector::selectMergeValues(
4019 MachineInstr &I, MachineRegisterInfo &MRI) {
4020  assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
4021 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
4022 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
4023  assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
4024 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
4025
4026 if (I.getNumOperands() != 3)
4027 return false;
4028
4029 // Merging 2 s64s into an s128.
4030 if (DstTy == LLT::scalar(128)) {
4031 if (SrcTy.getSizeInBits() != 64)
4032 return false;
4033 Register DstReg = I.getOperand(0).getReg();
4034 Register Src1Reg = I.getOperand(1).getReg();
4035 Register Src2Reg = I.getOperand(2).getReg();
4036 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
4037 MachineInstr *InsMI = emitLaneInsert(std::nullopt, Tmp.getReg(0), Src1Reg,
4038 /* LaneIdx */ 0, RB, MIB);
4039 if (!InsMI)
4040 return false;
4041 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
4042 Src2Reg, /* LaneIdx */ 1, RB, MIB);
4043 if (!Ins2MI)
4044 return false;
4045 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
4046 constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
4047 I.eraseFromParent();
4048 return true;
4049 }
4050
4051 if (RB.getID() != AArch64::GPRRegBankID)
4052 return false;
4053
4054 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
4055 return false;
4056
4057 auto *DstRC = &AArch64::GPR64RegClass;
4058 Register SubToRegDef = MRI.createVirtualRegister(DstRC);
4059 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
4060 TII.get(TargetOpcode::SUBREG_TO_REG))
4061 .addDef(SubToRegDef)
4062 .addImm(0)
4063 .addUse(I.getOperand(1).getReg())
4064 .addImm(AArch64::sub_32);
4065 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
4066 // Need to anyext the second scalar before we can use bfm
4067 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
4068 TII.get(TargetOpcode::SUBREG_TO_REG))
4069 .addDef(SubToRegDef2)
4070 .addImm(0)
4071 .addUse(I.getOperand(2).getReg())
4072 .addImm(AArch64::sub_32);
4073 MachineInstr &BFM =
4074 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
4075 .addDef(I.getOperand(0).getReg())
4076 .addUse(SubToRegDef)
4077 .addUse(SubToRegDef2)
4078 .addImm(32)
4079 .addImm(31);
4080 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
4081 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
4082 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
4083 I.eraseFromParent();
4084 return true;
4085}
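// [Annotation, not part of the analyzed source] BFMXri with immr=32, imms=31
// is the "bfi xd, xn, #32, #32" form: the first anyext'd s32 stays in bits
// [31:0] and the low 32 bits of the second s32 are inserted into bits
// [63:32], which is exactly G_MERGE_VALUES of two s32s into an s64.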
4086
4087static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
4088 const unsigned EltSize) {
4089 // Choose a lane copy opcode and subregister based off of the size of the
4090 // vector's elements.
4091 switch (EltSize) {
4092 case 8:
4093 CopyOpc = AArch64::DUPi8;
4094 ExtractSubReg = AArch64::bsub;
4095 break;
4096 case 16:
4097 CopyOpc = AArch64::DUPi16;
4098 ExtractSubReg = AArch64::hsub;
4099 break;
4100 case 32:
4101 CopyOpc = AArch64::DUPi32;
4102 ExtractSubReg = AArch64::ssub;
4103 break;
4104 case 64:
4105 CopyOpc = AArch64::DUPi64;
4106 ExtractSubReg = AArch64::dsub;
4107 break;
4108 default:
4109 // Unknown size, bail out.
4110    LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
4111 return false;
4112 }
4113 return true;
4114}
4115
4116MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
4117 std::optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
4118 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
4119 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4120 unsigned CopyOpc = 0;
4121 unsigned ExtractSubReg = 0;
4122 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
4123    LLVM_DEBUG(
4124        dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
4125 return nullptr;
4126 }
4127
4128 const TargetRegisterClass *DstRC =
4129 getRegClassForTypeOnBank(ScalarTy, DstRB, true);
4130 if (!DstRC) {
4131    LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
4132 return nullptr;
4133 }
4134
4135 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
4136 const LLT &VecTy = MRI.getType(VecReg);
4137 const TargetRegisterClass *VecRC =
4138 getRegClassForTypeOnBank(VecTy, VecRB, true);
4139 if (!VecRC) {
4140    LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
4141 return nullptr;
4142 }
4143
4144 // The register that we're going to copy into.
4145 Register InsertReg = VecReg;
4146 if (!DstReg)
4147 DstReg = MRI.createVirtualRegister(DstRC);
4148 // If the lane index is 0, we just use a subregister COPY.
4149 if (LaneIdx == 0) {
4150 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
4151 .addReg(VecReg, 0, ExtractSubReg);
4152 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
4153 return &*Copy;
4154 }
4155
4156 // Lane copies require 128-bit wide registers. If we're dealing with an
4157 // unpacked vector, then we need to move up to that width. Insert an implicit
4158 // def and a subregister insert to get us there.
4159 if (VecTy.getSizeInBits() != 128) {
4160 MachineInstr *ScalarToVector = emitScalarToVector(
4161 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
4162 if (!ScalarToVector)
4163 return nullptr;
4164 InsertReg = ScalarToVector->getOperand(0).getReg();
4165 }
4166
4167 MachineInstr *LaneCopyMI =
4168 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
4169 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
4170
4171 // Make sure that we actually constrain the initial copy.
4172 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
4173 return LaneCopyMI;
4174}
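// [Annotation, not part of the analyzed source] For example, extracting lane
// 1 of a <2 x s32> %vec roughly becomes (names illustrative):
//   %imp:fpr128  = IMPLICIT_DEF
//   %wide:fpr128 = INSERT_SUBREG %imp, %vec, %subreg.dsub
//   %dst:fpr32   = DUPi32 %wide, 1
// whereas lane 0 takes the early path above and is just a subregister copy,
// %dst = COPY %vec.ssub.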
4175
4176bool AArch64InstructionSelector::selectExtractElt(
4177 MachineInstr &I, MachineRegisterInfo &MRI) {
4178  assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
4179         "unexpected opcode!");
4180 Register DstReg = I.getOperand(0).getReg();
4181 const LLT NarrowTy = MRI.getType(DstReg);
4182 const Register SrcReg = I.getOperand(1).getReg();
4183 const LLT WideTy = MRI.getType(SrcReg);
4184 (void)WideTy;
4185  assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
4186         "source register size too small!");
4187  assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
4188
4189 // Need the lane index to determine the correct copy opcode.
4190 MachineOperand &LaneIdxOp = I.getOperand(2);
4191  assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
4192
4193 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
4194    LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
4195 return false;
4196 }
4197
4198 // Find the index to extract from.
4199 auto VRegAndVal = getIConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
4200 if (!VRegAndVal)
4201 return false;
4202 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
4203
4204
4205 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
4206 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
4207 LaneIdx, MIB);
4208 if (!Extract)
4209 return false;
4210
4211 I.eraseFromParent();
4212 return true;
4213}
4214
4215bool AArch64InstructionSelector::selectSplitVectorUnmerge(
4216 MachineInstr &I, MachineRegisterInfo &MRI) {
4217 unsigned NumElts = I.getNumOperands() - 1;
4218 Register SrcReg = I.getOperand(NumElts).getReg();
4219 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4220 const LLT SrcTy = MRI.getType(SrcReg);
4221
4222  assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
4223 if (SrcTy.getSizeInBits() > 128) {
4224    LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
4225 return false;
4226 }
4227
4228 // We implement a split vector operation by treating the sub-vectors as
4229 // scalars and extracting them.
4230 const RegisterBank &DstRB =
4231 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
4232 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
4233 Register Dst = I.getOperand(OpIdx).getReg();
4234 MachineInstr *Extract =
4235 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
4236 if (!Extract)
4237 return false;
4238 }
4239 I.eraseFromParent();
4240 return true;
4241}
4242
4243bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
4244 MachineRegisterInfo &MRI) {
4245  assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
4246         "unexpected opcode");
4247
4248 // TODO: Handle unmerging into GPRs and from scalars to scalars.
4249 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
4250 AArch64::FPRRegBankID ||
4251 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
4252 AArch64::FPRRegBankID) {
4253    LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
4254                         "currently unsupported.\n");
4255 return false;
4256 }
4257
4258 // The last operand is the vector source register, and every other operand is
4259 // a register to unpack into.
4260 unsigned NumElts = I.getNumOperands() - 1;
4261 Register SrcReg = I.getOperand(NumElts).getReg();
4262 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4263 const LLT WideTy = MRI.getType(SrcReg);
4264 (void)WideTy;
4265  assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
4266         "can only unmerge from vector or s128 types!");
4267  assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
4268         "source register size too small!");
4269
4270 if (!NarrowTy.isScalar())
4271 return selectSplitVectorUnmerge(I, MRI);
4272
4273 // Choose a lane copy opcode and subregister based off of the size of the
4274 // vector's elements.
4275 unsigned CopyOpc = 0;
4276 unsigned ExtractSubReg = 0;
4277 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
4278 return false;
4279
4280 // Set up for the lane copies.
4281 MachineBasicBlock &MBB = *I.getParent();
4282
4283 // Stores the registers we'll be copying from.
4284 SmallVector<Register, 4> InsertRegs;
4285
4286 // We'll use the first register twice, so we only need NumElts-1 registers.
4287 unsigned NumInsertRegs = NumElts - 1;
4288
4289 // If our elements fit into exactly 128 bits, then we can copy from the source
4290 // directly. Otherwise, we need to do a bit of setup with some subregister
4291 // inserts.
4292 if (NarrowTy.getSizeInBits() * NumElts == 128) {
4293 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
4294 } else {
4295 // No. We have to perform subregister inserts. For each insert, create an
4296 // implicit def and a subregister insert, and save the register we create.
4297 const TargetRegisterClass *RC = getRegClassForTypeOnBank(
4298 LLT::fixed_vector(NumElts, WideTy.getScalarSizeInBits()),
4299 *RBI.getRegBank(SrcReg, MRI, TRI));
4300 unsigned SubReg = 0;
4301 bool Found = getSubRegForClass(RC, TRI, SubReg);
4302 (void)Found;
4303    assert(Found && "expected to find last operand's subeg idx");
4304 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
4305 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4306 MachineInstr &ImpDefMI =
4307 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
4308 ImpDefReg);
4309
4310 // Now, create the subregister insert from SrcReg.
4311 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4312 MachineInstr &InsMI =
4313 *BuildMI(MBB, I, I.getDebugLoc(),
4314 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
4315 .addUse(ImpDefReg)
4316 .addUse(SrcReg)
4317 .addImm(SubReg);
4318
4319 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
4320 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
4321
4322 // Save the register so that we can copy from it after.
4323 InsertRegs.push_back(InsertReg);
4324 }
4325 }
4326
4327 // Now that we've created any necessary subregister inserts, we can
4328 // create the copies.
4329 //
4330 // Perform the first copy separately as a subregister copy.
4331 Register CopyTo = I.getOperand(0).getReg();
4332 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
4333 .addReg(InsertRegs[0], 0, ExtractSubReg);
4334 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
4335
4336 // Now, perform the remaining copies as vector lane copies.
4337 unsigned LaneIdx = 1;
4338 for (Register InsReg : InsertRegs) {
4339 Register CopyTo = I.getOperand(LaneIdx).getReg();
4340 MachineInstr &CopyInst =
4341 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
4342 .addUse(InsReg)
4343 .addImm(LaneIdx);
4344 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
4345 ++LaneIdx;
4346 }
4347
4348 // Separately constrain the first copy's destination. Because of the
4349 // limitation in constrainOperandRegClass, we can't guarantee that this will
4350 // actually be constrained. So, do it ourselves using the second operand.
4351 const TargetRegisterClass *RC =
4352 MRI.getRegClassOrNull(I.getOperand(1).getReg());
4353 if (!RC) {
4354    LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
4355 return false;
4356 }
4357
4358 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
4359 I.eraseFromParent();
4360 return true;
4361}
4362
4363bool AArch64InstructionSelector::selectConcatVectors(
4364 MachineInstr &I, MachineRegisterInfo &MRI) {
4365  assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4366         "Unexpected opcode");
4367 Register Dst = I.getOperand(0).getReg();
4368 Register Op1 = I.getOperand(1).getReg();
4369 Register Op2 = I.getOperand(2).getReg();
4370 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
4371 if (!ConcatMI)
4372 return false;
4373 I.eraseFromParent();
4374 return true;
4375}
4376
4377unsigned
4378AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
4379 MachineFunction &MF) const {
4380 Type *CPTy = CPVal->getType();
4381 Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
4382
4383 MachineConstantPool *MCP = MF.getConstantPool();
4384 return MCP->getConstantPoolIndex(CPVal, Alignment);
4385}
4386
4387MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4388 const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
4389 const TargetRegisterClass *RC;
4390 unsigned Opc;
4391 bool IsTiny = TM.getCodeModel() == CodeModel::Tiny;
4392 unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
4393 switch (Size) {
4394 case 16:
4395 RC = &AArch64::FPR128RegClass;
4396 Opc = IsTiny ? AArch64::LDRQl : AArch64::LDRQui;
4397 break;
4398 case 8:
4399 RC = &AArch64::FPR64RegClass;
4400 Opc = IsTiny ? AArch64::LDRDl : AArch64::LDRDui;
4401 break;
4402 case 4:
4403 RC = &AArch64::FPR32RegClass;
4404 Opc = IsTiny ? AArch64::LDRSl : AArch64::LDRSui;
4405 break;
4406 case 2:
4407 RC = &AArch64::FPR16RegClass;
4408 Opc = AArch64::LDRHui;
4409 break;
4410 default:
4411    LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
4412                      << *CPVal->getType());
4413 return nullptr;
4414 }
4415
4416 MachineInstr *LoadMI = nullptr;
4417 auto &MF = MIRBuilder.getMF();
4418 unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
4419 if (IsTiny && (Size == 16 || Size == 8 || Size == 4)) {
4420 // Use load(literal) for tiny code model.
4421 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {}).addConstantPoolIndex(CPIdx);
4422 } else {
4423 auto Adrp =
4424 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
4425 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
4426
4427 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {Adrp})
4428 .addConstantPoolIndex(
4429 CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4430
4431 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
4432 }
4433
4434 MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
4435 LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo,
4436 MachineMemOperand::MOLoad,
4437 Size, Align(Size)));
4438 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
4439 return LoadMI;
4440}
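// [Annotation, not part of the analyzed source] Outside the tiny code model
// the constant is reached via its page address plus a page-offset load, e.g.:
//   adrp x8, .LCPI0_0
//   ldr  q0, [x8, :lo12:.LCPI0_0]
// while the tiny code model uses the pc-relative load-literal form
// (LDRQl/LDRDl/LDRSl) directly on the constant-pool symbol.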
4441
4442/// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
4443/// size and RB.
4444static std::pair<unsigned, unsigned>
4445getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
4446 unsigned Opc, SubregIdx;
4447 if (RB.getID() == AArch64::GPRRegBankID) {
4448 if (EltSize == 16) {
4449 Opc = AArch64::INSvi16gpr;
4450 SubregIdx = AArch64::ssub;
4451 } else if (EltSize == 32) {
4452 Opc = AArch64::INSvi32gpr;
4453 SubregIdx = AArch64::ssub;
4454 } else if (EltSize == 64) {
4455 Opc = AArch64::INSvi64gpr;
4456 SubregIdx = AArch64::dsub;
4457 } else {
4458      llvm_unreachable("invalid elt size!");
4459 }
4460 } else {
4461 if (EltSize == 8) {
4462 Opc = AArch64::INSvi8lane;
4463 SubregIdx = AArch64::bsub;
4464 } else if (EltSize == 16) {
4465 Opc = AArch64::INSvi16lane;
4466 SubregIdx = AArch64::hsub;
4467 } else if (EltSize == 32) {
4468 Opc = AArch64::INSvi32lane;
4469 SubregIdx = AArch64::ssub;
4470 } else if (EltSize == 64) {
4471 Opc = AArch64::INSvi64lane;
4472 SubregIdx = AArch64::dsub;
4473 } else {
4474      llvm_unreachable("invalid elt size!");
4475 }
4476 }
4477 return std::make_pair(Opc, SubregIdx);
4478}
4479
4480MachineInstr *AArch64InstructionSelector::emitInstr(
4481 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4482 std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4483 const ComplexRendererFns &RenderFns) const {
4484  assert(Opcode && "Expected an opcode?");
4485  assert(!isPreISelGenericOpcode(Opcode) &&
4486         "Function should only be used to produce selected instructions!");
4487 auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
4488 if (RenderFns)
4489 for (auto &Fn : *RenderFns)
4490 Fn(MI);
4491 constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
4492 return &*MI;
4493}
4494
4495MachineInstr *AArch64InstructionSelector::emitAddSub(
4496 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4497 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
4498 MachineIRBuilder &MIRBuilder) const {
4499 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4500  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4501 auto Ty = MRI.getType(LHS.getReg());
4502  assert(!Ty.isVector() && "Expected a scalar or pointer?");
4503 unsigned Size = Ty.getSizeInBits();
4504  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
4505 bool Is32Bit = Size == 32;
4506
4507 // INSTRri form with positive arithmetic immediate.
4508 if (auto Fns = selectArithImmed(RHS))
4509 return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
4510 MIRBuilder, Fns);
4511
4512 // INSTRri form with negative arithmetic immediate.
4513 if (auto Fns = selectNegArithImmed(RHS))
4514 return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
4515 MIRBuilder, Fns);
4516
4517 // INSTRrx form.
4518 if (auto Fns = selectArithExtendedRegister(RHS))
4519 return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
4520 MIRBuilder, Fns);
4521
4522 // INSTRrs form.
4523 if (auto Fns = selectShiftedRegister(RHS))
4524 return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
4525 MIRBuilder, Fns);
4526 return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
4527 MIRBuilder);
4528}
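// [Annotation, not part of the analyzed source] The matching order above
// prefers the immediate forms, then the extended-register form, then the
// shifted-register form, before falling back to plain register-register.
// With the emitADD table below, for instance, G_ADD %x, 16 can become
// "add x0, x1, #16" (ADDXri), and G_ADD %x, -16 is flipped into
// "sub x0, x1, #16" (SUBXri) through the negated-immediate row.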
4529
4530MachineInstr *
4531AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4532 MachineOperand &RHS,
4533 MachineIRBuilder &MIRBuilder) const {
4534 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4535 {{AArch64::ADDXri, AArch64::ADDWri},
4536 {AArch64::ADDXrs, AArch64::ADDWrs},
4537 {AArch64::ADDXrr, AArch64::ADDWrr},
4538 {AArch64::SUBXri, AArch64::SUBWri},
4539 {AArch64::ADDXrx, AArch64::ADDWrx}}};
4540 return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
4541}
4542
4543MachineInstr *
4544AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4545 MachineOperand &RHS,
4546 MachineIRBuilder &MIRBuilder) const {
4547 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4548 {{AArch64::ADDSXri, AArch64::ADDSWri},
4549 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4550 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4551 {AArch64::SUBSXri, AArch64::SUBSWri},
4552 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4553 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4554}
4555
4556MachineInstr *
4557AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4558 MachineOperand &RHS,
4559 MachineIRBuilder &MIRBuilder) const {
4560 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4561 {{AArch64::SUBSXri, AArch64::SUBSWri},
4562 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4563 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4564 {AArch64::ADDSXri, AArch64::ADDSWri},
4565 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4566 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4567}
4568
4569MachineInstr *
4570AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4571 MachineIRBuilder &MIRBuilder) const {
4572 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4573 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
4574 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4575 return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4576}
4577
4578MachineInstr *
4579AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4580 MachineIRBuilder &MIRBuilder) const {
4581  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4582 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4583 LLT Ty = MRI.getType(LHS.getReg());
4584 unsigned RegSize = Ty.getSizeInBits();
4585 bool Is32Bit = (RegSize == 32);
4586 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4587 {AArch64::ANDSXrs, AArch64::ANDSWrs},
4588 {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4589 // ANDS needs a logical immediate for its immediate form. Check if we can
4590 // fold one in.
4591 if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
4592 int64_t Imm = ValAndVReg->Value.getSExtValue();
4593
4594 if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
4595 auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
4596 TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
4597 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
4598 return &*TstMI;
4599 }
4600 }
4601
4602 if (auto Fns = selectLogicalShiftedRegister(RHS))
4603 return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
4604 return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
4605}
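// [Annotation, not part of the analyzed source] For example, a test of bit 3
// (G_AND %x, 8 feeding a compare with zero) can use the immediate form
// because 8 is an encodable logical immediate, conceptually "tst w0, #0x8";
// a constant such as 0x12345 is not encodable and falls through to the
// shifted-register or register-register ANDS forms.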
4606
4607MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4608 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4609 MachineIRBuilder &MIRBuilder) const {
4610  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4611  assert(Predicate.isPredicate() && "Expected predicate?");
4612 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4613 LLT CmpTy = MRI.getType(LHS.getReg());
4614  assert(!CmpTy.isVector() && "Expected scalar or pointer");
4615 unsigned Size = CmpTy.getSizeInBits();
4616 (void)Size;
4617  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
4618 // Fold the compare into a cmn or tst if possible.
4619 if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4620 return FoldCmp;
4621 auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
4622 return emitSUBS(Dst, LHS, RHS, MIRBuilder);
4623}
4624
4625MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4626 Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
4627 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4628#ifndef NDEBUG
4629 LLT Ty = MRI.getType(Dst);
4630  assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
4631         "Expected a 32-bit scalar register?");
4632#endif
4633 const Register ZReg = AArch64::WZR;
4634 AArch64CC::CondCode CC1, CC2;
4635 changeFCMPPredToAArch64CC(Pred, CC1, CC2);
4636 auto InvCC1 = AArch64CC::getInvertedCondCode(CC1);
4637 if (CC2 == AArch64CC::AL)
4638 return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1,
4639 MIRBuilder);
4640 const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
4641 Register Def1Reg = MRI.createVirtualRegister(RC);
4642 Register Def2Reg = MRI.createVirtualRegister(RC);
4643 auto InvCC2 = AArch64CC::getInvertedCondCode(CC2);
4644 emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, MIRBuilder);
4645 emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC2, MIRBuilder);
4646 auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4647 constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
4648 return &*OrMI;
4649}
4650
4651MachineInstr *AArch64InstructionSelector::emitFPCompare(
4652 Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
4653 std::optional<CmpInst::Predicate> Pred) const {
4654 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4655 LLT Ty = MRI.getType(LHS);
4656 if (Ty.isVector())
4657 return nullptr;
4658 unsigned OpSize = Ty.getSizeInBits();
4659 if (OpSize != 32 && OpSize != 64)
4660 return nullptr;
4661
4662 // If this is a compare against +0.0, then we don't have
4663 // to explicitly materialize a constant.
4664 const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
4665 bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4666
4667 auto IsEqualityPred = [](CmpInst::Predicate P) {
4668 return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
4669 P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
4670 };
4671 if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4672 // Try commutating the operands.
4673 const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
4674 if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
4675 ShouldUseImm = true;
4676 std::swap(LHS, RHS);
4677 }
4678 }
4679 unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
4680 {AArch64::FCMPSri, AArch64::FCMPDri}};
4681 unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64];
4682
4683 // Partially build the compare. Decide if we need to add a use for the
4684 // third operand based off whether or not we're comparing against 0.0.
4685 auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
4686 CmpMI.setMIFlags(MachineInstr::NoFPExcept);
4687 if (!ShouldUseImm)
4688 CmpMI.addUse(RHS);
4689 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4690 return &*CmpMI;
4691}
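// [Annotation, not part of the analyzed source] Comparing against +0.0 picks
// FCMPSri/FCMPDri, which assemble to the single-operand form "fcmp s0, #0.0".
// A 0.0 on the left-hand side is only commuted for the equality-style
// predicates, since those are symmetric; swapping the operands of an ordered
// less/greater comparison would change its meaning.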
4692
4693MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4694 std::optional<Register> Dst, Register Op1, Register Op2,
4695 MachineIRBuilder &MIRBuilder) const {
4696 // We implement a vector concat by:
4697 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
4698 // 2. Insert the upper vector into the destination's upper element
4699 // TODO: some of this code is common with G_BUILD_VECTOR handling.
4700 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4701
4702 const LLT Op1Ty = MRI.getType(Op1);
4703 const LLT Op2Ty = MRI.getType(Op2);
4704
4705 if (Op1Ty != Op2Ty) {
4706    LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4707 return nullptr;
4708 }
4709  assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4710
4711 if (Op1Ty.getSizeInBits() >= 128) {
4712    LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4713 return nullptr;
4714 }
4715
4716 // At the moment we just support 64 bit vector concats.
4717 if (Op1Ty.getSizeInBits() != 64) {
4718    LLVM_DEBUG(dbgs() << "Vector concat supported for 64b vectors");
4719 return nullptr;
4720 }
4721
4722 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
4723 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
4724 const TargetRegisterClass *DstRC =
4725 getRegClassForTypeOnBank(Op1Ty.multiplyElements(2), FPRBank);
4726
4727 MachineInstr *WidenedOp1 =
4728 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
4729 MachineInstr *WidenedOp2 =
4730 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
4731 if (!WidenedOp1 || !WidenedOp2) {
4732    LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4733 return nullptr;
4734 }
4735
4736 // Now do the insert of the upper element.
4737 unsigned InsertOpc, InsSubRegIdx;
4738 std::tie(InsertOpc, InsSubRegIdx) =
4739 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
4740
4741 if (!Dst)
4742 Dst = MRI.createVirtualRegister(DstRC);
4743 auto InsElt =
4744 MIRBuilder
4745 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
4746 .addImm(1) /* Lane index */
4747 .addUse(WidenedOp2->getOperand(0).getReg())
4748 .addImm(0);
4749 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4750 return &*InsElt;
4751}
4752
4753MachineInstr *
4754AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
4755 Register Src2, AArch64CC::CondCode Pred,
4756 MachineIRBuilder &MIRBuilder) const {
4757 auto &MRI = *MIRBuilder.getMRI();
4758 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Dst);
4759 // If we used a register class, then this won't necessarily have an LLT.
4760 // Compute the size based off whether or not we have a class or bank.
4761 unsigned Size;
4762 if (const auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>())
4763 Size = TRI.getRegSizeInBits(*RC);
4764 else
4765 Size = MRI.getType(Dst).getSizeInBits();
4766 // Some opcodes use s1.
4767  assert(Size <= 64 && "Expected 64 bits or less only!");
4768 static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
4769 unsigned Opc = OpcTable[Size == 64];
4770 auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
4771 constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI);
4772 return &*CSINC;
4773}
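// Usage sketch (illustrative): CSINC computes Dst = Pred ? Src1 : Src2 + 1.
// A common way to materialize a boolean from the flags is to pass the zero
// register for both sources with the *inverted* condition:
//
//   emitCSINC(/*Dst=*/BoolReg, /*Src1=*/AArch64::WZR, /*Src2=*/AArch64::WZR,
//             /*Pred=*/InvCC, MIRBuilder);   // BoolReg = (CC holds) ? 1 : 0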
4774
4775std::pair<MachineInstr *, AArch64CC::CondCode>
4776AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4777 MachineOperand &LHS,
4778 MachineOperand &RHS,
4779 MachineIRBuilder &MIRBuilder) const {
4780 switch (Opcode) {
4781 default:
4782 llvm_unreachable("Unexpected opcode!");
4783 case TargetOpcode::G_SADDO:
4784 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4785 case TargetOpcode::G_UADDO:
4786 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4787 case TargetOpcode::G_SSUBO:
4788 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4789 case TargetOpcode::G_USUBO:
4790 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4791 }
4792}
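// Illustrative note (an assumption about how callers use this): the returned
// condition code is the one that holds exactly when the emitted ADDS/SUBS
// overflowed, so a caller can materialize the overflow bit with a CSINC of
// the inverted code, e.g. for G_UADDO:
//
//   auto [AddMI, CC] = emitOverflowOp(G_UADDO, Dst, LHS, RHS, MIB); // CC == HS
//   emitCSINC(OverflowReg, AArch64::WZR, AArch64::WZR,
//             AArch64CC::getInvertedCondCode(CC), MIB);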
4793
4794/// Returns true if @p Val is a tree of AND/OR/CMP operations that can be
4795/// expressed as a conjunction.
4796/// \param CanNegate Set to true if we can negate the whole sub-tree just by
4797/// changing the conditions on the CMP tests.
4798/// (this means we can call emitConjunctionRec() with
4799/// Negate==true on this sub-tree)
4800/// \param MustBeFirst Set to true if this subtree needs to be negated and we
4801/// cannot do the negation naturally. We are required to
4802/// emit the subtree first in this case.
4803 /// \param WillNegate Is true if we are called when the result of this
4804/// subexpression must be negated. This happens when the
4805/// outer expression is an OR. We can use this fact to know
4806/// that we have a double negation (or (or ...) ...) that
4807/// can be implemented for free.
4808static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
4809 bool WillNegate, MachineRegisterInfo &MRI,
4810 unsigned Depth = 0) {
4811 if (!MRI.hasOneNonDBGUse(Val))
4812 return false;
4813 MachineInstr *ValDef = MRI.getVRegDef(Val);
4814 unsigned Opcode = ValDef->getOpcode();
4815 if (isa<GAnyCmp>(ValDef)) {
4816 CanNegate = true;
4817 MustBeFirst = false;
4818 return true;
4819 }
4820 // Protect against exponential runtime and stack overflow.
4821 if (Depth > 6)
4822 return false;
4823 if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
4824 bool IsOR = Opcode == TargetOpcode::G_OR;
4825 Register O0 = ValDef->getOperand(1).getReg();
4826 Register O1 = ValDef->getOperand(2).getReg();
4827 bool CanNegateL;
4828 bool MustBeFirstL;
4829 if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, MRI, Depth + 1))
4830 return false;
4831 bool CanNegateR;
4832 bool MustBeFirstR;
4833 if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, MRI, Depth + 1))
4834 return false;
4835
4836 if (MustBeFirstL && MustBeFirstR)
4837 return false;
4838
4839 if (IsOR) {
4840 // For an OR expression we need to be able to naturally negate at least
4841 // one side or we cannot do the transformation at all.
4842 if (!CanNegateL && !CanNegateR)
4843 return false;
4844 // If the result of the OR will be negated and we can naturally negate
4845 // the leaves, then this sub-tree as a whole negates naturally.
4846 CanNegate = WillNegate && CanNegateL && CanNegateR;
4847 // If we cannot naturally negate the whole sub-tree, then this must be
4848 // emitted first.
4849 MustBeFirst = !CanNegate;
4850 } else {
4851 assert(Opcode == TargetOpcode::G_AND && "Must be G_AND");
4852 // We cannot naturally negate an AND operation.
4853 CanNegate = false;
4854 MustBeFirst = MustBeFirstL || MustBeFirstR;
4855 }
4856 return true;
4857 }
4858 return false;
4859}
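// Example of a tree this accepts (illustrative): a chain of single-use
// compares joined by G_AND/G_OR, e.g.
//
//   %c0:_(s32) = G_ICMP intpred(slt), %a, %b
//   %c1:_(s32) = G_FCMP floatpred(oeq), %x, %y
//   %cond:_(s32) = G_AND %c0, %c1
//
// Both leaves are compares, so CanNegate is true for each leaf, and the whole
// tree can be emitted as one compare followed by a conditional compare.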
4860
4861MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
4862 Register LHS, Register RHS, CmpInst::Predicate CC,
4863 AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC,
4864 MachineIRBuilder &MIB) const {
4865 // TODO: emit CMN as an optimization.
4866 auto &MRI = *MIB.getMRI();
4867 LLT OpTy = MRI.getType(LHS);
4868 assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64);
4869 unsigned CCmpOpc;
4870 std::optional<ValueAndVReg> C;
4871 if (CmpInst::isIntPredicate(CC)) {
4872 C = getIConstantVRegValWithLookThrough(RHS, MRI);
4873 if (C && C->Value.ult(32))
4874 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWi : AArch64::CCMPXi;
4875 else
4876 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
4877 } else {
4878 switch (OpTy.getSizeInBits()) {
4879 case 16:
4880 CCmpOpc = AArch64::FCCMPHrr;
4881 break;
4882 case 32:
4883 CCmpOpc = AArch64::FCCMPSrr;
4884 break;
4885 case 64:
4886 CCmpOpc = AArch64::FCCMPDrr;
4887 break;
4888 default:
4889 return nullptr;
4890 }
4891 }
4892 AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
4893 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
4894 auto CCmp =
4895 MIB.buildInstr(CCmpOpc, {}, {LHS});
4896 if (CCmpOpc == AArch64::CCMPWi || CCmpOpc == AArch64::CCMPXi)
4897 CCmp.addImm(C->Value.getZExtValue());
4898 else
4899 CCmp.addReg(RHS);
4900 CCmp.addImm(NZCV).addImm(Predicate);
4901 constrainSelectedInstRegOperands(*CCmp, TII, TRI, RBI);
4902 return &*CCmp;
4903}
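// Semantics sketch (per the AArch64 CCMP/FCCMP definition): if `Predicate`
// holds for the incoming flags, the instruction compares LHS against RHS and
// sets NZCV from that comparison; otherwise it sets NZCV directly to the
// immediate computed above, which is chosen so the *inverse* of OutCC holds,
// i.e. the overall conjunction fails. For example:
//
//   ccmp w0, #3, #0, eq    ; if eq: flags = (w0 cmp 3), else NZCV = 0b0000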
4904
4905MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
4906 Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp,
4907 AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const {
4908 // We're at a tree leaf, produce a conditional comparison operation.
4909 auto &MRI = *MIB.getMRI();
4910 MachineInstr *ValDef = MRI.getVRegDef(Val);
4911 unsigned Opcode = ValDef->getOpcode();
4912 if (auto *Cmp = dyn_cast<GAnyCmp>(ValDef)) {
4913 Register LHS = Cmp->getLHSReg();
4914 Register RHS = Cmp->getRHSReg();
4915 CmpInst::Predicate CC = Cmp->getCond();
4916 if (Negate)
4917 CC = CmpInst::getInversePredicate(CC);
4918 if (isa<GICmp>(Cmp)) {
4919 OutCC = changeICMPPredToAArch64CC(CC);
4920 } else {
4921 // Handle special FP cases.
4922 AArch64CC::CondCode ExtraCC;
4923 changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
4924 // Some floating point conditions can't be tested with a single condition
4925 // code. Construct an additional comparison in this case.
4926 if (ExtraCC != AArch64CC::AL) {
4927 MachineInstr *ExtraCmp;
4928 if (!CCOp)
4929 ExtraCmp = emitFPCompare(LHS, RHS, MIB, CC);
4930 else
4931 ExtraCmp =
4932 emitConditionalComparison(LHS, RHS, CC, Predicate, ExtraCC, MIB);
4933 CCOp = ExtraCmp->getOperand(0).getReg();
4934 Predicate = ExtraCC;
4935 }
4936 }
4937
4938 // Produce a normal comparison if we are first in the chain
4939 if (!CCOp) {
4940 auto Dst = MRI.cloneVirtualRegister(LHS);
4941 if (isa<GICmp>(Cmp))
4942 return emitSUBS(Dst, Cmp->getOperand(2), Cmp->getOperand(3), MIB);
4943 return emitFPCompare(Cmp->getOperand(2).getReg(),
4944 Cmp->getOperand(3).getReg(), MIB);
4945 }
4946 // Otherwise produce a ccmp.
4947 return emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC, MIB);
4948 }
4949 assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree");
4950
4951 bool IsOR = Opcode == TargetOpcode::G_OR;
4952
4953 Register LHS = ValDef->getOperand(1).getReg();
4954 bool CanNegateL;
4955 bool MustBeFirstL;
4956 bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR, MRI);
4957 assert(ValidL && "Valid conjunction/disjunction tree");
4958 (void)ValidL;
4959
4960 Register RHS = ValDef->getOperand(2).getReg();
4961 bool CanNegateR;
4962 bool MustBeFirstR;
4963 bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR, MRI);
4964 assert(ValidR && "Valid conjunction/disjunction tree");
4965 (void)ValidR;
4966
4967 // Swap sub-tree that must come first to the right side.
4968 if (MustBeFirstL) {
4969 assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
4970 std::swap(LHS, RHS);
4971 std::swap(CanNegateL, CanNegateR);
4972 std::swap(MustBeFirstL, MustBeFirstR);
4973 }
4974
4975 bool NegateR;
4976 bool NegateAfterR;
4977 bool NegateL;
4978 bool NegateAfterAll;
4979 if (Opcode == TargetOpcode::G_OR) {
4980 // Swap the sub-tree that we can negate naturally to the left.
4981 if (!CanNegateL) {
4982 assert(CanNegateR && "at least one side must be negatable");
4983 assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
4984 assert(!Negate);
4985 std::swap(LHS, RHS);
4986 NegateR = false;
4987 NegateAfterR = true;
4988 } else {
4989 // Negate the left sub-tree if possible, otherwise negate the result.
4990 NegateR = CanNegateR;
4991 NegateAfterR = !CanNegateR;
4992 }
4993 NegateL = true;
4994 NegateAfterAll = !Negate;
4995 } else {
4996 assert(Opcode == TargetOpcode::G_AND &&
4997        "Valid conjunction/disjunction tree");
4998 assert(!Negate && "Valid conjunction/disjunction tree");
4999
5000 NegateL = false;
5001 NegateR = false;
5002 NegateAfterR = false;
5003 NegateAfterAll = false;
5004 }
5005
5006 // Emit sub-trees.
5007 AArch64CC::CondCode RHSCC;
5008 MachineInstr *CmpR =
5009 emitConjunctionRec(RHS, RHSCC, NegateR, CCOp, Predicate, MIB);
5010 if (NegateAfterR)
5011 RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
5012 MachineInstr *CmpL = emitConjunctionRec(
5013 LHS, OutCC, NegateL, CmpR->getOperand(0).getReg(), RHSCC, MIB);
5014 if (NegateAfterAll)
5015 OutCC = AArch64CC::getInvertedCondCode(OutCC);
5016 return CmpL;
5017}
5018
5019MachineInstr *AArch64InstructionSelector::emitConjunction(
5020 Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const {
5021 bool DummyCanNegate;
5022 bool DummyMustBeFirst;
5023 if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false,
5024 *MIB.getMRI()))
5025 return nullptr;
5026 return emitConjunctionRec(Val, OutCC, false, Register(), AArch64CC::AL, MIB);
5027}
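// End-to-end example (illustrative, assuming the usual select lowering): for
// a condition like (a < b) && (c == d), the emitted sequence looks roughly
// like:
//
//   subs wzr, wc, wd        ; compare c, d          (flags for "eq")
//   ccmp wa, wb, #0, eq     ; if eq: compare a, b; else force NZCV = #0 (ge)
//   csel wdst, wx, wy, lt   ; select on the final condition (a < b)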
5028
5029bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI,
5030 MachineInstr &CondMI) {
5031 AArch64CC::CondCode AArch64CC;
5032 MachineInstr *ConjMI = emitConjunction(SelI.getCondReg(), AArch64CC, MIB);
5033 if (!ConjMI)
5034 return false;
5035
5036 emitSelect(SelI.getReg(0), SelI.getTrueReg(), SelI.getFalseReg(), AArch64CC, MIB);
5037 SelI.eraseFromParent();
5038 return true;
5039}
5040
5041bool AArch64InstructionSelector::tryOptSelect(GSelect &I) {
5042 MachineRegisterInfo &MRI = *MIB.getMRI();
5043 // We want to recognize this pattern:
5044 //
5045 // $z = G_FCMP pred, $x, $y
5046 // ...
5047 // $w = G_SELECT $z, $a, $b
5048 //
5049 // Where the value of $z is *only* ever used by the G_SELECT (possibly with
5050 // some copies/truncs in between.)
5051 //
5052 // If we see this, then we can emit something like this:
5053 //
5054 // fcmp $x, $y
5055 // fcsel $w, $a, $b, pred
5056 //
5057 // Rather than emitting both of the rather long sequences in the standard
5058 // G_FCMP/G_SELECT select methods.
5059
5060 // First, check if the condition is defined by a compare.
5061 MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
5062
5063 // We can only fold if all of the defs have one use.
5064 Register CondDefReg = CondDef->getOperand(0).getReg();
5065 if (!MRI.hasOneNonDBGUse(CondDefReg)) {
5066 // Unless it's another select.
5067 for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
5068 if (CondDef == &UI)
5069 continue;
5070 if (UI.getOpcode() != TargetOpcode::G_SELECT)
5071 return false;
5072 }
5073 }
5074
5075 // Is the condition defined by a compare?
5076 unsigned CondOpc = CondDef->getOpcode();
5077 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) {
5078 if (tryOptSelectConjunction(I, *CondDef))
5079 return true;
5080 return false;
5081 }
5082
5083 AArch64CC::CondCode CondCode;
5084 if (CondOpc == TargetOpcode::G_ICMP) {
5085 auto Pred =
5086 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
5087 CondCode = changeICMPPredToAArch64CC(Pred);
5088 emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
5089 CondDef->getOperand(1), MIB);
5090 } else {
5091 // Get the condition code for the select.
5092 auto Pred =
5093 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
5094 AArch64CC::CondCode CondCode2;
5095 changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);
5096
5097 // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
5098 // instructions to emit the comparison.
5099 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
5100 // unnecessary.
5101 if (CondCode2 != AArch64CC::AL)
5102 return false;
5103
5104 if (!emitFPCompare(CondDef->getOperand(2).getReg(),
5105 CondDef->getOperand(3).getReg(), MIB)) {
5106 LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
5107 return false;
5108 }
5109 }
5110
5111 // Emit the select.
5112 emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
5113 I.getOperand(3).getReg(), CondCode, MIB);
5114 I.eraseFromParent();
5115 return true;
5116}
5117
5118MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
5119 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
5120 MachineIRBuilder &MIRBuilder) const {
5121 assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
5122        "Unexpected MachineOperand");
5123 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5124 // We want to find this sort of thing:
5125 // x = G_SUB 0, y
5126 // G_ICMP z, x
5127 //
5128 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
5129 // e.g:
5130 //
5131 // cmn z, y
5132
5133 // Check if the RHS or LHS of the G_ICMP is defined by a SUB
5134 MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
5135 MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
5136 auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());
5137 // Given this:
5138 //
5139 // x = G_SUB 0, y
5140 // G_ICMP x, z
5141 //
5142 // Produce this:
5143 //
5144 // cmn y, z
5145 if (isCMN(LHSDef, P, MRI))
5146 return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
5147
5148 // Same idea here, but with the RHS of the compare instead:
5149 //
5150 // Given this:
5151 //
5152 // x = G_SUB 0, y
5153 // G_ICMP z, x
5154 //
5155 // Produce this:
5156 //
5157 // cmn z, y
5158 if (isCMN(RHSDef, P, MRI))
5159 return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
5160
5161 // Given this:
5162 //
5163 // z = G_AND x, y
5164 // G_ICMP z, 0
5165 //
5166 // Produce this if the compare is signed:
5167 //
5168 // tst x, y
5169 if (!CmpInst::isUnsigned(P) && LHSDef &&
5170 LHSDef->getOpcode() == TargetOpcode::G_AND) {
5171 // Make sure that the RHS is 0.
5172 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
5173 if (!ValAndVReg || ValAndVReg->Value != 0)
5174 return nullptr;
5175
5176 return emitTST(LHSDef->getOperand(1),
5177 LHSDef->getOperand(2), MIRBuilder);
5178 }
5179
5180 return nullptr;
5181}
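// Why the CMN fold is sound (informal, and an assumption about isCMN's
// checks): `cmn z, y` sets flags from z + y, while the original compare is
// against x = 0 - y. For equality predicates only the Z flag matters, and
// z + y == 0 is exactly z == 0 - y, so the flag behaviour matches; isCMN is
// expected to reject predicates where the C/V edge cases (e.g. y == INT_MIN)
// would differ.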
5182
5183bool AArch64InstructionSelector::selectShuffleVector(
5184 MachineInstr &I, MachineRegisterInfo &MRI) {
5185 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5186 Register Src1Reg = I.getOperand(1).getReg();
5187 const LLT Src1Ty = MRI.getType(Src1Reg);
5188 Register Src2Reg = I.getOperand(2).getReg();
5189 const LLT Src2Ty = MRI.getType(Src2Reg);
5190 ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
5191
5192 MachineBasicBlock &MBB = *I.getParent();
5193 MachineFunction &MF = *MBB.getParent();
5194 LLVMContext &Ctx = MF.getFunction().getContext();
5195
5196 // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
5197 // it originated from a <1 x T> type. Those should have been lowered into
5198 // G_BUILD_VECTOR earlier.
5199 if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
5200 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
5201 return false;
5202 }
5203
5204 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
5205
5206 SmallVector<Constant *, 64> CstIdxs;
5207 for (int Val : Mask) {
5208 // For now, any undef indexes we'll just assume to be 0. This should be
5209 // optimized in future, e.g. to select DUP etc.
5210 Val = Val < 0 ? 0 : Val;
5211 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
5212 unsigned Offset = Byte + Val * BytesPerElt;
5213 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
5214 }
5215 }
5216
5217 // Use a constant pool to load the index vector for TBL.
5218 Constant *CPVal = ConstantVector::get(CstIdxs);
5219 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIB);
5220 if (!IndexLoad) {
5221 LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
5222 return false;
5223 }
5224
5225 if (DstTy.getSizeInBits() != 128) {
5226 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
5227 // This case can be done with TBL1.
5228 MachineInstr *Concat =
5229 emitVectorConcat(std::nullopt, Src1Reg, Src2Reg, MIB);
5230 if (!Concat) {
5231 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
5232 return false;
5233 }
5234
5235 // The constant pool load will be 64 bits, so need to convert to FPR128 reg.
5236 IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass,
5237 IndexLoad->getOperand(0).getReg(), MIB);
5238
5239 auto TBL1 = MIB.buildInstr(
5240 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
5241 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
5242 constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
5243
5244 auto Copy =
5245 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
5246 .addReg(TBL1.getReg(0), 0, AArch64::dsub);
5247 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
5248 I.eraseFromParent();
5249 return true;
5250 }
5251
5252 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
5253 // Q registers for regalloc.
5254 SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};
5255 auto RegSeq = createQTuple(Regs, MIB);
5256 auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
5257 {RegSeq, IndexLoad->getOperand(0)});
5258 constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
5259 I.eraseFromParent();
5260 return true;
5261}
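// Worked example of the TBL index construction above (illustrative): for a
// <4 x s16> shuffle with mask [1, 0, 3, 2], BytesPerElt is 2, so the byte
// offsets emitted into the constant pool are:
//
//   element 1 -> bytes 2, 3
//   element 0 -> bytes 0, 1
//   element 3 -> bytes 6, 7
//   element 2 -> bytes 4, 5
//
// i.e. the index vector {2, 3, 0, 1, 6, 7, 4, 5}, which TBL uses to gather
// bytes from the (concatenated) source registers.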
5262
5263MachineInstr *AArch64InstructionSelector::emitLaneInsert(
5264 std::optional<Register> DstReg, Register SrcReg, Register EltReg,
5265 unsigned LaneIdx, const RegisterBank &RB,
5266 MachineIRBuilder &MIRBuilder) const {
5267 MachineInstr *InsElt = nullptr;
5268 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5269 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5270
5271 // Create a register to define with the insert if one wasn't passed in.
5272 if (!DstReg)
5273 DstReg = MRI.createVirtualRegister(DstRC);
5274
5275 unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
5276 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
5277
5278 if (RB.getID() == AArch64::FPRRegBankID) {
5279 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
5280 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5281 .addImm(LaneIdx)
5282 .addUse(InsSub->getOperand(0).getReg())
5283 .addImm(0);
5284 } else {
5285 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5286 .addImm(LaneIdx)
5287 .addUse(EltReg);
5288 }
5289
5290 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
5291 return InsElt;
5292}
5293
5294bool AArch64InstructionSelector::selectUSMovFromExtend(
5295 MachineInstr &MI, MachineRegisterInfo &MRI) {
5296 if (MI.getOpcode() != TargetOpcode::G_SEXT &&
5297 MI.getOpcode() != TargetOpcode::G_ZEXT &&
5298 MI.getOpcode() != TargetOpcode::G_ANYEXT)
5299 return false;
5300 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT;
5301 const Register DefReg = MI.getOperand(0).getReg();
5302 const LLT DstTy = MRI.getType(DefReg);
5303 unsigned DstSize = DstTy.getSizeInBits();
5304
5305 if (DstSize != 32 && DstSize != 64)
5306 return false;
5307
5308 MachineInstr *Extract = getOpcodeDef(TargetOpcode::G_EXTRACT_VECTOR_ELT,
5309 MI.getOperand(1).getReg(), MRI);
5310 int64_t Lane;
5311 if (!Extract || !mi_match(Extract->getOperand(2).getReg(), MRI, m_ICst(Lane)))
5312 return false;
5313 Register Src0 = Extract->getOperand(1).getReg();
5314
5315 const LLT &VecTy = MRI.getType(Src0);
5316
5317 if (VecTy.getSizeInBits() != 128) {
5318 const MachineInstr *ScalarToVector = emitScalarToVector(
5319 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);
5320 assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!");
5321 Src0 = ScalarToVector->getOperand(0).getReg();
5322 }
5323
5324 unsigned Opcode;
5325 if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32)
5326 Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
5327 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16)
5328 Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
5329 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8)
5330 Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
5331 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16)
5332 Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
5333 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8)
5334 Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
5335 else
5336 llvm_unreachable("Unexpected type combo for S/UMov!");
5337
5338 // We may need to generate one of these, depending on the type and sign of the
5339 // input:
5340 // DstReg = SMOV Src0, Lane;
5341 // NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32;
5342 MachineInstr *ExtI = nullptr;
5343 if (DstSize == 64 && !IsSigned) {
5344 Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
5345 MIB.buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane);
5346 ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
5347 .addImm(0)
5348 .addUse(NewReg)
5349 .addImm(AArch64::sub_32);
5350 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
5351 } else
5352 ExtI = MIB.buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane);
5353
5354 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
5355 MI.eraseFromParent();
5356 return true;
5357}
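// Example of the pattern this matches (illustrative MIR):
//
//   %elt:_(s16) = G_EXTRACT_VECTOR_ELT %vec:_(<8 x s16>), %lane
//   %ext:_(s32) = G_SEXT %elt
//
// which selects to a single `smov wN, vM.h[lane]`. The zext/anyext forms use
// UMOV instead, and 64-bit unsigned results go through SUBREG_TO_REG as the
// code above shows.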
5358
5359bool AArch64InstructionSelector::selectInsertElt(MachineInstr &I,
5360 MachineRegisterInfo &MRI) {
5361 assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
5362
5363 // Get information on the destination.
5364 Register DstReg = I.getOperand(0).getReg();
5365 const LLT DstTy = MRI.getType(DstReg);
5366 unsigned VecSize = DstTy.getSizeInBits();
5367
5368 // Get information on the element we want to insert into the destination.
5369 Register EltReg = I.getOperand(2).getReg();
5370 const LLT EltTy = MRI.getType(EltReg);
5371 unsigned EltSize = EltTy.getSizeInBits();
5372 if (EltSize < 16 || EltSize > 64)
5373 return false; // Don't support all element types yet.
5374
5375 // Find the definition of the index. Bail out if it's not defined by a
5376 // G_CONSTANT.
5377 Register IdxReg = I.getOperand(3).getReg();
5378 auto VRegAndVal = getIConstantVRegValWithLookThrough(IdxReg, MRI);
5379 if (!VRegAndVal)
5380 return false;
5381 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
5382
5383 // Perform the lane insert.
5384 Register SrcReg = I.getOperand(1).getReg();
5385 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
5386
5387 if (VecSize < 128) {
5388 // If the vector we're inserting into is smaller than 128 bits, widen it
5389 // to 128 to do the insert.
5390 MachineInstr *ScalarToVec =
5391 emitScalarToVector(VecSize, &AArch64::FPR128RegClass, SrcReg, MIB);
5392 if (!ScalarToVec)
5393 return false;
5394 SrcReg = ScalarToVec->getOperand(0).getReg();
5395 }
5396
5397 // Create an insert into a new FPR128 register.
5398 // Note that if our vector is already 128 bits, we end up emitting an extra
5399 // register.
5400 MachineInstr *InsMI =
5401 emitLaneInsert(std::nullopt, SrcReg, EltReg, LaneIdx, EltRB, MIB);
5402
5403 if (VecSize < 128) {
5404 // If we had to widen to perform the insert, then we have to demote back to
5405 // the original size to get the result we want.
5406 Register DemoteVec = InsMI->getOperand(0).getReg();
5407 const TargetRegisterClass *RC =
5408 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DemoteVec, MRI, TRI));
5409 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5410 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
5411 return false;
5412 }
5413 unsigned SubReg = 0;
5414 if (!getSubRegForClass(RC, TRI, SubReg))
5415 return false;
5416 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
5417 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
5418                   << ")\n");
5419 return false;
5420 }
5421 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
5422 .addReg(DemoteVec, 0, SubReg);
5423 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5424 } else {
5425 // No widening needed.
5426 InsMI->getOperand(0).setReg(DstReg);
5427 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
5428 }
5429
5430 I.eraseFromParent();
5431 return true;
5432}
5433
5434MachineInstr *
5435AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
5436 MachineIRBuilder &MIRBuilder,
5437 MachineRegisterInfo &MRI) {
5438 LLT DstTy = MRI.getType(Dst);
5439 unsigned DstSize = DstTy.getSizeInBits();
5440 if (CV->isNullValue()) {
5441 if (DstSize == 128) {
5442 auto Mov =
5443 MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
5444 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5445 return &*Mov;
5446 }
5447
5448 if (DstSize == 64) {
5449 auto Mov =
5450 MIRBuilder
5451 .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
5452 .addImm(0);
5453 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {})
5454 .addReg(Mov.getReg(0), 0, AArch64::dsub);
5455 RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);
5456 return &*Copy;
5457 }
5458 }
5459
5460 auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);
5461 if (!CPLoad) {
5462 LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");
5463 return nullptr;
5464 }
5465
5466 auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0));
5467 RBI.constrainGenericRegister(
5468 Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI);
5469 return &*Copy;
5470}
5471
5472bool AArch64InstructionSelector::tryOptConstantBuildVec(
5473 MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) {
5474 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5475 unsigned DstSize = DstTy.getSizeInBits();
5476 assert(DstSize <= 128 && "Unexpected build_vec type!");
5477 if (DstSize < 32)
5478 return false;
5479 // Check if we're building a constant vector, in which case we want to
5480 // generate a constant pool load instead of a vector insert sequence.
5481 SmallVector<Constant *, 16> Csts;
5482 for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
5483 // Try to find G_CONSTANT or G_FCONSTANT
5484 auto *OpMI =
5485 getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI);
5486 if (OpMI)
5487 Csts.emplace_back(
5488 const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm()));
5489 else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT,
5490 I.getOperand(Idx).getReg(), MRI)))
5491 Csts.emplace_back(
5492 const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm()));
5493 else
5494 return false;
5495 }
5496 Constant *CV = ConstantVector::get(Csts);
5497 if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI))
5498 return false;
5499 I.eraseFromParent();
5500 return true;
5501}
5502
5503bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
5504 MachineInstr &I, MachineRegisterInfo &MRI) {
5505 // Given:
5506 // %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
5507 //
5508 // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt.
5509 Register Dst = I.getOperand(0).getReg();
5510 Register EltReg = I.getOperand(1).getReg();
5511 LLT EltTy = MRI.getType(EltReg);
5512 // If the index isn't on the same bank as its elements, then this can't be a
5513 // SUBREG_TO_REG.
5514 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
5515 const RegisterBank &DstRB = *RBI.getRegBank(Dst, MRI, TRI);
5516 if (EltRB != DstRB)
5517 return false;
5518 if (any_of(make_range(I.operands_begin() + 2, I.operands_end()),
5519 [&MRI](const MachineOperand &Op) {
5520 return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(),
5521 MRI);
5522 }))
5523 return false;
5524 unsigned SubReg;
5525 const TargetRegisterClass *EltRC = getRegClassForTypeOnBank(EltTy, EltRB);
5526 if (!EltRC)
5527 return false;
5528 const TargetRegisterClass *DstRC =
5529 getRegClassForTypeOnBank(MRI.getType(Dst), DstRB);
5530 if (!DstRC)
5531 return false;
5532 if (!getSubRegForClass(EltRC, TRI, SubReg))
5533 return false;
5534 auto SubregToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})
5535 .addImm(0)
5536 .addUse(EltReg)
5537 .addImm(SubReg);
5538 I.eraseFromParent();
5539 constrainSelectedInstRegOperands(*SubregToReg, TII, TRI, RBI);
5540 return RBI.constrainGenericRegister(Dst, *DstRC, MRI);
5541}
5542
5543bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
5544 MachineRegisterInfo &MRI) {
5545 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5546 // Until we port more of the optimized selections, for now just use a vector
5547 // insert sequence.
5548 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5549 const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
5550 unsigned EltSize = EltTy.getSizeInBits();
5551
5552 if (tryOptConstantBuildVec(I, DstTy, MRI))
5553 return true;
5554 if (tryOptBuildVecToSubregToReg(I, MRI))
5555 return true;
5556
5557 if (EltSize != 8 && EltSize != 16 && EltSize != 32 && EltSize != 64)
5558 return false; // Don't support all element types yet.
5559 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
5560
5561 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5562 MachineInstr *ScalarToVec =
5563 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
5564 I.getOperand(1).getReg(), MIB);
5565 if (!ScalarToVec)
5566 return false;
5567
5568 Register DstVec = ScalarToVec->getOperand(0).getReg();
5569 unsigned DstSize = DstTy.getSizeInBits();
5570
5571 // Keep track of the last MI we inserted. Later on, we might be able to save
5572 // a copy using it.
5573 MachineInstr *PrevMI = nullptr;
5574 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
5575 // Note that if we don't do a subregister copy, we can end up making an
5576 // extra register.
5577 PrevMI = &*emitLaneInsert(std::nullopt, DstVec, I.getOperand(i).getReg(),
5578 i - 1, RB, MIB);
5579 DstVec = PrevMI->getOperand(0).getReg();
5580 }
5581
5582 // If DstTy's size in bits is less than 128, then emit a subregister copy
5583 // from DstVec to the last register we've defined.
5584 if (DstSize < 128) {
5585 // Force this to be FPR using the destination vector.
5586 const TargetRegisterClass *RC =
5587 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
5588 if (!RC)
5589 return false;
5590 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5591 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
5592 return false;
5593 }
5594
5595 unsigned SubReg = 0;
5596 if (!getSubRegForClass(RC, TRI, SubReg))
5597 return false;
5598 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
5599 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
5600                   << ")\n");
5601 return false;
5602 }
5603
5604 Register Reg = MRI.createVirtualRegister(RC);
5605 Register DstReg = I.getOperand(0).getReg();
5606
5607 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, 0, SubReg);
5608 MachineOperand &RegOp = I.getOperand(1);
5609 RegOp.setReg(Reg);
5610 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5611 } else {
5612 // We don't need a subregister copy. Save a copy by re-using the
5613 // destination register on the final insert.
5614 assert(PrevMI && "PrevMI was null?");
5615 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
5616 constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
5617 }
5618
5619 I.eraseFromParent();
5620 return true;
5621}
5622
5623bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc,
5624 unsigned NumVecs,
5625 MachineInstr &I) {
5626 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5627 assert(Opc && "Expected an opcode?");
5628 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
5629 auto &MRI = *MIB.getMRI();
5630 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5631 unsigned Size = Ty.getSizeInBits();
5632 assert((Size == 64 || Size == 128) &&
5633        "Destination must be 64 bits or 128 bits?");
5634 unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
5635 auto Ptr = I.getOperand(I.getNumOperands() - 1).getReg();
5636 assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?");
5637 auto Load = MIB.buildInstr(Opc, {Ty}, {Ptr});
5638 Load.cloneMemRefs(I);
5639 constrainSelectedInstRegOperands(*Load, TII, TRI, RBI);
5640 Register SelectedLoadDst = Load->getOperand(0).getReg();
5641 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
5642 auto Vec = MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(Idx)}, {})
5643 .addReg(SelectedLoadDst, 0, SubReg + Idx);
5644 // Emit the subreg copies and immediately select them.
5645 // FIXME: We should refactor our copy code into an emitCopy helper and
5646 // clean up uses of this pattern elsewhere in the selector.
5647 selectCopy(*Vec, TII, MRI, TRI, RBI);
5648 }
5649 return true;
5650}
5651
5652bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
5653 MachineInstr &I, MachineRegisterInfo &MRI) {
5654 // Find the intrinsic ID.
5655 unsigned IntrinID = I.getIntrinsicID();
5656
5657 const LLT S8 = LLT::scalar(8);
5658 const LLT S16 = LLT::scalar(16);
5659 const LLT S32 = LLT::scalar(32);
5660 const LLT S64 = LLT::scalar(64);
5661 const LLT P0 = LLT::pointer(0, 64);
5662 // Select the instruction.
5663 switch (IntrinID) {
5664 default:
5665 return false;
5666 case Intrinsic::aarch64_ldxp:
5667 case Intrinsic::aarch64_ldaxp: {
5668 auto NewI = MIB.buildInstr(
5669 IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,
5670 {I.getOperand(0).getReg(), I.getOperand(1).getReg()},
5671 {I.getOperand(3)});
5672 NewI.cloneMemRefs(I);
5673 constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
5674 break;
5675 }
5676 case Intrinsic::trap:
5677 MIB.buildInstr(AArch64::BRK, {}, {}).addImm(1);
5678 break;
5679 case Intrinsic::debugtrap:
5680 MIB.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
5681 break;
5682 case Intrinsic::ubsantrap:
5683 MIB.buildInstr(AArch64::BRK, {}, {})
5684 .addImm(I.getOperand(1).getImm() | ('U' << 8));
5685 break;
5686 case Intrinsic::aarch64_neon_ld2: {
5687 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5688 unsigned Opc = 0;
5689 if (Ty == LLT::fixed_vector(8, S8))
5690 Opc = AArch64::LD2Twov8b;
5691 else if (Ty == LLT::fixed_vector(16, S8))
5692 Opc = AArch64::LD2Twov16b;
5693 else if (Ty == LLT::fixed_vector(4, S16))
5694 Opc = AArch64::LD2Twov4h;
5695 else if (Ty == LLT::fixed_vector(8, S16))
5696 Opc = AArch64::LD2Twov8h;
5697 else if (Ty == LLT::fixed_vector(2, S32))
5698 Opc = AArch64::LD2Twov2s;
5699 else if (Ty == LLT::fixed_vector(4, S32))
5700 Opc = AArch64::LD2Twov4s;
5701 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
5702 Opc = AArch64::LD2Twov2d;
5703 else if (Ty == S64 || Ty == P0)
5704 Opc = AArch64::LD1Twov1d;
5705 else
5706 llvm_unreachable("Unexpected type for ld2!");
5707 selectVectorLoadIntrinsic(Opc, 2, I);
5708 break;
5709 }
5710 case Intrinsic::aarch64_neon_ld4: {
5711 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5712 unsigned Opc = 0;
5713 if (Ty == LLT::fixed_vector(8, S8))
5714 Opc = AArch64::LD4Fourv8b;
5715 else if (Ty == LLT::fixed_vector(16, S8))
5716 Opc = AArch64::LD4Fourv16b;
5717 else if (Ty == LLT::fixed_vector(4, S16))
5718 Opc = AArch64::LD4Fourv4h;
5719 else if (Ty == LLT::fixed_vector(8, S16))
5720 Opc = AArch64::LD4Fourv8h;
5721 else if (Ty == LLT::fixed_vector(2, S32))
5722 Opc = AArch64::LD4Fourv2s;
5723 else if (Ty == LLT::fixed_vector(4, S32))
5724 Opc = AArch64::LD4Fourv4s;
5725 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
5726 Opc = AArch64::LD4Fourv2d;
5727 else if (Ty == S64 || Ty == P0)
5728 Opc = AArch64::LD1Fourv1d;
5729 else
5730 llvm_unreachable("Unexpected type for ld4!");
5731 selectVectorLoadIntrinsic(Opc, 4, I);
5732 break;
5733 }
5734 case Intrinsic::aarch64_neon_st2: {
5735 Register Src1 = I.getOperand(1).getReg();
5736 Register Src2 = I.getOperand(2).getReg();
5737 Register Ptr = I.getOperand(3).getReg();
5738 LLT Ty = MRI.getType(Src1);
5739 unsigned Opc;
5740 if (Ty == LLT::fixed_vector(8, S8))
5741 Opc = AArch64::ST2Twov8b;
5742 else if (Ty == LLT::fixed_vector(16, S8))
5743 Opc = AArch64::ST2Twov16b;
5744 else if (Ty == LLT::fixed_vector(4, S16))
5745 Opc = AArch64::ST2Twov4h;
5746 else if (Ty == LLT::fixed_vector(8, S16))
5747 Opc = AArch64::ST2Twov8h;
5748 else if (Ty == LLT::fixed_vector(2, S32))
5749 Opc = AArch64::ST2Twov2s;
5750 else if (Ty == LLT::fixed_vector(4, S32))
5751 Opc = AArch64::ST2Twov4s;
5752 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
5753 Opc = AArch64::ST2Twov2d;
5754 else if (Ty == S64 || Ty == P0)
5755 Opc = AArch64::ST1Twov1d;
5756 else
5757 llvm_unreachable("Unexpected type for st2!");
5758 SmallVector<Register, 2> Regs = {Src1, Src2};
5759 Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB)
5760 : createDTuple(Regs, MIB);
5761 auto Store = MIB.buildInstr(Opc, {}, {Tuple, Ptr});
5762 Store.cloneMemRefs(I);
5763 constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
5764 break;
5765 }
5766 case Intrinsic::aarch64_mops_memset_tag: {
5767 // Transform
5768 // %dst:gpr(p0) = \
5769 // G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.mops.memset.tag),
5770 // \ %dst:gpr(p0), %val:gpr(s64), %n:gpr(s64)
5771 // where %dst is updated, into
5772 // %Rd:GPR64common, %Rn:GPR64) = \
5773 // MOPSMemorySetTaggingPseudo \
5774 // %Rd:GPR64common, %Rn:GPR64, %Rm:GPR64
5775 // where Rd and Rn are tied.
5776 // It is expected that %val has been extended to s64 in legalization.
5777 // Note that the order of the size/value operands is swapped.
5778
5779 Register DstDef = I.getOperand(0).getReg();
5780 // I.getOperand(1) is the intrinsic function
5781 Register DstUse = I.getOperand(2).getReg();
5782 Register ValUse = I.getOperand(3).getReg();
5783 Register SizeUse = I.getOperand(4).getReg();
5784
5785 // MOPSMemorySetTaggingPseudo has two defs; the intrinsic call has only one.
5786 // Therefore an additional virtual register is required for the updated size
5787 // operand. This value is not accessible via the semantics of the intrinsic.
5788 Register SizeDef = MRI.createGenericVirtualRegister(LLT::scalar(64));
5789
5790 auto Memset = MIB.buildInstr(AArch64::MOPSMemorySetTaggingPseudo,
5791 {DstDef, SizeDef}, {DstUse, SizeUse, ValUse});
5792 Memset.cloneMemRefs(I);
5793 constrainSelectedInstRegOperands(*Memset, TII, TRI, RBI);
5794 break;
5795 }
5796 }
5797
5798 I.eraseFromParent();
5799 return true;
5800}
5801
5802bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
5803 MachineRegisterInfo &MRI) {
5804 unsigned IntrinID = I.getIntrinsicID();
5805
5806 switch (IntrinID) {
5807 default:
5808 break;
5809 case Intrinsic::aarch64_crypto_sha1h: {
5810 Register DstReg = I.getOperand(0).getReg();
5811 Register SrcReg = I.getOperand(2).getReg();
5812
5813 // FIXME: Should this be an assert?
5814 if (MRI.getType(DstReg).getSizeInBits() != 32 ||
5815 MRI.getType(SrcReg).getSizeInBits() != 32)
5816 return false;
5817
5818 // The operation has to happen on FPRs. Set up some new FPR registers for
5819 // the source and destination if they are on GPRs.
5820 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
5821 SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
5822 MIB.buildCopy({SrcReg}, {I.getOperand(2)});
5823
5824 // Make sure the copy ends up getting constrained properly.
5825 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
5826 AArch64::GPR32RegClass, MRI);
5827 }
5828
5829 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
5830 DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
5831
5832 // Actually insert the instruction.
5833 auto SHA1Inst = MIB.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
5834 constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
5835
5836 // Did we create a new register for the destination?
5837 if (DstReg != I.getOperand(0).getReg()) {
5838 // Yep. Copy the result of the instruction back into the original
5839 // destination.
5840 MIB.buildCopy({I.getOperand(0)}, {DstReg});
5841 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
5842 AArch64::GPR32RegClass, MRI);
5843 }
5844
5845 I.eraseFromParent();
5846 return true;
5847 }
5848 case Intrinsic::ptrauth_sign: {
5849 Register DstReg = I.getOperand(0).getReg();
5850 Register ValReg = I.getOperand(2).getReg();
5851 uint64_t Key = I.getOperand(3).getImm();
5852 Register DiscReg = I.getOperand(4).getReg();
5853 auto DiscVal = getIConstantVRegVal(DiscReg, MRI);
5854 bool IsDiscZero = DiscVal && DiscVal->isZero();
5855
5856 if (Key > AArch64PACKey::LAST)
5857 return false;
5858
5859 unsigned Opcodes[][4] = {
5860 {AArch64::PACIA, AArch64::PACIB, AArch64::PACDA, AArch64::PACDB},
5861 {AArch64::PACIZA, AArch64::PACIZB, AArch64::PACDZA, AArch64::PACDZB}};
5862 unsigned Opcode = Opcodes[IsDiscZero][Key];
5863
5864 auto PAC = MIB.buildInstr(Opcode, {DstReg}, {ValReg});
5865
5866 if (!IsDiscZero) {
5867 PAC.addUse(DiscReg);
5868 RBI.constrainGenericRegister(DiscReg, AArch64::GPR64spRegClass, MRI);
5869 }
5870
5871 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
5872 I.eraseFromParent();
5873 return true;
5874 }
5875 case Intrinsic::ptrauth_strip: {
5876 Register DstReg = I.getOperand(0).getReg();
5877 Register ValReg = I.getOperand(2).getReg();
5878 uint64_t Key = I.getOperand(3).getImm();
5879
5880 if (Key > AArch64PACKey::LAST)
5881 return false;
5882 unsigned Opcode = getXPACOpcodeForKey((AArch64PACKey::ID)Key);
5883
5884 MIB.buildInstr(Opcode, {DstReg}, {ValReg});
5885
5886 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
5887 RBI.constrainGenericRegister(ValReg, AArch64::GPR64RegClass, MRI);
5888 I.eraseFromParent();
5889 return true;
5890 }
5891 case Intrinsic::frameaddress:
5892 case Intrinsic::returnaddress: {
5893 MachineFunction &MF = *I.getParent()->getParent();
5894 MachineFrameInfo &MFI = MF.getFrameInfo();
5895
5896 unsigned Depth = I.getOperand(2).getImm();
5897 Register DstReg = I.getOperand(0).getReg();
5898 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
5899
5900 if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
5901 if (!MFReturnAddr) {
5902 // Insert the copy from LR/X30 into the entry block, before it can be
5903 // clobbered by anything.
5904 MFI.setReturnAddressIsTaken(true);
5905 MFReturnAddr = getFunctionLiveInPhysReg(
5906 MF, TII, AArch64::LR, AArch64::GPR64RegClass, I.getDebugLoc());
5907 }
5908
5909 if (STI.hasPAuth()) {
5910 MIB.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
5911 } else {
5912 MIB.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
5913 MIB.buildInstr(AArch64::XPACLRI);
5914 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
5915 }
5916
5917 I.eraseFromParent();
5918 return true;
5919 }
5920
5921 MFI.setFrameAddressIsTaken(true);
5922 Register FrameAddr(AArch64::FP);
5923 while (Depth--) {
5924 Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
5925 auto Ldr =
5926 MIB.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}).addImm(0);
5927 constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI);
5928 FrameAddr = NextFrame;
5929 }
5930
5931 if (IntrinID == Intrinsic::frameaddress)
5932 MIB.buildCopy({DstReg}, {FrameAddr});
5933 else {
5934 MFI.setReturnAddressIsTaken(true);
5935
5936 if (STI.hasPAuth()) {
5937 Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
5938 MIB.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
5939 MIB.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
5940 } else {
5941 MIB.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr})
5942 .addImm(1);
5943 MIB.buildInstr(AArch64::XPACLRI);
5944 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
5945 }
5946 }
5947
5948 I.eraseFromParent();
5949 return true;
5950 }
5951 case Intrinsic::swift_async_context_addr:
5952 auto Sub = MIB.buildInstr(AArch64::SUBXri, {I.getOperand(0).getReg()},
5953 {Register(AArch64::FP)})
5954 .addImm(8)
5955 .addImm(0);
5956 constrainSelectedInstRegOperands(*Sub, TII, TRI, RBI);
5957
5958 MF->getFrameInfo().setFrameAddressIsTaken(true);
5959 MF->getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
5960 I.eraseFromParent();
5961 return true;
5962 }
5963 return false;
5964}
5965
5966InstructionSelector::ComplexRendererFns
5967AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
5968 auto MaybeImmed = getImmedFromMO(Root);
5969 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
5970 return std::nullopt;
5971 uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
5972 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
5973}
5974
5975InstructionSelector::ComplexRendererFns
5976AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
5977 auto MaybeImmed = getImmedFromMO(Root);
5978 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
5979 return std::nullopt;
5980 uint64_t Enc = 31 - *MaybeImmed;
5981 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
5982}
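// Illustrative note (an assumption about how the imported patterns consume
// these renderers): the A/B pair corresponds to the immr/imms fields of the
// 32-bit bitfield-move encoding of a shift. For example, `lsl w0, w1, #3` is
// the alias of `ubfm w0, w1, #29, #28`, i.e. immr = (32 - 3) & 0x1f and
// imms = 31 - 3, matching selectShiftA_32 and selectShiftB_32 respectively.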
5983
5984InstructionSelector::ComplexRendererFns
5985AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
5986 auto MaybeImmed = getImmedFromMO(Root);
5987 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
5988 return std::nullopt;
5989 uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
5990 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
5991}
5992
5993InstructionSelector::ComplexRendererFns
5994AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
5995 auto MaybeImmed = getImmedFromMO(Root);
5996 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
5997 return std::nullopt;
5998 uint64_t Enc = 63 - *MaybeImmed;
5999 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6000}
6001
6002/// Helper to select an immediate value that can be represented as a 12-bit
6003/// value shifted left by either 0 or 12. If it is possible to do so, return
6004/// the immediate and shift value. If not, return std::nullopt.
6005///
6006/// Used by selectArithImmed and selectNegArithImmed.
6007InstructionSelector::ComplexRendererFns
6008AArch64InstructionSelector::select12BitValueWithLeftShift(
6009 uint64_t Immed) const {
6010 unsigned ShiftAmt;
6011 if (Immed >> 12 == 0) {
6012 ShiftAmt = 0;
6013 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
6014 ShiftAmt = 12;
6015 Immed = Immed >> 12;
6016 } else
6017 return std::nullopt;
6018
6019 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
6020 return {{
6021 [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
6022 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
6023 }};
6024}
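// Worked examples (illustrative): Immed = 0x123 fits directly (ShiftAmt = 0),
// and Immed = 0x5000 is accepted as 0x5 with ShiftAmt = 12, matching e.g.
// `add x0, x1, #0x5, lsl #12`. Values with bits above bit 23, or with nonzero
// low 12 bits alongside higher bits, are rejected.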
6025
6026/// SelectArithImmed - Select an immediate value that can be represented as
6027/// a 12-bit value shifted left by either 0 or 12. If so, return true with
6028/// Val set to the 12-bit value and Shift set to the shifter operand.
6029InstructionSelector::ComplexRendererFns
6030AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
6031 // This function is called from the addsub_shifted_imm ComplexPattern,
6032 // which lists [imm] as the list of opcodes it's interested in; however,
6033 // we still need to check whether the operand is actually an immediate
6034 // here because the ComplexPattern opcode list is only used in
6035 // root-level opcode matching.
6036 auto MaybeImmed = getImmedFromMO(Root);
6037 if (MaybeImmed == std::nullopt)
6038 return std::nullopt;
6039 return select12BitValueWithLeftShift(*MaybeImmed);
6040}
6041
6042/// SelectNegArithImmed - As above, but negates the value before trying to
6043/// select it.
6044InstructionSelector::ComplexRendererFns
6045AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
6046 // We need a register here, because we need to know if we have a 64 or 32
6047 // bit immediate.
6048 if (!Root.isReg())
6049 return std::nullopt;
6050 auto MaybeImmed = getImmedFromMO(Root);
6051 if (MaybeImmed == std::nullopt)
6052 return std::nullopt;
6053 uint64_t Immed = *MaybeImmed;
6054
6055 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
6056 // have the opposite effect on the C flag, so this pattern mustn't match under
6057 // those circumstances.
6058 if (Immed == 0)
6059 return std::nullopt;
6060
6061 // Check if we're dealing with a 32-bit type on the root or a 64-bit type on
6062 // the root.
6063 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
6064 if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
6065 Immed = ~((uint32_t)Immed) + 1;
6066 else
6067 Immed = ~Immed + 1ULL;
6068
6069 if (Immed & 0xFFFFFFFFFF000000ULL)
6070 return std::nullopt;
6071
6072 Immed &= 0xFFFFFFULL;
6073 return select12BitValueWithLeftShift(Immed);
6074}
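
A quick numeric check of the negation above (a worked example only): a 32-bit compare against -5 arrives here as Immed = 0xFFFFFFFB, and negating modulo 2^32 yields 5, which then goes through the same 12-bit check so the compare can be selected as a CMN.

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t Immed = 0xFFFFFFFBULL;               // -5 held in a 32-bit register
      Immed = ~((uint32_t)Immed) + 1;               // two's-complement negate, 32-bit
      assert(Immed == 5);
      assert((Immed & 0xFFFFFFFFFF000000ULL) == 0); // small enough to try as an immediate
      return 0;
    }
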
6075
6076/// Return true if it is worth folding MI into an extended register. That is,
6077/// if it's safe to pull it into the addressing mode of a load or store as a
6078/// shift.
6079bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
6080 MachineInstr &MI, const MachineRegisterInfo &MRI) const {
6081 // Always fold if there is one use, or if we're optimizing for size.
6082 Register DefReg = MI.getOperand(0).getReg();
6083 if (MRI.hasOneNonDBGUse(DefReg) ||
6084 MI.getParent()->getParent()->getFunction().hasOptSize())
6085 return true;
6086
6087 // It's better to avoid folding and recomputing shifts when we don't have a
6088 // fastpath.
6089 if (!STI.hasLSLFast())
6090 return false;
6091
6092 // We have a fastpath, so folding a shift in and potentially computing it
6093 // many times may be beneficial. Check if this is only used in memory ops.
6094 // If it is, then we should fold.
6095 return all_of(MRI.use_nodbg_instructions(DefReg),
6096 [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
6097}
6098
6099static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) {
6100 switch (Type) {
6101 case AArch64_AM::SXTB:
6102 case AArch64_AM::SXTH:
6103 case AArch64_AM::SXTW:
6104 return true;
6105 default:
6106 return false;
6107 }
6108}
6109
6110InstructionSelector::ComplexRendererFns
6111AArch64InstructionSelector::selectExtendedSHL(
6112 MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
6113 unsigned SizeInBytes, bool WantsExt) const {
6114 assert(Base.isReg() && "Expected base to be a register operand");
6115 assert(Offset.isReg() && "Expected offset to be a register operand");
6116
6117 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
6118 MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
6119
6120 unsigned OffsetOpc = OffsetInst->getOpcode();
6121 bool LookedThroughZExt = false;
6122 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
6123 // Try to look through a ZEXT.
6124 if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
6125 return std::nullopt;
6126
6127 OffsetInst = MRI.getVRegDef(OffsetInst->getOperand(1).getReg());
6128 OffsetOpc = OffsetInst->getOpcode();
6129 LookedThroughZExt = true;
6130
6131 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
6132 return std::nullopt;
6133 }
6134 // Make sure that the memory op is a valid size.
6135 int64_t LegalShiftVal = Log2_32(SizeInBytes);
6136 if (LegalShiftVal == 0)
6137 return std::nullopt;
6138 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
6139 return std::nullopt;
6140
6141 // Now, try to find the specific G_CONSTANT. Start by assuming that the
6142 // register we will offset is the LHS, and the register containing the
6143 // constant is the RHS.
6144 Register OffsetReg = OffsetInst->getOperand(1).getReg();
6145 Register ConstantReg = OffsetInst->getOperand(2).getReg();
6146 auto ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
6147 if (!ValAndVReg) {
6148 // We didn't get a constant on the RHS. If the opcode is a shift, then
6149 // we're done.
6150 if (OffsetOpc == TargetOpcode::G_SHL)
6151 return std::nullopt;
6152
6153 // If we have a G_MUL, we can use either register. Try looking at the RHS.
6154 std::swap(OffsetReg, ConstantReg);
6155 ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
6156 if (!ValAndVReg)
6157 return std::nullopt;
6158 }
6159
6160 // The value must fit into 3 bits, and must be positive. Make sure that is
6161 // true.
6162 int64_t ImmVal = ValAndVReg->Value.getSExtValue();
6163
6164 // Since we're going to pull this into a shift, the constant value must be
6165 // a power of 2. If we got a multiply, then we need to check this.
6166 if (OffsetOpc == TargetOpcode::G_MUL) {
6167 if (!llvm::has_single_bit<uint32_t>(ImmVal))
6168 return std::nullopt;
6169
6170 // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
6171 ImmVal = Log2_32(ImmVal);
6172 }
6173
6174 if ((ImmVal & 0x7) != ImmVal)
6175 return std::nullopt;
6176
6177 // We are only allowed to shift by LegalShiftVal. This shift value is built
6178 // into the instruction, so we can't just use whatever we want.
6179 if (ImmVal != LegalShiftVal)
6180 return std::nullopt;
6181
6182 unsigned SignExtend = 0;
6183 if (WantsExt) {
6184 // Check if the offset is defined by an extend, unless we looked through a
6185 // G_ZEXT earlier.
6186 if (!LookedThroughZExt) {
6187 MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
6188 auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
6189 if (Ext == AArch64_AM::InvalidShiftExtend)
6190 return std::nullopt;
6191
6192 SignExtend = isSignExtendShiftType(Ext) ? 1 : 0;
6193 // We only support SXTW for signed extension here.
6194 if (SignExtend && Ext != AArch64_AM::SXTW)
6195 return std::nullopt;
6196 OffsetReg = ExtInst->getOperand(1).getReg();
6197 }
6198
6199 // Need a 32-bit wide register here.
6200 MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
6201 OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
6202 }
6203
6204 // We can use the LHS of the GEP as the base, and the LHS of the shift as an
6205 // offset. Signify that we are shifting by setting the shift flag to 1.
6206 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
6207 [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
6208 [=](MachineInstrBuilder &MIB) {
6209 // Need to add both immediates here to make sure that they are both
6210 // added to the instruction.
6211 MIB.addImm(SignExtend);
6212 MIB.addImm(1);
6213 }}};
6214}
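
The multiply-to-shift reasoning above can be restated without the LLVM helpers. The sketch below (hypothetical names, standard C++ only) shows when a G_MUL by a constant is foldable as the load/store's built-in shift:

    #include <cassert>
    #include <cstdint>

    static unsigned log2u(uint64_t V) {          // assumes V is a power of two
      unsigned L = 0;
      while (V >>= 1)
        ++L;
      return L;
    }

    // A G_MUL by ImmVal can only fold into the addressing mode if ImmVal is a
    // power of two whose log2 equals the shift encoded by the access
    // (log2 of the access size in bytes).
    bool mulFoldsIntoAddrMode(uint64_t ImmVal, unsigned SizeInBytes) {
      unsigned LegalShiftVal = log2u(SizeInBytes);
      if (LegalShiftVal == 0)
        return false;                            // 1-byte accesses are not handled here
      bool IsPow2 = ImmVal != 0 && (ImmVal & (ImmVal - 1)) == 0;
      return IsPow2 && log2u(ImmVal) == LegalShiftVal;
    }

    int main() {
      assert(mulFoldsIntoAddrMode(8, 8));        // x3 * 8 folds into ldr x, [x2, x3, lsl #3]
      assert(!mulFoldsIntoAddrMode(4, 8));       // wrong scale, no fold
      return 0;
    }
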
6215
6216/// This is used for computing addresses like this:
6217///
6218/// ldr x1, [x2, x3, lsl #3]
6219///
6220/// Where x2 is the base register, and x3 is an offset register. The shift-left
6221/// is a constant value specific to this load instruction. That is, we'll never
6222/// see anything other than a 3 here (which corresponds to the size of the
6223/// element being loaded.)
6224InstructionSelector::ComplexRendererFns
6225AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
6226 MachineOperand &Root, unsigned SizeInBytes) const {
6227 if (!Root.isReg())
6228 return std::nullopt;
6229 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
6230
6231 // We want to find something like this:
6232 //
6233 // val = G_CONSTANT LegalShiftVal
6234 // shift = G_SHL off_reg val
6235 // ptr = G_PTR_ADD base_reg shift
6236 // x = G_LOAD ptr
6237 //
6238 // And fold it into this addressing mode:
6239 //
6240 // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
6241
6242 // Check if we can find the G_PTR_ADD.
6243 MachineInstr *PtrAdd =
6244 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
6245 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
6246 return std::nullopt;
6247
6248 // Now, try to match an opcode which will match our specific offset.
6249 // We want a G_SHL or a G_MUL.
6250 MachineInstr *OffsetInst =
6251 getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI);
6252 return selectExtendedSHL(Root, PtrAdd->getOperand(1),
6253 OffsetInst->getOperand(0), SizeInBytes,
6254 /*WantsExt=*/false);
6255}
6256
6257/// This is used for computing addresses like this:
6258///
6259/// ldr x1, [x2, x3]
6260///
6261/// Where x2 is the base register, and x3 is an offset register.
6262///
6263/// When possible (or profitable) to fold a G_PTR_ADD into the address
6264/// calculation, this will do so. Otherwise, it will return std::nullopt.
6265InstructionSelector::ComplexRendererFns
6266AArch64InstructionSelector::selectAddrModeRegisterOffset(
6267 MachineOperand &Root) const {
6268 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
6269
6270 // We need a GEP.
6271 MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
6272 if (Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
6273 return std::nullopt;
6274
6275 // If this is used more than once, let's not bother folding.
6276 // TODO: Check if they are memory ops. If they are, then we can still fold
6277 // without having to recompute anything.
6278 if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg()))
6279 return std::nullopt;
6280
6281 // Base is the GEP's LHS, offset is its RHS.
6282 return {{[=](MachineInstrBuilder &MIB) {
6283 MIB.addUse(Gep->getOperand(1).getReg());
6284 },
6285 [=](MachineInstrBuilder &MIB) {
6286 MIB.addUse(Gep->getOperand(2).getReg());
6287 },
6288 [=](MachineInstrBuilder &MIB) {
6289 // Need to add both immediates here to make sure that they are both
6290 // added to the instruction.
6291 MIB.addImm(0);
6292 MIB.addImm(0);
6293 }}};
6294}
6295
6296/// This is intended to be equivalent to selectAddrModeXRO in
6297/// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
6298InstructionSelector::ComplexRendererFns
6299AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
6300 unsigned SizeInBytes) const {
6301 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
6302 if (!Root.isReg())
6303 return std::nullopt;
6304 MachineInstr *PtrAdd =
6305 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
6306 if (!PtrAdd)
6307 return std::nullopt;
6308
6309 // Check for immediates which cannot be encoded in the [base + imm]
6310 // addressing mode, and can't be encoded in an add/sub. If this happens, we'll
6311 // end up with code like:
6312 //
6313 // mov x0, wide
6314 // add x1 base, x0
6315 // ldr x2, [x1, x0]
6316 //
6317 // In this situation, we can use the [base, xreg] addressing mode to save an
6318 // add/sub:
6319 //
6320 // mov x0, wide
6321 // ldr x2, [base, x0]
6322 auto ValAndVReg =
6323 getIConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI);
6324 if (ValAndVReg) {
6325 unsigned Scale = Log2_32(SizeInBytes);
6326 int64_t ImmOff = ValAndVReg->Value.getSExtValue();
6327
6328 // Skip immediates that can be selected in the load/store addressing
6329 // mode.
6330 if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
6331 ImmOff < (0x1000 << Scale))
6332 return std::nullopt;
6333
6334 // Helper lambda to decide whether or not it is preferable to emit an add.
6335 auto isPreferredADD = [](int64_t ImmOff) {
6336 // Constants in [0x0, 0xfff] can be encoded in an add.
6337 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
6338 return true;
6339
6340 // Can it be encoded in an add lsl #12?
6341 if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
6342 return false;
6343
6344 // It can be encoded in an add lsl #12, but we may not want to. If it is
6345 // possible to select this as a single movz, then prefer that. A single
6346 // movz is faster than an add with a shift.
6347 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
6348 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
6349 };
6350
6351 // If the immediate can be encoded in a single add/sub, then bail out.
6352 if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
6353 return std::nullopt;
6354 }
6355
6356 // Try to fold shifts into the addressing mode.
6357 auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
6358 if (AddrModeFns)
6359 return AddrModeFns;
6360
6361 // If that doesn't work, see if it's possible to fold in registers from
6362 // a GEP.
6363 return selectAddrModeRegisterOffset(Root);
6364}
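
A hand check of the isPreferredADD masks above: 0x4000 does not fit a plain add immediate, does fit "add ..., lsl #12", but is also a single movz, so the lambda returns false and the [base, xreg] form is kept. This is an arithmetic verification of an assumed reading of the masks:

    #include <cassert>
    #include <cstdint>

    int main() {
      int64_t ImmOff = 0x4000;
      bool InPlainAdd    = (ImmOff & 0xfffffffffffff000LL) == 0; // only bits 0..11 set
      bool InShiftedAdd  = (ImmOff & 0xffffffffff000fffLL) == 0; // only bits 12..23 set
      bool NotSingleMovz = (ImmOff & 0xffffffffff00ffffLL) != 0 &&
                           (ImmOff & 0xffffffffffff0fffLL) != 0;
      assert(!InPlainAdd && InShiftedAdd && !NotSingleMovz);
      return 0;
    }
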
6365
6366/// This is used for computing addresses like this:
6367///
6368/// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
6369///
6370/// Where we have a 64-bit base register, a 32-bit offset register, and an
6371/// extend (which may or may not be signed).
6372InstructionSelector::ComplexRendererFns
6373AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
6374 unsigned SizeInBytes) const {
6375 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
6376
6377 MachineInstr *PtrAdd =
6378 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
6379 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
6380 return std::nullopt;
6381
6382 MachineOperand &LHS = PtrAdd->getOperand(1);
6383 MachineOperand &RHS = PtrAdd->getOperand(2);
6384 MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);
6385
6386 // The first case is the same as selectAddrModeXRO, except we need an extend.
6387 // In this case, we try to find a shift and extend, and fold them into the
6388 // addressing mode.
6389 //
6390 // E.g.
6391 //
6392 // off_reg = G_Z/S/ANYEXT ext_reg
6393 // val = G_CONSTANT LegalShiftVal
6394 // shift = G_SHL off_reg val
6395 // ptr = G_PTR_ADD base_reg shift
6396 // x = G_LOAD ptr
6397 //
6398 // In this case we can get a load like this:
6399 //
6400 // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
6401 auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
6402 SizeInBytes, /*WantsExt=*/true);
6403 if (ExtendedShl)
6404 return ExtendedShl;
6405
6406 // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
6407 //
6408 // e.g.
6409 // ldr something, [base_reg, ext_reg, sxtw]
6410 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
6411 return std::nullopt;
6412
6413 // Check if this is an extend. We'll get an extend type if it is.
6414 AArch64_AM::ShiftExtendType Ext =
6415 getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
6416 if (Ext == AArch64_AM::InvalidShiftExtend)
6417 return std::nullopt;
6418
6419 // Need a 32-bit wide register.
6420 MachineIRBuilder MIB(*PtrAdd);
6421 Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(),
6422 AArch64::GPR32RegClass, MIB);
6423 unsigned SignExtend = Ext == AArch64_AM::SXTW;
6424
6425 // Base is LHS, offset is ExtReg.
6426 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
6427 [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
6428 [=](MachineInstrBuilder &MIB) {
6429 MIB.addImm(SignExtend);
6430 MIB.addImm(0);
6431 }}};
6432}
6433
6434/// Select a "register plus unscaled signed 9-bit immediate" address. This
6435/// should only match when there is an offset that is not valid for a scaled
6436/// immediate addressing mode. The "Size" argument is the size in bytes of the
6437/// memory reference, which is needed here to know what is valid for a scaled
6438/// immediate.
6439InstructionSelector::ComplexRendererFns
6440AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
6441 unsigned Size) const {
6442 MachineRegisterInfo &MRI =
6443 Root.getParent()->getParent()->getParent()->getRegInfo();
6444
6445 if (!Root.isReg())
12. Taking false branch
6446 return std::nullopt;
6447
6448 if (!isBaseWithConstantOffset(Root, MRI))
13. Assuming the condition is false
14. Taking false branch
6449 return std::nullopt;
6450
6451 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
6452
6453 MachineOperand &OffImm = RootDef->getOperand(2);
6454 if (!OffImm.isReg())
15. Taking false branch
6455 return std::nullopt;
6456 MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
6457 if (RHS->getOpcode() != TargetOpcode::G_CONSTANT)
16. Assuming the condition is false
17. Taking false branch
6458 return std::nullopt;
6459 int64_t RHSC;
6460 MachineOperand &RHSOp1 = RHS->getOperand(1);
6461 if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
18. Assuming the condition is false
19. Taking false branch
6462 return std::nullopt;
6463 RHSC = RHSOp1.getCImm()->getSExtValue();
6464
6465 // If the offset is valid as a scaled immediate, don't match here.
6466 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size)))
20. Assuming the condition is true
21. Assuming 'RHSC' is >= 0
22. Calling 'Log2_32'
24. Returning from 'Log2_32'
25. The result of the left shift is undefined due to shifting by '4294967295', which is greater or equal to the width of type 'int'
6467 return std::nullopt;
6468 if (RHSC >= -256 && RHSC < 256) {
6469 MachineOperand &Base = RootDef->getOperand(1);
6470 return {{
6471 [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
6472 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
6473 }};
6474 }
6475 return std::nullopt;
6476}
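
This function is where the reported path ends: with Size == 0 on the path above, Log2_32 returns 4294967295 (it is defined, in the same header as the excerpt further below, as 31 minus the leading-zero count, and that count is 32 for an input of 0), so the shift count in '0x1000 << Log2_32(Size)' exceeds the width of int. A minimal stand-alone model of the flagged expression, with one possible guard; the guard is shown only for illustration and is not the upstream fix:

    #include <cstdint>

    // Model of Log2_32: wraps to 0xFFFFFFFF (4294967295) for Value == 0.
    static unsigned log2_32_model(uint32_t Value) {
      unsigned LeadingZeros = 0;
      for (int Bit = 31; Bit >= 0 && !(Value & (1u << Bit)); --Bit)
        ++LeadingZeros;
      return 31 - LeadingZeros;
    }

    // Hypothetical guarded form of the check at line 6466; the real code has
    // no Size != 0 check, which is exactly what the analyzer reports.
    bool offsetFitsScaledImm(int64_t RHSC, unsigned Size) {
      if (Size == 0)
        return false;
      return (RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
             RHSC < (int64_t(0x1000) << log2_32_model(Size));
    }
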
6477
6478InstructionSelector::ComplexRendererFns
6479AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
6480 unsigned Size,
6481 MachineRegisterInfo &MRI) const {
6482 if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
6483 return std::nullopt;
6484 MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg());
6485 if (Adrp.getOpcode() != AArch64::ADRP)
6486 return std::nullopt;
6487
6488 // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
6489 auto Offset = Adrp.getOperand(1).getOffset();
6490 if (Offset % Size != 0)
6491 return std::nullopt;
6492
6493 auto GV = Adrp.getOperand(1).getGlobal();
6494 if (GV->isThreadLocal())
6495 return std::nullopt;
6496
6497 auto &MF = *RootDef.getParent()->getParent();
6498 if (GV->getPointerAlignment(MF.getDataLayout()) < Size)
6499 return std::nullopt;
6500
6501 unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget());
6502 MachineIRBuilder MIRBuilder(RootDef);
6503 Register AdrpReg = Adrp.getOperand(0).getReg();
6504 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },
6505 [=](MachineInstrBuilder &MIB) {
6506 MIB.addGlobalAddress(GV, Offset,
6507 OpFlags | AArch64II::MO_PAGEOFF |
6508 AArch64II::MO_NC);
6509 }}};
6510}
6511
6512/// Select a "register plus scaled unsigned 12-bit immediate" address. The
6513/// "Size" argument is the size in bytes of the memory reference, which
6514/// determines the scale.
6515InstructionSelector::ComplexRendererFns
6516AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
6517 unsigned Size) const {
6518 MachineFunction &MF = *Root.getParent()->getParent()->getParent();
6519 MachineRegisterInfo &MRI = MF.getRegInfo();
6520
6521 if (!Root.isReg())
4. Taking false branch
6522 return std::nullopt;
6523
6524 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
6525 if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
5. Assuming the condition is false
6. Taking false branch
6526 return {{
6527 [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
6528 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
6529 }};
6530 }
6531
6532 CodeModel::Model CM = MF.getTarget().getCodeModel();
6533 // Check if we can fold in the ADD of small code model ADRP + ADD address.
6534 if (CM == CodeModel::Small) {
7. Assuming 'CM' is not equal to Small
8. Taking false branch
6535 auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI);
6536 if (OpFns)
6537 return OpFns;
6538 }
6539
6540 if (isBaseWithConstantOffset(Root, MRI)) {
9. Assuming the condition is false
10. Taking false branch
6541 MachineOperand &LHS = RootDef->getOperand(1);
6542 MachineOperand &RHS = RootDef->getOperand(2);
6543 MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
6544 MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
6545
6546 int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
6547 unsigned Scale = Log2_32(Size);
6548 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
6549 if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
6550 return {{
6551 [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
6552 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
6553 }};
6554
6555 return {{
6556 [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
6557 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
6558 }};
6559 }
6560 }
6561
6562 // Before falling back to our general case, check if the unscaled
6563 // instructions can handle this. If so, that's preferable.
6564 if (selectAddrModeUnscaled(Root, Size))
11. Calling 'AArch64InstructionSelector::selectAddrModeUnscaled'
6565 return std::nullopt;
6566
6567 return {{
6568 [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
6569 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
6570 }};
6571}
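
Worked example of the scaled-immediate path above: an 8-byte access at base+16 passes the range check and is rendered with the immediate field 16 >> 3 == 2, i.e. the encoded offset is scaled by the access size.

    #include <cassert>
    #include <cstdint>

    int main() {
      int64_t RHSC = 16;
      unsigned Size = 8, Scale = 3;                 // Log2_32(8)
      bool Fits = (RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
                  RHSC < (int64_t(0x1000) << Scale);
      assert(Fits && (RHSC >> Scale) == 2);         // ldr x0, [xN, #16]
      return 0;
    }
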
6572
6573/// Given a shift instruction, return the correct shift type for that
6574/// instruction.
6575static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
6576 switch (MI.getOpcode()) {
6577 default:
6578 return AArch64_AM::InvalidShiftExtend;
6579 case TargetOpcode::G_SHL:
6580 return AArch64_AM::LSL;
6581 case TargetOpcode::G_LSHR:
6582 return AArch64_AM::LSR;
6583 case TargetOpcode::G_ASHR:
6584 return AArch64_AM::ASR;
6585 case TargetOpcode::G_ROTR:
6586 return AArch64_AM::ROR;
6587 }
6588}
6589
6590/// Select a "shifted register" operand. If the value is not shifted, set the
6591/// shift operand to a default value of "lsl 0".
6592InstructionSelector::ComplexRendererFns
6593AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,
6594 bool AllowROR) const {
6595 if (!Root.isReg())
6596 return std::nullopt;
6597 MachineRegisterInfo &MRI =
6598 Root.getParent()->getParent()->getParent()->getRegInfo();
6599
6600 // Check if the operand is defined by an instruction which corresponds to
6601 // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
6602 MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
6603 AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);
6604 if (ShType == AArch64_AM::InvalidShiftExtend)
6605 return std::nullopt;
6606 if (ShType == AArch64_AM::ROR && !AllowROR)
6607 return std::nullopt;
6608 if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI))
6609 return std::nullopt;
6610
6611 // Need an immediate on the RHS.
6612 MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
6613 auto Immed = getImmedFromMO(ShiftRHS);
6614 if (!Immed)
6615 return std::nullopt;
6616
6617 // We have something that we can fold. Fold in the shift's LHS and RHS into
6618 // the instruction.
6619 MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
6620 Register ShiftReg = ShiftLHS.getReg();
6621
6622 unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
6623 unsigned Val = *Immed & (NumBits - 1);
6624 unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);
6625
6626 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
6627 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
6628}
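
Small check of the masking step above: the folded shift amount is reduced modulo the register width, so a 32-bit operand only ever encodes 0..31.

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t Immed = 35;
      assert((Immed & (32 - 1)) == 3);  // 32-bit shift: amount wraps to 3
      assert((Immed & (64 - 1)) == 35); // 64-bit shift: kept as-is
      return 0;
    }
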
6629
6630AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
6631 MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
6632 unsigned Opc = MI.getOpcode();
6633
6634 // Handle explicit extend instructions first.
6635 if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
6636 unsigned Size;
6637 if (Opc == TargetOpcode::G_SEXT)
6638 Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
6639 else
6640 Size = MI.getOperand(2).getImm();
6641 assert(Size != 64 && "Extend from 64 bits?");
6642 switch (Size) {
6643 case 8:
6644 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTB;
6645 case 16:
6646 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTH;
6647 case 32:
6648 return AArch64_AM::SXTW;
6649 default:
6650 return AArch64_AM::InvalidShiftExtend;
6651 }
6652 }
6653
6654 if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
6655 unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
6656 assert(Size != 64 && "Extend from 64 bits?");
6657 switch (Size) {
6658 case 8:
6659 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTB;
6660 case 16:
6661 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTH;
6662 case 32:
6663 return AArch64_AM::UXTW;
6664 default:
6665 return AArch64_AM::InvalidShiftExtend;
6666 }
6667 }
6668
6669 // Don't have an explicit extend. Try to handle a G_AND with a constant mask
6670 // on the RHS.
6671 if (Opc != TargetOpcode::G_AND)
6672 return AArch64_AM::InvalidShiftExtend;
6673
6674 std::optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));
6675 if (!MaybeAndMask)
6676 return AArch64_AM::InvalidShiftExtend;
6677 uint64_t AndMask = *MaybeAndMask;
6678 switch (AndMask) {
6679 default:
6680 return AArch64_AM::InvalidShiftExtend;
6681 case 0xFF:
6682 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
6683 case 0xFFFF:
6684 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
6685 case 0xFFFFFFFF:
6686 return AArch64_AM::UXTW;
6687 }
6688}
6689
6690Register AArch64InstructionSelector::moveScalarRegClass(
6691 Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const {
6692 MachineRegisterInfo &MRI = *MIB.getMRI();
6693 auto Ty = MRI.getType(Reg);
6694 assert(!Ty.isVector() && "Expected scalars only!");
6695 if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC))
6696 return Reg;
6697
6698 // Create a copy and immediately select it.
6699 // FIXME: We should have an emitCopy function?
6700 auto Copy = MIB.buildCopy({&RC}, {Reg});
6701 selectCopy(*Copy, TII, MRI, TRI, RBI);
6702 return Copy.getReg(0);
6703}
6704
6705/// Select an "extended register" operand. This operand folds in an extend
6706/// followed by an optional left shift.
6707InstructionSelector::ComplexRendererFns
6708AArch64InstructionSelector::selectArithExtendedRegister(
6709 MachineOperand &Root) const {
6710 if (!Root.isReg())
6711 return std::nullopt;
6712 MachineRegisterInfo &MRI =
6713 Root.getParent()->getParent()->getParent()->getRegInfo();
6714
6715 uint64_t ShiftVal = 0;
6716 Register ExtReg;
6717 AArch64_AM::ShiftExtendType Ext;
6718 MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
6719 if (!RootDef)
6720 return std::nullopt;
6721
6722 if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI))
6723 return std::nullopt;
6724
6725 // Check if we can fold a shift and an extend.
6726 if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
6727 // Look for a constant on the RHS of the shift.
6728 MachineOperand &RHS = RootDef->getOperand(2);
6729 std::optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS);
6730 if (!MaybeShiftVal)
6731 return std::nullopt;
6732 ShiftVal = *MaybeShiftVal;
6733 if (ShiftVal > 4)
6734 return std::nullopt;
6735 // Look for a valid extend instruction on the LHS of the shift.
6736 MachineOperand &LHS = RootDef->getOperand(1);
6737 MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI);
6738 if (!ExtDef)
6739 return std::nullopt;
6740 Ext = getExtendTypeForInst(*ExtDef, MRI);
6741 if (Ext == AArch64_AM::InvalidShiftExtend)
6742 return std::nullopt;
6743 ExtReg = ExtDef->getOperand(1).getReg();
6744 } else {
6745 // Didn't get a shift. Try just folding an extend.
6746 Ext = getExtendTypeForInst(*RootDef, MRI);
6747 if (Ext == AArch64_AM::InvalidShiftExtend)
6748 return std::nullopt;
6749 ExtReg = RootDef->getOperand(1).getReg();
6750
6751 // If we have a 32 bit instruction which zeroes out the high half of a
6752 // register, we get an implicit zero extend for free. Check if we have one.
6753 // FIXME: We actually emit the extend right now even though we don't have
6754 // to.
6755 if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
6756 MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
6757 if (isDef32(*ExtInst))
6758 return std::nullopt;
6759 }
6760 }
6761
6762 // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
6763 // copy.
6764 MachineIRBuilder MIB(*RootDef);
6765 ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB);
6766
6767 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
6768 [=](MachineInstrBuilder &MIB) {
6769 MIB.addImm(getArithExtendImm(Ext, ShiftVal));
6770 }}};
6771}
6772
6773void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
6774 const MachineInstr &MI,
6775 int OpIdx) const {
6776 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
6777 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
6778 "Expected G_CONSTANT");
6779 std::optional<int64_t> CstVal =
6780 getIConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI);
6781 assert(CstVal && "Expected constant value");
6782 MIB.addImm(*CstVal);
6783}
6784
6785void AArch64InstructionSelector::renderLogicalImm32(
6786 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
6787 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
6788 "Expected G_CONSTANT");
6789 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
6790 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32);
6791 MIB.addImm(Enc);
6792}
6793
6794void AArch64InstructionSelector::renderLogicalImm64(
6795 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
6796 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
6797 "Expected G_CONSTANT");
6798 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
6799 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64);
6800 MIB.addImm(Enc);
6801}
6802
6803void AArch64InstructionSelector::renderFPImm16(MachineInstrBuilder &MIB,
6804 const MachineInstr &MI,
6805 int OpIdx) const {
6806 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
6807 "Expected G_FCONSTANT");
6808 MIB.addImm(
6809 AArch64_AM::getFP16Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
6810}
6811
6812void AArch64InstructionSelector::renderFPImm32(MachineInstrBuilder &MIB,
6813 const MachineInstr &MI,
6814 int OpIdx) const {
6815 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
6816 "Expected G_FCONSTANT");
6817 MIB.addImm(
6818 AArch64_AM::getFP32Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
6819}
6820
6821void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB,
6822 const MachineInstr &MI,
6823 int OpIdx) const {
6824 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
6825 "Expected G_FCONSTANT");
6826 MIB.addImm(
6827 AArch64_AM::getFP64Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
6828}
6829
6830void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
6831 MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
6832 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
6833 "Expected G_FCONSTANT");
6834 MIB.addImm(AArch64_AM::encodeAdvSIMDModImmType4(MI.getOperand(1)
6835 .getFPImm()
6836 ->getValueAPF()
6837 .bitcastToAPInt()
6838 .getZExtValue()));
6839}
6840
6841bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
6842 const MachineInstr &MI, unsigned NumBytes) const {
6843 if (!MI.mayLoadOrStore())
6844 return false;
6845 assert(MI.hasOneMemOperand() &&
6846 "Expected load/store to have only one mem op!");
6847 return (*MI.memoperands_begin())->getSize() == NumBytes;
6848}
6849
6850bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
6851 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
6852 if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
6853 return false;
6854
6855 // Only return true if we know the operation will zero-out the high half of
6856 // the 64-bit register. Truncates can be subregister copies, which don't
6857 // zero out the high bits. Copies and other copy-like instructions can be
6858 // fed by truncates, or could be lowered as subregister copies.
6859 switch (MI.getOpcode()) {
6860 default:
6861 return true;
6862 case TargetOpcode::COPY:
6863 case TargetOpcode::G_BITCAST:
6864 case TargetOpcode::G_TRUNC:
6865 case TargetOpcode::G_PHI:
6866 return false;
6867 }
6868}
6869
6870
6871// Perform fixups on the given PHI instruction's operands to force them all
6872// to be the same as the destination regbank.
6873static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
6874 const AArch64RegisterBankInfo &RBI) {
6875 assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
6876 Register DstReg = MI.getOperand(0).getReg();
6877 const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg);
6878 assert(DstRB && "Expected PHI dst to have regbank assigned");
6879 MachineIRBuilder MIB(MI);
6880
6881 // Go through each operand and ensure it has the same regbank.
6882 for (MachineOperand &MO : llvm::drop_begin(MI.operands())) {
6883 if (!MO.isReg())
6884 continue;
6885 Register OpReg = MO.getReg();
6886 const RegisterBank *RB = MRI.getRegBankOrNull(OpReg);
6887 if (RB != DstRB) {
6888 // Insert a cross-bank copy.
6889 auto *OpDef = MRI.getVRegDef(OpReg);
6890 const LLT &Ty = MRI.getType(OpReg);
6891 MachineBasicBlock &OpDefBB = *OpDef->getParent();
6892
6893 // Any instruction we insert must appear after all PHIs in the block
6894 // for the block to be valid MIR.
6895 MachineBasicBlock::iterator InsertPt = std::next(OpDef->getIterator());
6896 if (InsertPt != OpDefBB.end() && InsertPt->isPHI())
6897 InsertPt = OpDefBB.getFirstNonPHI();
6898 MIB.setInsertPt(*OpDef->getParent(), InsertPt);
6899 auto Copy = MIB.buildCopy(Ty, OpReg);
6900 MRI.setRegBank(Copy.getReg(0), *DstRB);
6901 MO.setReg(Copy.getReg(0));
6902 }
6903 }
6904}
6905
6906void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
6907 // We're looking for PHIs, build a list so we don't invalidate iterators.
6908 MachineRegisterInfo &MRI = MF.getRegInfo();
6909 SmallVector<MachineInstr *, 32> Phis;
6910 for (auto &BB : MF) {
6911 for (auto &MI : BB) {
6912 if (MI.getOpcode() == TargetOpcode::G_PHI)
6913 Phis.emplace_back(&MI);
6914 }
6915 }
6916
6917 for (auto *MI : Phis) {
6918 // We need to do some work here if the operand types are < 16 bit and they
6919 // are split across fpr/gpr banks. Since all types <32b on gpr
6920 // end up being assigned gpr32 regclasses, we can end up with PHIs here
6921 // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't
6922 // be selecting heterogeneous regbanks for operands if possible, but we
6923 // still need to be able to deal with it here.
6924 //
6925 // To fix this, if we have a gpr-bank operand < 32b in size and at least
6926 // one other operand is on the fpr bank, then we add cross-bank copies
6927 // to homogenize the operand banks. For simplicity the bank that we choose
6928 // to settle on is whatever bank the def operand has. For example:
6929 //
6930 // %endbb:
6931 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
6932 // =>
6933 // %bb2:
6934 // ...
6935 // %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
6936 // ...
6937 // %endbb:
6938 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
6939 bool HasGPROp = false, HasFPROp = false;
6940 for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) {
6941 if (!MO.isReg())
6942 continue;
6943 const LLT &Ty = MRI.getType(MO.getReg());
6944 if (!Ty.isValid() || !Ty.isScalar())
6945 break;
6946 if (Ty.getSizeInBits() >= 32)
6947 break;
6948 const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
6949 // If for some reason we don't have a regbank yet, don't try anything.
6950 if (!RB)
6951 break;
6952
6953 if (RB->getID() == AArch64::GPRRegBankID)
6954 HasGPROp = true;
6955 else
6956 HasFPROp = true;
6957 }
6958 // We have heterogeneous regbanks, so fix them up.
6959 if (HasGPROp && HasFPROp)
6960 fixupPHIOpBanks(*MI, MRI, RBI);
6961 }
6962}
6963
6964namespace llvm {
6965InstructionSelector *
6966createAArch64InstructionSelector(const AArch64TargetMachine &TM,
6967 AArch64Subtarget &Subtarget,
6968 AArch64RegisterBankInfo &RBI) {
6969 return new AArch64InstructionSelector(TM, Subtarget, RBI);
6970}
6971}

/build/source/llvm/include/llvm/Support/MathExtras.h

1//===-- llvm/Support/MathExtras.h - Useful math functions -------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains some functions that are useful for math stuff.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_SUPPORT_MATHEXTRAS_H
14#define LLVM_SUPPORT_MATHEXTRAS_H
15
16#include "llvm/ADT/bit.h"
17#include "llvm/Support/Compiler.h"
18#include <cassert>
19#include <climits>
20#include <cstdint>
21#include <cstring>
22#include <limits>
23#include <type_traits>
24
25namespace llvm {
26
27/// Mathematical constants.
28namespace numbers {
29// TODO: Track C++20 std::numbers.
30// TODO: Favor using the hexadecimal FP constants (requires C++17).
31constexpr double e = 2.7182818284590452354, // (0x1.5bf0a8b145749P+1) https://oeis.org/A001113
32 egamma = .57721566490153286061, // (0x1.2788cfc6fb619P-1) https://oeis.org/A001620
33 ln2 = .69314718055994530942, // (0x1.62e42fefa39efP-1) https://oeis.org/A002162
34 ln10 = 2.3025850929940456840, // (0x1.24bb1bbb55516P+1) https://oeis.org/A002392
35 log2e = 1.4426950408889634074, // (0x1.71547652b82feP+0)
36 log10e = .43429448190325182765, // (0x1.bcb7b1526e50eP-2)
37 pi = 3.1415926535897932385, // (0x1.921fb54442d18P+1) https://oeis.org/A000796
38 inv_pi = .31830988618379067154, // (0x1.45f306bc9c883P-2) https://oeis.org/A049541
39 sqrtpi = 1.7724538509055160273, // (0x1.c5bf891b4ef6bP+0) https://oeis.org/A002161
40 inv_sqrtpi = .56418958354775628695, // (0x1.20dd750429b6dP-1) https://oeis.org/A087197
41 sqrt2 = 1.4142135623730950488, // (0x1.6a09e667f3bcdP+0) https://oeis.org/A002193
42 inv_sqrt2 = .70710678118654752440, // (0x1.6a09e667f3bcdP-1)
43 sqrt3 = 1.7320508075688772935, // (0x1.bb67ae8584caaP+0) https://oeis.org/A002194
44 inv_sqrt3 = .57735026918962576451, // (0x1.279a74590331cP-1)
45 phi = 1.6180339887498948482; // (0x1.9e3779b97f4a8P+0) https://oeis.org/A001622
46constexpr float ef = 2.71828183F, // (0x1.5bf0a8P+1) https://oeis.org/A001113
47 egammaf = .577215665F, // (0x1.2788d0P-1) https://oeis.org/A001620
48 ln2f = .693147181F, // (0x1.62e430P-1) https://oeis.org/A002162
49 ln10f = 2.30258509F, // (0x1.26bb1cP+1) https://oeis.org/A002392
50 log2ef = 1.44269504F, // (0x1.715476P+0)
51 log10ef = .434294482F, // (0x1.bcb7b2P-2)
52 pif = 3.14159265F, // (0x1.921fb6P+1) https://oeis.org/A000796
53 inv_pif = .318309886F, // (0x1.45f306P-2) https://oeis.org/A049541
54 sqrtpif = 1.77245385F, // (0x1.c5bf8aP+0) https://oeis.org/A002161
55 inv_sqrtpif = .564189584F, // (0x1.20dd76P-1) https://oeis.org/A087197
56 sqrt2f = 1.41421356F, // (0x1.6a09e6P+0) https://oeis.org/A002193
57 inv_sqrt2f = .707106781F, // (0x1.6a09e6P-1)
58 sqrt3f = 1.73205081F, // (0x1.bb67aeP+0) https://oeis.org/A002194
59 inv_sqrt3f = .577350269F, // (0x1.279a74P-1)
60 phif = 1.61803399F; // (0x1.9e377aP+0) https://oeis.org/A001622
61} // namespace numbers
62
63/// Count number of 0's from the least significant bit to the most
64/// stopping at the first 1.
65///
66/// Only unsigned integral types are allowed.
67///
68/// Returns std::numeric_limits<T>::digits on an input of 0.
69template <typename T>
70LLVM_DEPRECATED("Use llvm::countr_zero instead.", "llvm::countr_zero")
71unsigned countTrailingZeros(T Val) {
72 static_assert(std::is_unsigned_v<T>,
73 "Only unsigned integral types are allowed.");
74 return llvm::countr_zero(Val);
75}
76
77/// Count number of 0's from the most significant bit to the least
78/// stopping at the first 1.
79///
80/// Only unsigned integral types are allowed.
81///
82/// Returns std::numeric_limits<T>::digits on an input of 0.
83template <typename T>
84LLVM_DEPRECATED("Use llvm::countl_zero instead.", "llvm::countl_zero")
85unsigned countLeadingZeros(T Val) {
86 static_assert(std::is_unsigned_v<T>,
87 "Only unsigned integral types are allowed.");
88 return llvm::countl_zero(Val);
89}
90
91/// Create a bitmask with the N right-most bits set to 1, and all other
92/// bits set to 0. Only unsigned types are allowed.
93template <typename T> T maskTrailingOnes(unsigned N) {
94 static_assert(std::is_unsigned_v<T>, "Invalid type!");
95 const unsigned Bits = CHAR_BIT * sizeof(T);
96 assert(N <= Bits && "Invalid bit index");
97 return N == 0 ? 0 : (T(-1) >> (Bits - N));
98}
99
100/// Create a bitmask with the N left-most bits set to 1, and all other
101/// bits set to 0. Only unsigned types are allowed.
102template <typename T> T maskLeadingOnes(unsigned N) {
103 return ~maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N);
104}
105
106/// Create a bitmask with the N right-most bits set to 0, and all other
107/// bits set to 1. Only unsigned types are allowed.
108template <typename T> T maskTrailingZeros(unsigned N) {
109 return maskLeadingOnes<T>(CHAR_BIT * sizeof(T) - N);
110}
111
112/// Create a bitmask with the N left-most bits set to 0, and all other
113/// bits set to 1. Only unsigned types are allowed.
114template <typename T> T maskLeadingZeros(unsigned N) {
115 return maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N);
116}
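
Quick check of the expected values for the mask helpers above (a stand-alone reimplementation of the trailing-ones case, not the header itself):

    #include <cassert>
    #include <cstdint>

    template <typename T> T maskTrailingOnesModel(unsigned N) {
      const unsigned Bits = 8 * sizeof(T);
      return N == 0 ? 0 : (T(-1) >> (Bits - N));
    }

    int main() {
      assert(maskTrailingOnesModel<uint32_t>(4) == 0xFu);
      assert(maskTrailingOnesModel<uint64_t>(0) == 0u);
      assert(maskTrailingOnesModel<uint8_t>(8) == 0xFFu);
      return 0;
    }
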
117
118/// Macro compressed bit reversal table for 256 bits.
119///
120/// http://graphics.stanford.edu/~seander/bithacks.html#BitReverseTable
121static const unsigned char BitReverseTable256[256] = {
122#define R2(n) n, n + 2 * 64, n + 1 * 64, n + 3 * 64
123#define R4(n) R2(n), R2(n + 2 * 16), R2(n + 1 * 16), R2(n + 3 * 16)
124#define R6(n) R4(n), R4(n + 2 * 4), R4(n + 1 * 4), R4(n + 3 * 4)
125 R6(0), R6(2), R6(1), R6(3)
126#undef R2
127#undef R4
128#undef R6
129};
130
131/// Reverse the bits in \p Val.
132template <typename T> T reverseBits(T Val) {
133#if __has_builtin(__builtin_bitreverse8)
134 if constexpr (std::is_same_v<T, uint8_t>)
135 return __builtin_bitreverse8(Val);
136#endif
137#if __has_builtin(__builtin_bitreverse16)
138 if constexpr (std::is_same_v<T, uint16_t>)
139 return __builtin_bitreverse16(Val);
140#endif
141#if __has_builtin(__builtin_bitreverse32)
142 if constexpr (std::is_same_v<T, uint32_t>)
143 return __builtin_bitreverse32(Val);
144#endif
145#if __has_builtin(__builtin_bitreverse64)
146 if constexpr (std::is_same_v<T, uint64_t>)
147 return __builtin_bitreverse64(Val);
148#endif
149
150 unsigned char in[sizeof(Val)];
151 unsigned char out[sizeof(Val)];
152 std::memcpy(in, &Val, sizeof(Val));
153 for (unsigned i = 0; i < sizeof(Val); ++i)
154 out[(sizeof(Val) - i) - 1] = BitReverseTable256[in[i]];
155 std::memcpy(&Val, out, sizeof(Val));
156 return Val;
157}
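
Tiny sanity check of what the byte-table fallback above computes: reversing an 8-bit value maps bit i to bit 7-i.

    #include <cassert>
    #include <cstdint>

    int main() {
      auto rev8 = [](uint8_t V) {
        uint8_t Out = 0;
        for (int I = 0; I < 8; ++I)
          if (V & (1u << I))
            Out |= 1u << (7 - I);
        return Out;
      };
      assert(rev8(0x01) == 0x80);
      assert(rev8(0x0F) == 0xF0);
      return 0;
    }
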
158
159// NOTE: The following support functions use the _32/_64 extensions instead of
160// type overloading so that signed and unsigned integers can be used without
161// ambiguity.
162
163/// Return the high 32 bits of a 64 bit value.
164constexpr inline uint32_t Hi_32(uint64_t Value) {
165 return static_cast<uint32_t>(Value >> 32);
166}
167
168/// Return the low 32 bits of a 64 bit value.
169constexpr inline uint32_t Lo_32(uint64_t Value) {
170 return static_cast<uint32_t>(Value);
171}
172
173/// Make a 64-bit integer from a high / low pair of 32-bit integers.
174constexpr inline uint64_t Make_64(uint32_t High, uint32_t Low) {
175 return ((uint64_t)High << 32) | (uint64_t)Low;
176}
177
178/// Checks if an integer fits into the given bit width.
179template <unsigned N> constexpr inline bool isInt(int64_t x) {
180 if constexpr (N == 8)
181 return static_cast<int8_t>(x) == x;
182 if constexpr (N == 16)
183 return static_cast<int16_t>(x) == x;
184 if constexpr (N == 32)
185 return static_cast<int32_t>(x) == x;
186 if constexpr (N < 64)
187 return -(INT64_C(1) << (N - 1)) <= x && x < (INT64_C(1) << (N - 1));
188 (void)x; // MSVC v19.25 warns that x is unused.
189 return true;
190}
191
192/// Checks if a signed integer is an N bit number shifted left by S.
193template <unsigned N, unsigned S>
194constexpr inline bool isShiftedInt(int64_t x) {
195 static_assert(
196 N > 0, "isShiftedInt<0> doesn't make sense (refers to a 0-bit number.");
197 static_assert(N + S <= 64, "isShiftedInt<N, S> with N + S > 64 is too wide.");
198 return isInt<N + S>(x) && (x % (UINT64_C(1) << S) == 0);
199}
200
201/// Checks if an unsigned integer fits into the given bit width.
202template <unsigned N> constexpr inline bool isUInt(uint64_t x) {
203 static_assert(N > 0, "isUInt<0> doesn't make sense");
204 if constexpr (N == 8)
205 return static_cast<uint8_t>(x) == x;
206 if constexpr (N == 16)
207 return static_cast<uint16_t>(x) == x;
208 if constexpr (N == 32)
209 return static_cast<uint32_t>(x) == x;
210 if constexpr (N < 64)
211 return x < (UINT64_C(1) << (N));
212 (void)x; // MSVC v19.25 warns that x is unused.
213 return true;
214}
215
216/// Checks if an unsigned integer is an N bit number shifted left by S.
217template <unsigned N, unsigned S>
218constexpr inline bool isShiftedUInt(uint64_t x) {
219 static_assert(
220 N > 0, "isShiftedUInt<0> doesn't make sense (refers to a 0-bit number)");
221 static_assert(N + S <= 64,
222 "isShiftedUInt<N, S> with N + S > 64 is too wide.");
223 // Per the two static_asserts above, S must be strictly less than 64. So
224 // 1 << S is not undefined behavior.
225 return isUInt<N + S>(x) && (x % (UINT64_C(1) << S) == 0);
226}
227
228/// Gets the maximum value for a N-bit unsigned integer.
229inline uint64_t maxUIntN(uint64_t N) {
230 assert(N > 0 && N <= 64 && "integer width out of range");
231
232 // uint64_t(1) << 64 is undefined behavior, so we can't do
233 // (uint64_t(1) << N) - 1
234 // without checking first that N != 64. But this works and doesn't have a
235 // branch.
236 return UINT64_MAX >> (64 - N);
237}
238
239/// Gets the minimum value for a N-bit signed integer.
240inline int64_t minIntN(int64_t N) {
241 assert(N > 0 && N <= 64 && "integer width out of range");
242
243 return UINT64_C(1) + ~(UINT64_C(1) << (N - 1));
244}
245
246/// Gets the maximum value for a N-bit signed integer.
247inline int64_t maxIntN(int64_t N) {
248 assert(N > 0 && N <= 64 && "integer width out of range");
249
250 // This relies on two's complement wraparound when N == 64, so we convert to
251 // int64_t only at the very end to avoid UB.
252 return (UINT64_C(1) << (N - 1)) - 1;
253}
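
Spot check of the N-bit range helpers above, evaluating the same expressions for N == 8: maxUIntN(8) == 255, minIntN(8) == -128, maxIntN(8) == 127.

    #include <cassert>
    #include <cstdint>

    int main() {
      assert((UINT64_MAX >> (64 - 8)) == 255u);                   // maxUIntN(8)
      assert((int64_t)(UINT64_C(1) + ~(UINT64_C(1) << 7)) == -128); // minIntN(8)
      assert((int64_t)((UINT64_C(1) << 7) - 1) == 127);           // maxIntN(8)
      return 0;
    }
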
254
255/// Checks if an unsigned integer fits into the given (dynamic) bit width.
256inline bool isUIntN(unsigned N, uint64_t x) {
257 return N >= 64 || x <= maxUIntN(N);
258}
259
260/// Checks if a signed integer fits into the given (dynamic) bit width.
261inline bool isIntN(unsigned N, int64_t x) {
262 return N >= 64 || (minIntN(N) <= x && x <= maxIntN(N));
263}
264
265/// Return true if the argument is a non-empty sequence of ones starting at the
266/// least significant bit with the remainder zero (32 bit version).
267/// Ex. isMask_32(0x0000FFFFU) == true.
268constexpr inline bool isMask_32(uint32_t Value) {
269 return Value && ((Value + 1) & Value) == 0;
270}
271
272/// Return true if the argument is a non-empty sequence of ones starting at the
273/// least significant bit with the remainder zero (64 bit version).
274constexpr inline bool isMask_64(uint64_t Value) {
275 return Value && ((Value + 1) & Value) == 0;
276}
277
278/// Return true if the argument contains a non-empty sequence of ones with the
279/// remainder zero (32 bit version.) Ex. isShiftedMask_32(0x0000FF00U) == true.
280constexpr inline bool isShiftedMask_32(uint32_t Value) {
281 return Value && isMask_32((Value - 1) | Value);
282}
283
284/// Return true if the argument contains a non-empty sequence of ones with the
285/// remainder zero (64 bit version.)
286constexpr inline bool isShiftedMask_64(uint64_t Value) {
287 return Value && isMask_64((Value - 1) | Value);
288}
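// Editorial illustration (not part of the original header): both mask predicates
// are constexpr, so the documented examples can be checked at compile time:
static_assert(isMask_32(0x0000FFFFu) && !isMask_32(0x0000FF00u),
              "a mask must start at bit 0");
static_assert(isShiftedMask_32(0x0000FF00u) && !isShiftedMask_32(0x0F00FF00u),
              "a shifted mask is a single contiguous run of ones");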
289
290/// Return true if the argument is a power of two > 0.
291/// Ex. isPowerOf2_32(0x00100000U) == true (32 bit edition.)
292constexpr inline bool isPowerOf2_32(uint32_t Value) {
293 return llvm::has_single_bit(Value);
294}
295
296/// Return true if the argument is a power of two > 0 (64 bit edition.)
297constexpr inline bool isPowerOf2_64(uint64_t Value) {
298 return llvm::has_single_bit(Value);
299}
300
301/// Count the number of ones from the most significant bit to the first
302/// zero bit.
303///
304/// Ex. countLeadingOnes(0xFF0FFF00) == 8.
305/// Only unsigned integral types are allowed.
306///
307/// Returns std::numeric_limits<T>::digits on an input of all ones.
308template <typename T>
309LLVM_DEPRECATED("Use llvm::countl_one instead.", "llvm::countl_one")__attribute__((deprecated("Use llvm::countl_one instead.", "llvm::countl_one"
)))
310unsigned countLeadingOnes(T Value) {
311 static_assert(std::is_unsigned_v<T>,
312 "Only unsigned integral types are allowed.");
313 return llvm::countl_one<T>(Value);
314}
315
316/// Count the number of ones from the least significant bit to the first
317/// zero bit.
318///
319/// Ex. countTrailingOnes(0x00FF00FF) == 8.
320/// Only unsigned integral types are allowed.
321///
322/// Returns std::numeric_limits<T>::digits on an input of all ones.
323template <typename T>
324LLVM_DEPRECATED("Use llvm::countr_one instead.", "llvm::countr_one")
325unsigned countTrailingOnes(T Value) {
326 static_assert(std::is_unsigned_v<T>,
327 "Only unsigned integral types are allowed.");
328 return llvm::countr_one<T>(Value);
329}
330
331/// Count the number of set bits in a value.
332/// Ex. countPopulation(0xF000F000) = 8
333/// Returns 0 if the word is zero.
334template <typename T>
335LLVM_DEPRECATED("Use llvm::popcount instead.", "llvm::popcount")
336inline unsigned countPopulation(T Value) {
337 static_assert(std::is_unsigned_v<T>,
338 "Only unsigned integral types are allowed.");
339 return (unsigned)llvm::popcount(Value);
340}
341
342/// Return true if the argument contains a non-empty sequence of ones with the
343/// remainder zero (32 bit version.) Ex. isShiftedMask_32(0x0000FF00U) == true.
344/// If true, \p MaskIdx will specify the index of the lowest set bit and \p
345/// MaskLen is updated to specify the length of the mask, else neither are
346/// updated.
347inline bool isShiftedMask_32(uint32_t Value, unsigned &MaskIdx,
348 unsigned &MaskLen) {
349 if (!isShiftedMask_32(Value))
350 return false;
351 MaskIdx = llvm::countr_zero(Value);
352 MaskLen = llvm::popcount(Value);
353 return true;
354}
355
356/// Return true if the argument contains a non-empty sequence of ones with the
357/// remainder zero (64 bit version.) If true, \p MaskIdx will specify the index
358/// of the lowest set bit and \p MaskLen is updated to specify the length of the
359/// mask, else neither are updated.
360inline bool isShiftedMask_64(uint64_t Value, unsigned &MaskIdx,
361 unsigned &MaskLen) {
362 if (!isShiftedMask_64(Value))
363 return false;
364 MaskIdx = llvm::countr_zero(Value);
365 MaskLen = llvm::popcount(Value);
366 return true;
367}
368
369/// Compile time Log2.
370/// Valid only for positive powers of two.
371template <size_t kValue> constexpr inline size_t CTLog2() {
372 static_assert(kValue > 0 && llvm::isPowerOf2_64(kValue),
373 "Value is not a valid power of 2");
374 return 1 + CTLog2<kValue / 2>();
375}
376
377template <> constexpr inline size_t CTLog2<1>() { return 0; }
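// Editorial illustration (not part of the original header): CTLog2 peels one bit
// per recursion step until the CTLog2<1>() == 0 base case, e.g.
static_assert(CTLog2<1>() == 0 && CTLog2<8>() == 3 && CTLog2<4096>() == 12,
              "compile-time log2 of powers of two");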
378
379/// Return the floor log base 2 of the specified value, -1 if the value is zero.
380/// (32 bit edition.)
381/// Ex. Log2_32(32) == 5, Log2_32(1) == 0, Log2_32(0) == -1, Log2_32(6) == 2
382inline unsigned Log2_32(uint32_t Value) {
383 return 31 - llvm::countl_zero(Value);
    [analyzer step 23] Returning the value 4294967295
384}
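// Editorial note (not part of the original header): the analyzer value reported
// above comes from the zero case. countl_zero(uint32_t(0)) is 32, so Log2_32(0)
// computes 31u - 32u, which wraps to 4294967295, i.e. the unsigned encoding of the
// -1 mentioned in the doc comment. A caller that uses this result as a shift amount
// without first excluding zero therefore shifts by a value >= the bit width.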
385
386/// Return the floor log base 2 of the specified value, -1 if the value is zero.
387/// (64 bit edition.)
388inline unsigned Log2_64(uint64_t Value) {
389 return 63 - llvm::countl_zero(Value);
390}
391
392/// Return the ceil log base 2 of the specified value, 32 if the value is zero.
393/// (32 bit edition).
394/// Ex. Log2_32_Ceil(32) == 5, Log2_32_Ceil(1) == 0, Log2_32_Ceil(6) == 3
395inline unsigned Log2_32_Ceil(uint32_t Value) {
396 return 32 - llvm::countl_zero(Value - 1);
397}
398
399/// Return the ceil log base 2 of the specified value, 64 if the value is zero.
400/// (64 bit edition.)
401inline unsigned Log2_64_Ceil(uint64_t Value) {
402 return 64 - llvm::countl_zero(Value - 1);
403}
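// Editorial illustration (not part of the original header): the ceil variants lean
// on the same wraparound deliberately. Log2_32_Ceil(1) evaluates countl_zero(0) ==
// 32 and returns 0, while Log2_32_Ceil(0) wraps Value - 1 to 0xFFFFFFFF and returns
// 32, matching the documented zero behavior.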
404
405/// This function takes a 64-bit integer and returns the bit equivalent double.
406LLVM_DEPRECATED("use llvm::bit_cast instead", "llvm::bit_cast<double>")
407inline double BitsToDouble(uint64_t Bits) {
408 static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
409 return llvm::bit_cast<double>(Bits);
410}
411
412/// This function takes a 32-bit integer and returns the bit equivalent float.
413LLVM_DEPRECATED("use llvm::bit_cast instead", "llvm::bit_cast<float>")
414inline float BitsToFloat(uint32_t Bits) {
415 static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
416 return llvm::bit_cast<float>(Bits);
417}
418
419/// This function takes a double and returns the bit equivalent 64-bit integer.
420/// Note that copying doubles around changes the bits of NaNs on some hosts,
421/// notably x86, so this routine cannot be used if these bits are needed.
422LLVM_DEPRECATED("use llvm::bit_cast instead", "llvm::bit_cast<uint64_t>")
423inline uint64_t DoubleToBits(double Double) {
424 static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
425 return llvm::bit_cast<uint64_t>(Double);
426}
427
428/// This function takes a float and returns the bit equivalent 32-bit integer.
429/// Note that copying floats around changes the bits of NaNs on some hosts,
430/// notably x86, so this routine cannot be used if these bits are needed.
431LLVM_DEPRECATED("use llvm::bit_cast instead", "llvm::bit_cast<uint32_t>")
432inline uint32_t FloatToBits(float Float) {
433 static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
434 return llvm::bit_cast<uint32_t>(Float);
435}
436
437/// A and B are either alignments or offsets. Return the minimum alignment that
438/// may be assumed after adding the two together.
439constexpr inline uint64_t MinAlign(uint64_t A, uint64_t B) {
440 // The largest power of 2 that divides both A and B.
441 //
442 // Replace "-Value" by "1+~Value" in the following commented code to avoid
443 // MSVC warning C4146
444 // return (A | B) & -(A | B);
445 return (A | B) & (1 + ~(A | B));
446}
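// Editorial illustration (not part of the original header): (A | B) & -(A | B)
// isolates the lowest set bit of A | B, the largest power of two dividing both:
static_assert(MinAlign(8, 12) == 4 && MinAlign(24, 16) == 8,
              "largest power of two dividing both arguments");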
447
448/// Returns the next power of two (in 64-bits) that is strictly greater than A.
449/// Returns zero on overflow.
450constexpr inline uint64_t NextPowerOf2(uint64_t A) {
451 A |= (A >> 1);
452 A |= (A >> 2);
453 A |= (A >> 4);
454 A |= (A >> 8);
455 A |= (A >> 16);
456 A |= (A >> 32);
457 return A + 1;
458}
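// Editorial illustration (not part of the original header): the shift/or cascade
// smears the highest set bit into every lower bit, so A + 1 is the next power of
// two strictly greater than the input (and 0 on overflow):
static_assert(NextPowerOf2(5) == 8 && NextPowerOf2(8) == 16 && NextPowerOf2(0) == 1,
              "strictly greater power of two");
static_assert(NextPowerOf2(~0ULL) == 0, "wraps to 0 on overflow");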
459
460/// Returns the power of two which is less than or equal to the given value.
461/// Essentially, it is a floor operation across the domain of powers of two.
462LLVM_DEPRECATED("use llvm::bit_floor instead", "llvm::bit_floor")
463inline uint64_t PowerOf2Floor(uint64_t A) {
464 return llvm::bit_floor(A);
465}
466
467/// Returns the power of two which is greater than or equal to the given value.
468/// Essentially, it is a ceil operation across the domain of powers of two.
469inline uint64_t PowerOf2Ceil(uint64_t A) {
470 if (!A)
471 return 0;
472 return NextPowerOf2(A - 1);
473}
474
475/// Returns the next integer (mod 2**64) that is greater than or equal to
476/// \p Value and is a multiple of \p Align. \p Align must be non-zero.
477///
478/// Examples:
479/// \code
480/// alignTo(5, 8) = 8
481/// alignTo(17, 8) = 24
482/// alignTo(~0LL, 8) = 0
483/// alignTo(321, 255) = 510
484/// \endcode
485inline uint64_t alignTo(uint64_t Value, uint64_t Align) {
486 assert(Align != 0u && "Align can't be 0.");
487 return (Value + Align - 1) / Align * Align;
488}
489
490inline uint64_t alignToPowerOf2(uint64_t Value, uint64_t Align) {
491 assert(Align != 0 && (Align & (Align - 1)) == 0 &&
492        "Align must be a power of 2");
493 return (Value + Align - 1) & -Align;
494}
495
496/// If non-zero \p Skew is specified, the return value will be a minimal integer
497/// that is greater than or equal to \p Value and equal to \p Align * N + \p Skew
498/// for some integer N. If \p Skew is larger than \p Align, its value is adjusted
499/// to '\p Skew mod \p Align'. \p Align must be non-zero.
500///
501/// Examples:
502/// \code
503/// alignTo(5, 8, 7) = 7
504/// alignTo(17, 8, 1) = 17
505/// alignTo(~0LL, 8, 3) = 3
506/// alignTo(321, 255, 42) = 552
507/// \endcode
508inline uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew) {
509 assert(Align != 0u && "Align can't be 0.");
510 Skew %= Align;
511 return alignTo(Value - Skew, Align) + Skew;
512}
513
514/// Returns the next integer (mod 2**64) that is greater than or equal to
515/// \p Value and is a multiple of \c Align. \c Align must be non-zero.
516template <uint64_t Align> constexpr inline uint64_t alignTo(uint64_t Value) {
517 static_assert(Align != 0u, "Align must be non-zero");
518 return (Value + Align - 1) / Align * Align;
519}
520
521/// Returns the integer ceil(Numerator / Denominator).
522inline uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator) {
523 return alignTo(Numerator, Denominator) / Denominator;
524}
525
526/// Returns the integer nearest(Numerator / Denominator).
527inline uint64_t divideNearest(uint64_t Numerator, uint64_t Denominator) {
528 return (Numerator + (Denominator / 2)) / Denominator;
529}
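// Editorial illustration (not part of the original header): worked values for the
// two rounding helpers above:
//   divideCeil(7, 3)    == 3  (alignTo(7, 3) == 9, then 9 / 3)
//   divideNearest(7, 3) == 2, divideNearest(5, 2) == 3  (ties round up)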
530
531/// Returns the largest uint64_t that is less than or equal to \p Value and is
532/// equal to \p Skew mod \p Align. \p Align must be non-zero.
533inline uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
534 assert(Align != 0u && "Align can't be 0.");
535 Skew %= Align;
536 return (Value - Skew) / Align * Align + Skew;
537}
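// Editorial illustration (not part of the original header): worked values, assuming
// the default Skew of 0 unless stated:
//   alignDown(17, 8)    == 16
//   alignDown(17, 8, 1) == 17  ((17 - 1) / 8 * 8 + 1)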
538
539/// Sign-extend the number in the bottom B bits of X to a 32-bit integer.
540/// Requires 0 < B <= 32.
541template <unsigned B> constexpr inline int32_t SignExtend32(uint32_t X) {
542 static_assert(B > 0, "Bit width can't be 0.");
543 static_assert(B <= 32, "Bit width out of range.");
544 return int32_t(X << (32 - B)) >> (32 - B);
545}
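// Editorial illustration (not part of the original header): the shift-left /
// arithmetic-shift-right pair replicates bit B-1 into the upper bits, relying on
// the sign-extending right shift of the two's complement hosts LLVM supports:
static_assert(SignExtend32<8>(0xFFu) == -1 && SignExtend32<8>(0x7Fu) == 127,
              "bit 7 becomes the sign bit");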
546
547/// Sign-extend the number in the bottom B bits of X to a 32-bit integer.
548/// Requires 0 < B <= 32.
549inline int32_t SignExtend32(uint32_t X, unsigned B) {
550 assert(B > 0 && "Bit width can't be 0.");
551 assert(B <= 32 && "Bit width out of range.");
552 return int32_t(X << (32 - B)) >> (32 - B);
553}
554
555/// Sign-extend the number in the bottom B bits of X to a 64-bit integer.
556/// Requires 0 < B <= 64.
557template <unsigned B> constexpr inline int64_t SignExtend64(uint64_t x) {
558 static_assert(B > 0, "Bit width can't be 0.");
559 static_assert(B <= 64, "Bit width out of range.");
560 return int64_t(x << (64 - B)) >> (64 - B);
561}
562
563/// Sign-extend the number in the bottom B bits of X to a 64-bit integer.
564/// Requires 0 < B <= 64.
565inline int64_t SignExtend64(uint64_t X, unsigned B) {
566 assert(B > 0 && "Bit width can't be 0.");
567 assert(B <= 64 && "Bit width out of range.");
568 return int64_t(X << (64 - B)) >> (64 - B);
569}
570
571/// Subtract two unsigned integers, X and Y, of type T and return the absolute
572/// value of the result.
573template <typename T>
574std::enable_if_t<std::is_unsigned_v<T>, T> AbsoluteDifference(T X, T Y) {
575 return X > Y ? (X - Y) : (Y - X);
576}
577
578/// Add two unsigned integers, X and Y, of type T. Clamp the result to the
579/// maximum representable value of T on overflow. ResultOverflowed indicates if
580/// the result is larger than the maximum representable value of type T.
581template <typename T>
582std::enable_if_t<std::is_unsigned_v<T>, T>
583SaturatingAdd(T X, T Y, bool *ResultOverflowed = nullptr) {
584 bool Dummy;
585 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
586 // Hacker's Delight, p. 29
587 T Z = X + Y;
588 Overflowed = (Z < X || Z < Y);
589 if (Overflowed)
590 return std::numeric_limits<T>::max();
591 else
592 return Z;
593}
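// Editorial illustration (not part of the original header): with uint8_t operands
// 200 and 100, the truncated sum stored in Z is 44, which is smaller than either
// operand, so Overflowed is set and std::numeric_limits<uint8_t>::max() == 255 is
// returned instead.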
594
595/// Add multiple unsigned integers of type T. Clamp the result to the
596/// maximum representable value of T on overflow.
597template <class T, class... Ts>
598std::enable_if_t<std::is_unsigned_v<T>, T> SaturatingAdd(T X, T Y, T Z,
599 Ts... Args) {
600 bool Overflowed = false;
601 T XY = SaturatingAdd(X, Y, &Overflowed);
602 if (Overflowed)
603 return SaturatingAdd(std::numeric_limits<T>::max(), T(1), Args...);
604 return SaturatingAdd(XY, Z, Args...);
605}
606
607/// Multiply two unsigned integers, X and Y, of type T. Clamp the result to the
608/// maximum representable value of T on overflow. ResultOverflowed indicates if
609/// the result is larger than the maximum representable value of type T.
610template <typename T>
611std::enable_if_t<std::is_unsigned_v<T>, T>
612SaturatingMultiply(T X, T Y, bool *ResultOverflowed = nullptr) {
613 bool Dummy;
614 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
615
616 // Hacker's Delight, p. 30 has a different algorithm, but we don't use that
617 // because it fails for uint16_t (where multiplication can have undefined
618 // behavior due to promotion to int), and requires a division in addition
619 // to the multiplication.
620
621 Overflowed = false;
622
623 // Log2(Z) would be either Log2Z or Log2Z + 1.
624 // Special case: if X or Y is 0, Log2_64 gives -1, and Log2Z
625 // will necessarily be less than Log2Max as desired.
626 int Log2Z = Log2_64(X) + Log2_64(Y);
627 const T Max = std::numeric_limits<T>::max();
628 int Log2Max = Log2_64(Max);
629 if (Log2Z < Log2Max) {
630 return X * Y;
631 }
632 if (Log2Z > Log2Max) {
633 Overflowed = true;
634 return Max;
635 }
636
637 // We're going to use the top bit, and maybe overflow one
638 // bit past it. Multiply all but the bottom bit then add
639 // that on at the end.
640 T Z = (X >> 1) * Y;
641 if (Z & ~(Max >> 1)) {
642 Overflowed = true;
643 return Max;
644 }
645 Z <<= 1;
646 if (X & 1)
647 return SaturatingAdd(Z, Y, ResultOverflowed);
648
649 return Z;
650}
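// Editorial illustration (not part of the original header): for uint8_t,
// SaturatingMultiply(16, 16) has Log2Z == 8 > Log2Max == 7 and saturates to 255,
// while SaturatingMultiply(16, 8) hits the Log2Z == Log2Max path: Z = (16 >> 1) * 8
// == 64, the top-bit check passes, Z <<= 1 yields 128, and X is even, so the exact
// product 128 is returned.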
651
652/// Multiply two unsigned integers, X and Y, and add the unsigned integer, A to
653/// the product. Clamp the result to the maximum representable value of T on
654/// overflow. ResultOverflowed indicates if the result is larger than the
655/// maximum representable value of type T.
656template <typename T>
657std::enable_if_t<std::is_unsigned_v<T>, T>
658SaturatingMultiplyAdd(T X, T Y, T A, bool *ResultOverflowed = nullptr) {
659 bool Dummy;
660 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
661
662 T Product = SaturatingMultiply(X, Y, &Overflowed);
663 if (Overflowed)
664 return Product;
665
666 return SaturatingAdd(A, Product, &Overflowed);
667}
668
669/// Use this rather than HUGE_VALF; the latter causes warnings on MSVC.
670extern const float huge_valf;
671
672
673/// Add two signed integers, computing the two's complement truncated result,
674/// returning true if overflow occurred.
675template <typename T>
676std::enable_if_t<std::is_signed_v<T>, T> AddOverflow(T X, T Y, T &Result) {
677#if __has_builtin(__builtin_add_overflow)
678 return __builtin_add_overflow(X, Y, &Result);
679#else
680 // Perform the unsigned addition.
681 using U = std::make_unsigned_t<T>;
682 const U UX = static_cast<U>(X);
683 const U UY = static_cast<U>(Y);
684 const U UResult = UX + UY;
685
686 // Convert to signed.
687 Result = static_cast<T>(UResult);
688
689 // Adding two positive numbers should result in a positive number.
690 if (X > 0 && Y > 0)
691 return Result <= 0;
692 // Adding two negatives should result in a negative number.
693 if (X < 0 && Y < 0)
694 return Result >= 0;
695 return false;
696#endif
697}
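// Editorial illustration (not part of the original header): in the fallback branch,
// int8_t X = 100 and Y = 50 give UResult == 150, which converts back to Result ==
// -106; both inputs are positive but Result is not, so overflow is reported while
// Result still holds the two's complement truncated sum.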
698
699/// Subtract two signed integers, computing the two's complement truncated
700/// result, returning true if an overflow occurred.
701template <typename T>
702std::enable_if_t<std::is_signed_v<T>, T> SubOverflow(T X, T Y, T &Result) {
703#if __has_builtin(__builtin_sub_overflow)
704 return __builtin_sub_overflow(X, Y, &Result);
705#else
706 // Perform the unsigned subtraction.
707 using U = std::make_unsigned_t<T>;
708 const U UX = static_cast<U>(X);
709 const U UY = static_cast<U>(Y);
710 const U UResult = UX - UY;
711
712 // Convert to signed.
713 Result = static_cast<T>(UResult);
714
715 // Subtracting a positive number from a negative results in a negative number.
716 if (X <= 0 && Y > 0)
717 return Result >= 0;
718 // Subtracting a negative number from a positive results in a positive number.
719 if (X >= 0 && Y < 0)
720 return Result <= 0;
721 return false;
722#endif
723}
724
725/// Multiply two signed integers, computing the two's complement truncated
726/// result, returning true if an overflow occurred.
727template <typename T>
728std::enable_if_t<std::is_signed_v<T>, T> MulOverflow(T X, T Y, T &Result) {
729 // Perform the unsigned multiplication on absolute values.
730 using U = std::make_unsigned_t<T>;
731 const U UX = X < 0 ? (0 - static_cast<U>(X)) : static_cast<U>(X);
732 const U UY = Y < 0 ? (0 - static_cast<U>(Y)) : static_cast<U>(Y);
733 const U UResult = UX * UY;
734
735 // Convert to signed.
736 const bool IsNegative = (X < 0) ^ (Y < 0);
737 Result = IsNegative ? (0 - UResult) : UResult;
738
739 // If any of the args was 0, result is 0 and no overflow occurs.
740 if (UX == 0 || UY == 0)
741 return false;
742
743 // UX and UY are in [1, 2^n], where n is the number of digits.
744 // Check how the max allowed absolute value (2^n for negative, 2^(n-1) for
745 // positive) divided by an argument compares to the other.
746 if (IsNegative)
747 return UX > (static_cast<U>(std::numeric_limits<T>::max()) + U(1)) / UY;
748 else
749 return UX > (static_cast<U>(std::numeric_limits<T>::max())) / UY;
750}
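// Editorial illustration (not part of the original header): for int8_t X = -100 and
// Y = 2, UX == 100, UY == 2 and IsNegative is true, so the final check compares UX
// against (127 + 1) / UY == 64; 100 > 64 reports overflow (the exact product -200
// does not fit in int8_t), and Result holds the truncated value 56.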
751
752} // End llvm namespace
753
754#endif