Bug Summary

File: build/source/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
Warning: line 3604, column 30
The left operand of '==' is a garbage value
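
The diagnostic reports that, on at least one path the analyzer explored, the variable feeding the left side of the '==' at line 3604 is read before anything has been stored to it. Line 3604 lies beyond the excerpt below, so the following is only a minimal hypothetical sketch of the pattern this checker flags (getSubRegForSize, SubReg, and example are illustrative names, not the reported code); it mirrors the shape of getSubRegForClass() later in this file, which can return false without writing its out-parameter:

  // Hypothetical illustration of "The left operand of '==' is a garbage value".
  static bool getSubRegForSize(unsigned SizeInBits, unsigned &SubReg) {
    if (SizeInBits == 32) { SubReg = 1; return true; }
    if (SizeInBits == 64) { SubReg = 2; return true; }
    return false; // SubReg is left unwritten on this path
  }

  void example() {
    unsigned SubReg;               // never initialized here
    getSubRegForSize(128, SubReg); // returns false; SubReg still holds garbage
    if (SubReg == 1) {             // analyzer: left operand of '==' is a garbage value
      // ...
    }
  }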

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name AArch64InstructionSelector.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-17/lib/clang/17 -D _DEBUG -D _GLIBCXX_ASSERTIONS -D _GNU_SOURCE -D _LIBCPP_ENABLE_ASSERTIONS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/Target/AArch64 -I /build/source/llvm/lib/Target/AArch64 -I include -I /build/source/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-17/lib/clang/17/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/source/= -fcoverage-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/source/= -source-date-epoch 1683717183 -O2 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/= -ferror-limit 19 -fvisibility=hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2023-05-10-133810-16478-1 -x c++ /build/source/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
1//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the InstructionSelector class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
14#include "AArch64GlobalISelUtils.h"
15#include "AArch64InstrInfo.h"
16#include "AArch64MachineFunctionInfo.h"
17#include "AArch64RegisterBankInfo.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
20#include "AArch64TargetMachine.h"
21#include "MCTargetDesc/AArch64AddressingModes.h"
22#include "MCTargetDesc/AArch64MCTargetDesc.h"
23#include "llvm/BinaryFormat/Dwarf.h"
24#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
25#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
26#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
27#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
28#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
29#include "llvm/CodeGen/GlobalISel/Utils.h"
30#include "llvm/CodeGen/MachineBasicBlock.h"
31#include "llvm/CodeGen/MachineConstantPool.h"
32#include "llvm/CodeGen/MachineFrameInfo.h"
33#include "llvm/CodeGen/MachineFunction.h"
34#include "llvm/CodeGen/MachineInstr.h"
35#include "llvm/CodeGen/MachineInstrBuilder.h"
36#include "llvm/CodeGen/MachineMemOperand.h"
37#include "llvm/CodeGen/MachineOperand.h"
38#include "llvm/CodeGen/MachineRegisterInfo.h"
39#include "llvm/CodeGen/TargetOpcodes.h"
40#include "llvm/IR/Constants.h"
41#include "llvm/IR/DerivedTypes.h"
42#include "llvm/IR/Instructions.h"
43#include "llvm/IR/IntrinsicsAArch64.h"
44#include "llvm/IR/PatternMatch.h"
45#include "llvm/IR/Type.h"
46#include "llvm/Pass.h"
47#include "llvm/Support/Debug.h"
48#include "llvm/Support/raw_ostream.h"
49#include <optional>
50
51#define DEBUG_TYPE "aarch64-isel"
52
53using namespace llvm;
54using namespace MIPatternMatch;
55using namespace AArch64GISelUtils;
56
57namespace llvm {
58class BlockFrequencyInfo;
59class ProfileSummaryInfo;
60}
61
62namespace {
63
64#define GET_GLOBALISEL_PREDICATE_BITSET
65#include "AArch64GenGlobalISel.inc"
66#undef GET_GLOBALISEL_PREDICATE_BITSET
67
68
69class AArch64InstructionSelector : public InstructionSelector {
70public:
71 AArch64InstructionSelector(const AArch64TargetMachine &TM,
72 const AArch64Subtarget &STI,
73 const AArch64RegisterBankInfo &RBI);
74
75 bool select(MachineInstr &I) override;
76 static const char *getName() { return DEBUG_TYPE; }
77
78 void setupMF(MachineFunction &MF, GISelKnownBits *KB,
79 CodeGenCoverage &CoverageInfo, ProfileSummaryInfo *PSI,
80 BlockFrequencyInfo *BFI) override {
81 InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
82 MIB.setMF(MF);
83
84 // hasFnAttribute() is expensive to call on every BRCOND selection, so
85 // cache it here for each run of the selector.
86 ProduceNonFlagSettingCondBr =
87 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
88 MFReturnAddr = Register();
89
90 processPHIs(MF);
91 }
92
93private:
94 /// tblgen-erated 'select' implementation, used as the initial selector for
95 /// the patterns that don't require complex C++.
96 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
97
98 // A lowering phase that runs before any selection attempts.
99 // Returns true if the instruction was modified.
100 bool preISelLower(MachineInstr &I);
101
102 // An early selection function that runs before the selectImpl() call.
103 bool earlySelect(MachineInstr &I);
104
105 // Do some preprocessing of G_PHIs before we begin selection.
106 void processPHIs(MachineFunction &MF);
107
108 bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);
109
110 /// Eliminate same-sized cross-bank copies into stores before selectImpl().
111 bool contractCrossBankCopyIntoStore(MachineInstr &I,
112 MachineRegisterInfo &MRI);
113
114 bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
115
116 bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
117 MachineRegisterInfo &MRI) const;
118 bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
119 MachineRegisterInfo &MRI) const;
120
121 ///@{
122 /// Helper functions for selectCompareBranch.
123 bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
124 MachineIRBuilder &MIB) const;
125 bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
126 MachineIRBuilder &MIB) const;
127 bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
128 MachineIRBuilder &MIB) const;
129 bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
130 MachineBasicBlock *DstMBB,
131 MachineIRBuilder &MIB) const;
132 ///@}
133
134 bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
135 MachineRegisterInfo &MRI);
136
137 bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
138 bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);
139
140 // Helper to generate an equivalent of scalar_to_vector into a new register,
141 // returned via 'Dst'.
142 MachineInstr *emitScalarToVector(unsigned EltSize,
143 const TargetRegisterClass *DstRC,
144 Register Scalar,
145 MachineIRBuilder &MIRBuilder) const;
146
147 /// Emit a lane insert into \p DstReg, or a new vector register if
148 /// std::nullopt is provided.
149 ///
150 /// The lane inserted into is defined by \p LaneIdx. The vector source
151 /// register is given by \p SrcReg. The register containing the element is
152 /// given by \p EltReg.
153 MachineInstr *emitLaneInsert(std::optional<Register> DstReg, Register SrcReg,
154 Register EltReg, unsigned LaneIdx,
155 const RegisterBank &RB,
156 MachineIRBuilder &MIRBuilder) const;
157
158 /// Emit a sequence of instructions representing a constant \p CV for a
159 /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
160 ///
161 /// \returns the last instruction in the sequence on success, and nullptr
162 /// otherwise.
163 MachineInstr *emitConstantVector(Register Dst, Constant *CV,
164 MachineIRBuilder &MIRBuilder,
165 MachineRegisterInfo &MRI);
166
167 bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI);
168 bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
169 MachineRegisterInfo &MRI);
170 /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
171 /// SUBREG_TO_REG.
172 bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
173 bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
174 bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
175 bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
176
177 bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
178 bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
179 bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
180 bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);
181
182 /// Helper function to select vector load intrinsics like
183 /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
184 /// \p Opc is the opcode that the selected instruction should use.
185 /// \p NumVecs is the number of vector destinations for the instruction.
186 /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
187 bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
188 MachineInstr &I);
189 bool selectIntrinsicWithSideEffects(MachineInstr &I,
190 MachineRegisterInfo &MRI);
191 bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
192 bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI);
193 bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
194 bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
195 bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
196 bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
197 bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
198 bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
199 bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
200 bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
201
202 unsigned emitConstantPoolEntry(const Constant *CPVal,
203 MachineFunction &MF) const;
204 MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
205 MachineIRBuilder &MIRBuilder) const;
206
207 // Emit a vector concat operation.
208 MachineInstr *emitVectorConcat(std::optional<Register> Dst, Register Op1,
209 Register Op2,
210 MachineIRBuilder &MIRBuilder) const;
211
212 // Emit an integer compare between LHS and RHS, which checks for Predicate.
213 MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
214 MachineOperand &Predicate,
215 MachineIRBuilder &MIRBuilder) const;
216
217 /// Emit a floating point comparison between \p LHS and \p RHS.
218 /// \p Pred if given is the intended predicate to use.
219 MachineInstr *
220 emitFPCompare(Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
221 std::optional<CmpInst::Predicate> = std::nullopt) const;
222
223 MachineInstr *
224 emitInstr(unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
225 std::initializer_list<llvm::SrcOp> SrcOps,
226 MachineIRBuilder &MIRBuilder,
227 const ComplexRendererFns &RenderFns = std::nullopt) const;
228 /// Helper function to emit an add or sub instruction.
229 ///
230 /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
231 /// in a specific order.
232 ///
233 /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
234 ///
235 /// \code
236 /// const std::array<std::array<unsigned, 2>, 4> Table {
237 /// {{AArch64::ADDXri, AArch64::ADDWri},
238 /// {AArch64::ADDXrs, AArch64::ADDWrs},
239 /// {AArch64::ADDXrr, AArch64::ADDWrr},
240 /// {AArch64::SUBXri, AArch64::SUBWri},
241 /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
242 /// \endcode
243 ///
244 /// Each row in the table corresponds to a different addressing mode. Each
245 /// column corresponds to a different register size.
246 ///
247 /// \attention Rows must be structured as follows:
248 /// - Row 0: The ri opcode variants
249 /// - Row 1: The rs opcode variants
250 /// - Row 2: The rr opcode variants
251 /// - Row 3: The ri opcode variants for negative immediates
252 /// - Row 4: The rx opcode variants
253 ///
254 /// \attention Columns must be structured as follows:
255 /// - Column 0: The 64-bit opcode variants
256 /// - Column 1: The 32-bit opcode variants
257 ///
258 /// \p Dst is the destination register of the binop to emit.
259 /// \p LHS is the left-hand operand of the binop to emit.
260 /// \p RHS is the right-hand operand of the binop to emit.
261 MachineInstr *emitAddSub(
262 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
263 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
264 MachineIRBuilder &MIRBuilder) const;
265 MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
266 MachineOperand &RHS,
267 MachineIRBuilder &MIRBuilder) const;
268 MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
269 MachineIRBuilder &MIRBuilder) const;
270 MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
271 MachineIRBuilder &MIRBuilder) const;
272 MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
273 MachineIRBuilder &MIRBuilder) const;
274 MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
275 MachineIRBuilder &MIRBuilder) const;
276 MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
277 AArch64CC::CondCode CC,
278 MachineIRBuilder &MIRBuilder) const;
279 MachineInstr *emitExtractVectorElt(std::optional<Register> DstReg,
280 const RegisterBank &DstRB, LLT ScalarTy,
281 Register VecReg, unsigned LaneIdx,
282 MachineIRBuilder &MIRBuilder) const;
283 MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
284 AArch64CC::CondCode Pred,
285 MachineIRBuilder &MIRBuilder) const;
286 /// Emit a CSet for a FP compare.
287 ///
288 /// \p Dst is expected to be a 32-bit scalar register.
289 MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
290 MachineIRBuilder &MIRBuilder) const;
291
292 /// Emit the overflow op for \p Opcode.
293 ///
294 /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
295 /// G_USUBO, etc.
296 std::pair<MachineInstr *, AArch64CC::CondCode>
297 emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
298 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
299
300 /// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
301 /// In some cases this is even possible with OR operations in the expression.
302 MachineInstr *emitConjunction(Register Val, AArch64CC::CondCode &OutCC,
303 MachineIRBuilder &MIB) const;
304 MachineInstr *emitConditionalComparison(Register LHS, Register RHS,
305 CmpInst::Predicate CC,
306 AArch64CC::CondCode Predicate,
307 AArch64CC::CondCode OutCC,
308 MachineIRBuilder &MIB) const;
309 MachineInstr *emitConjunctionRec(Register Val, AArch64CC::CondCode &OutCC,
310 bool Negate, Register CCOp,
311 AArch64CC::CondCode Predicate,
312 MachineIRBuilder &MIB) const;
313
314 /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
315 /// \p IsNegative is true if the test should be "not zero".
316 /// This will also optimize the test bit instruction when possible.
317 MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
318 MachineBasicBlock *DstMBB,
319 MachineIRBuilder &MIB) const;
320
321 /// Emit a CB(N)Z instruction which branches to \p DestMBB.
322 MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
323 MachineBasicBlock *DestMBB,
324 MachineIRBuilder &MIB) const;
325
326 // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
327 // We use these manually instead of using the importer since it doesn't
328 // support SDNodeXForm.
329 ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
330 ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
331 ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
332 ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
333
334 ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
335 ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
336 ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
337
338 ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
339 unsigned Size) const;
340
341 ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
342 return selectAddrModeUnscaled(Root, 1);
343 }
344 ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
345 return selectAddrModeUnscaled(Root, 2);
346 }
347 ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
348 return selectAddrModeUnscaled(Root, 4);
349 }
350 ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
351 return selectAddrModeUnscaled(Root, 8);
352 }
353 ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
354 return selectAddrModeUnscaled(Root, 16);
355 }
356
357 /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
358 /// from complex pattern matchers like selectAddrModeIndexed().
359 ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
360 MachineRegisterInfo &MRI) const;
361
362 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
363 unsigned Size) const;
364 template <int Width>
365 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
366 return selectAddrModeIndexed(Root, Width / 8);
367 }
368
369 bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
370 const MachineRegisterInfo &MRI) const;
371 ComplexRendererFns
372 selectAddrModeShiftedExtendXReg(MachineOperand &Root,
373 unsigned SizeInBytes) const;
374
375 /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
376 /// or not a shift + extend should be folded into an addressing mode. Returns
377 /// None when this is not profitable or possible.
378 ComplexRendererFns
379 selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
380 MachineOperand &Offset, unsigned SizeInBytes,
381 bool WantsExt) const;
382 ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
383 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
384 unsigned SizeInBytes) const;
385 template <int Width>
386 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
387 return selectAddrModeXRO(Root, Width / 8);
388 }
389
390 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
391 unsigned SizeInBytes) const;
392 template <int Width>
393 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
394 return selectAddrModeWRO(Root, Width / 8);
395 }
396
397 ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
398 bool AllowROR = false) const;
399
400 ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
401 return selectShiftedRegister(Root);
402 }
403
404 ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
405 return selectShiftedRegister(Root, true);
406 }
407
408 /// Given an extend instruction, determine the correct shift-extend type for
409 /// that instruction.
410 ///
411 /// If the instruction is going to be used in a load or store, pass
412 /// \p IsLoadStore = true.
413 AArch64_AM::ShiftExtendType
414 getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
415 bool IsLoadStore = false) const;
416
417 /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
418 ///
419 /// \returns Either \p Reg if no change was necessary, or the new register
420 /// created by moving \p Reg.
421 ///
422 /// Note: This uses emitCopy right now.
423 Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
424 MachineIRBuilder &MIB) const;
425
426 ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
427
428 void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
429 int OpIdx = -1) const;
430 void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
431 int OpIdx = -1) const;
432 void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
433 int OpIdx = -1) const;
434 void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
435 int OpIdx = -1) const;
436 void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
437 int OpIdx = -1) const;
438 void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
439 int OpIdx = -1) const;
440 void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB,
441 const MachineInstr &MI,
442 int OpIdx = -1) const;
443
444 // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
445 void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
446
447 // Optimization methods.
448 bool tryOptSelect(GSelect &Sel);
449 bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI);
450 MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
451 MachineOperand &Predicate,
452 MachineIRBuilder &MIRBuilder) const;
453
454 /// Return true if \p MI is a load or store of \p NumBytes bytes.
455 bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
456
457 /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
458 /// register zeroed out. In other words, the result of MI has been explicitly
459 /// zero extended.
460 bool isDef32(const MachineInstr &MI) const;
461
462 const AArch64TargetMachine &TM;
463 const AArch64Subtarget &STI;
464 const AArch64InstrInfo &TII;
465 const AArch64RegisterInfo &TRI;
466 const AArch64RegisterBankInfo &RBI;
467
468 bool ProduceNonFlagSettingCondBr = false;
469
470 // Some cached values used during selection.
471 // We use LR as a live-in register, and we keep track of it here as it can be
472 // clobbered by calls.
473 Register MFReturnAddr;
474
475 MachineIRBuilder MIB;
476
477#define GET_GLOBALISEL_PREDICATES_DECL
478#include "AArch64GenGlobalISel.inc"
479#undef GET_GLOBALISEL_PREDICATES_DECL
480
481// We declare the temporaries used by selectImpl() in the class to minimize the
482// cost of constructing placeholder values.
483#define GET_GLOBALISEL_TEMPORARIES_DECL
484#include "AArch64GenGlobalISel.inc"
485#undef GET_GLOBALISEL_TEMPORARIES_DECL
486};
487
488} // end anonymous namespace
489
490#define GET_GLOBALISEL_IMPL
491#include "AArch64GenGlobalISel.inc"
492#undef GET_GLOBALISEL_IMPL
493
494AArch64InstructionSelector::AArch64InstructionSelector(
495 const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
496 const AArch64RegisterBankInfo &RBI)
497 : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
498 RBI(RBI),
499#define GET_GLOBALISEL_PREDICATES_INIT
500#include "AArch64GenGlobalISel.inc"
501#undef GET_GLOBALISEL_PREDICATES_INIT
502#define GET_GLOBALISEL_TEMPORARIES_INIT
503#include "AArch64GenGlobalISel.inc"
504#undef GET_GLOBALISEL_TEMPORARIES_INIT
505{
506}
507
508// FIXME: This should be target-independent, inferred from the types declared
509// for each class in the bank.
510//
511/// Given a register bank, and a type, return the smallest register class that
512/// can represent that combination.
513static const TargetRegisterClass *
514getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
515 bool GetAllRegSet = false) {
516 if (RB.getID() == AArch64::GPRRegBankID) {
517 if (Ty.getSizeInBits() <= 32)
518 return GetAllRegSet ? &AArch64::GPR32allRegClass
519 : &AArch64::GPR32RegClass;
520 if (Ty.getSizeInBits() == 64)
521 return GetAllRegSet ? &AArch64::GPR64allRegClass
522 : &AArch64::GPR64RegClass;
523 if (Ty.getSizeInBits() == 128)
524 return &AArch64::XSeqPairsClassRegClass;
525 return nullptr;
526 }
527
528 if (RB.getID() == AArch64::FPRRegBankID) {
529 switch (Ty.getSizeInBits()) {
530 case 8:
531 return &AArch64::FPR8RegClass;
532 case 16:
533 return &AArch64::FPR16RegClass;
534 case 32:
535 return &AArch64::FPR32RegClass;
536 case 64:
537 return &AArch64::FPR64RegClass;
538 case 128:
539 return &AArch64::FPR128RegClass;
540 }
541 return nullptr;
542 }
543
544 return nullptr;
545}
546
547/// Given a register bank, and size in bits, return the smallest register class
548/// that can represent that combination.
549static const TargetRegisterClass *
550getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
551 bool GetAllRegSet = false) {
552 unsigned RegBankID = RB.getID();
553
554 if (RegBankID == AArch64::GPRRegBankID) {
555 if (SizeInBits <= 32)
556 return GetAllRegSet ? &AArch64::GPR32allRegClass
557 : &AArch64::GPR32RegClass;
558 if (SizeInBits == 64)
559 return GetAllRegSet ? &AArch64::GPR64allRegClass
560 : &AArch64::GPR64RegClass;
561 if (SizeInBits == 128)
562 return &AArch64::XSeqPairsClassRegClass;
563 }
564
565 if (RegBankID == AArch64::FPRRegBankID) {
566 switch (SizeInBits) {
567 default:
568 return nullptr;
569 case 8:
570 return &AArch64::FPR8RegClass;
571 case 16:
572 return &AArch64::FPR16RegClass;
573 case 32:
574 return &AArch64::FPR32RegClass;
575 case 64:
576 return &AArch64::FPR64RegClass;
577 case 128:
578 return &AArch64::FPR128RegClass;
579 }
580 }
581
582 return nullptr;
583}
584
585/// Returns the correct subregister to use for a given register class.
586static bool getSubRegForClass(const TargetRegisterClass *RC,
587 const TargetRegisterInfo &TRI, unsigned &SubReg) {
588 switch (TRI.getRegSizeInBits(*RC)) {
589 case 8:
590 SubReg = AArch64::bsub;
591 break;
592 case 16:
593 SubReg = AArch64::hsub;
594 break;
595 case 32:
596 if (RC != &AArch64::FPR32RegClass)
597 SubReg = AArch64::sub_32;
598 else
599 SubReg = AArch64::ssub;
600 break;
601 case 64:
602 SubReg = AArch64::dsub;
603 break;
604 default:
605 LLVM_DEBUG(
606 dbgs() << "Couldn't find appropriate subregister for register class.");
607 return false;
608 }
609
610 return true;
611}
612
613/// Returns the minimum size the given register bank can hold.
614static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
615 switch (RB.getID()) {
616 case AArch64::GPRRegBankID:
617 return 32;
618 case AArch64::FPRRegBankID:
619 return 8;
620 default:
621 llvm_unreachable("Tried to get minimum size for unknown register bank.");
622 }
623}
624
625/// Create a REG_SEQUENCE instruction using the registers in \p Regs.
626/// Helper function for functions like createDTuple and createQTuple.
627///
628/// \p RegClassIDs - The list of register class IDs available for some tuple of
629/// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
630/// expected to contain between 2 and 4 tuple classes.
631///
632/// \p SubRegs - The list of subregister classes associated with each register
633/// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
634/// subregister class. The index of each subregister class is expected to
635/// correspond with the index of each register class.
636///
637/// \returns Either the destination register of REG_SEQUENCE instruction that
638/// was created, or the 0th element of \p Regs if \p Regs contains a single
639/// element.
640static Register createTuple(ArrayRef<Register> Regs,
641 const unsigned RegClassIDs[],
642 const unsigned SubRegs[], MachineIRBuilder &MIB) {
643 unsigned NumRegs = Regs.size();
644 if (NumRegs == 1)
645 return Regs[0];
646 assert(NumRegs >= 2 && NumRegs <= 4 &&
647 "Only support between two and 4 registers in a tuple!");
648 const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();
649 auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
650 auto RegSequence =
651 MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
652 for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
653 RegSequence.addUse(Regs[I]);
654 RegSequence.addImm(SubRegs[I]);
655 }
656 return RegSequence.getReg(0);
657}
658
659/// Create a tuple of D-registers using the registers in \p Regs.
660static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
661 static const unsigned RegClassIDs[] = {
662 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
663 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
664 AArch64::dsub2, AArch64::dsub3};
665 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
666}
667
668/// Create a tuple of Q-registers using the registers in \p Regs.
669static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
670 static const unsigned RegClassIDs[] = {
671 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
672 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
673 AArch64::qsub2, AArch64::qsub3};
674 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
675}
676
677static std::optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
678 auto &MI = *Root.getParent();
679 auto &MBB = *MI.getParent();
680 auto &MF = *MBB.getParent();
681 auto &MRI = MF.getRegInfo();
682 uint64_t Immed;
683 if (Root.isImm())
684 Immed = Root.getImm();
685 else if (Root.isCImm())
686 Immed = Root.getCImm()->getZExtValue();
687 else if (Root.isReg()) {
688 auto ValAndVReg =
689 getIConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
690 if (!ValAndVReg)
691 return std::nullopt;
692 Immed = ValAndVReg->Value.getSExtValue();
693 } else
694 return std::nullopt;
695 return Immed;
696}
697
698/// Check whether \p I is a currently unsupported binary operation:
699/// - it has an unsized type
700/// - an operand is not a vreg
701/// - all operands are not in the same bank
702/// These are checks that should someday live in the verifier, but right now,
703/// these are mostly limitations of the aarch64 selector.
704static bool unsupportedBinOp(const MachineInstr &I,
705 const AArch64RegisterBankInfo &RBI,
706 const MachineRegisterInfo &MRI,
707 const AArch64RegisterInfo &TRI) {
708 LLT Ty = MRI.getType(I.getOperand(0).getReg());
709 if (!Ty.isValid()) {
710 LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
711 return true;
712 }
713
714 const RegisterBank *PrevOpBank = nullptr;
715 for (auto &MO : I.operands()) {
716 // FIXME: Support non-register operands.
717 if (!MO.isReg()) {
718 LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
719 return true;
720 }
721
722 // FIXME: Can generic operations have physical registers operands? If
723 // so, this will need to be taught about that, and we'll need to get the
724 // bank out of the minimal class for the register.
725 // Either way, this needs to be documented (and possibly verified).
726 if (!MO.getReg().isVirtual()) {
727 LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
728 return true;
729 }
730
731 const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
732 if (!OpBank) {
733 LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
734 return true;
735 }
736
737 if (PrevOpBank && OpBank != PrevOpBank) {
738 LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
739 return true;
740 }
741 PrevOpBank = OpBank;
742 }
743 return false;
744}
745
746/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
747/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
748/// and of size \p OpSize.
749/// \returns \p GenericOpc if the combination is unsupported.
750static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
751 unsigned OpSize) {
752 switch (RegBankID) {
753 case AArch64::GPRRegBankID:
754 if (OpSize == 32) {
755 switch (GenericOpc) {
756 case TargetOpcode::G_SHL:
757 return AArch64::LSLVWr;
758 case TargetOpcode::G_LSHR:
759 return AArch64::LSRVWr;
760 case TargetOpcode::G_ASHR:
761 return AArch64::ASRVWr;
762 default:
763 return GenericOpc;
764 }
765 } else if (OpSize == 64) {
766 switch (GenericOpc) {
767 case TargetOpcode::G_PTR_ADD:
768 return AArch64::ADDXrr;
769 case TargetOpcode::G_SHL:
770 return AArch64::LSLVXr;
771 case TargetOpcode::G_LSHR:
772 return AArch64::LSRVXr;
773 case TargetOpcode::G_ASHR:
774 return AArch64::ASRVXr;
775 default:
776 return GenericOpc;
777 }
778 }
779 break;
780 case AArch64::FPRRegBankID:
781 switch (OpSize) {
782 case 32:
783 switch (GenericOpc) {
784 case TargetOpcode::G_FADD:
785 return AArch64::FADDSrr;
786 case TargetOpcode::G_FSUB:
787 return AArch64::FSUBSrr;
788 case TargetOpcode::G_FMUL:
789 return AArch64::FMULSrr;
790 case TargetOpcode::G_FDIV:
791 return AArch64::FDIVSrr;
792 default:
793 return GenericOpc;
794 }
795 case 64:
796 switch (GenericOpc) {
797 case TargetOpcode::G_FADD:
798 return AArch64::FADDDrr;
799 case TargetOpcode::G_FSUB:
800 return AArch64::FSUBDrr;
801 case TargetOpcode::G_FMUL:
802 return AArch64::FMULDrr;
803 case TargetOpcode::G_FDIV:
804 return AArch64::FDIVDrr;
805 case TargetOpcode::G_OR:
806 return AArch64::ORRv8i8;
807 default:
808 return GenericOpc;
809 }
810 }
811 break;
812 }
813 return GenericOpc;
814}
815
816/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
817/// appropriate for the (value) register bank \p RegBankID and of memory access
818/// size \p OpSize. This returns the variant with the base+unsigned-immediate
819/// addressing mode (e.g., LDRXui).
820/// \returns \p GenericOpc if the combination is unsupported.
821static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
822 unsigned OpSize) {
823 const bool isStore = GenericOpc == TargetOpcode::G_STORE;
824 switch (RegBankID) {
825 case AArch64::GPRRegBankID:
826 switch (OpSize) {
827 case 8:
828 return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
829 case 16:
830 return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
831 case 32:
832 return isStore ? AArch64::STRWui : AArch64::LDRWui;
833 case 64:
834 return isStore ? AArch64::STRXui : AArch64::LDRXui;
835 }
836 break;
837 case AArch64::FPRRegBankID:
838 switch (OpSize) {
839 case 8:
840 return isStore ? AArch64::STRBui : AArch64::LDRBui;
841 case 16:
842 return isStore ? AArch64::STRHui : AArch64::LDRHui;
843 case 32:
844 return isStore ? AArch64::STRSui : AArch64::LDRSui;
845 case 64:
846 return isStore ? AArch64::STRDui : AArch64::LDRDui;
847 case 128:
848 return isStore ? AArch64::STRQui : AArch64::LDRQui;
849 }
850 break;
851 }
852 return GenericOpc;
853}
854
855/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
856/// to \p *To.
857///
858/// E.g "To = COPY SrcReg:SubReg"
859static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
860 const RegisterBankInfo &RBI, Register SrcReg,
861 const TargetRegisterClass *To, unsigned SubReg) {
862 assert(SrcReg.isValid() && "Expected a valid source register?");
863 assert(To && "Destination register class cannot be null");
864 assert(SubReg && "Expected a valid subregister");
865
866 MachineIRBuilder MIB(I);
867 auto SubRegCopy =
868 MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
869 MachineOperand &RegOp = I.getOperand(1);
870 RegOp.setReg(SubRegCopy.getReg(0));
871
872 // It's possible that the destination register won't be constrained. Make
873 // sure that happens.
874 if (!I.getOperand(0).getReg().isPhysical())
875 RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
876
877 return true;
878}
879
880/// Helper function to get the source and destination register classes for a
881/// copy. Returns a std::pair containing the source register class for the
882/// copy, and the destination register class for the copy. If a register class
883/// cannot be determined, then it will be nullptr.
884static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
885getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
886 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
887 const RegisterBankInfo &RBI) {
888 Register DstReg = I.getOperand(0).getReg();
889 Register SrcReg = I.getOperand(1).getReg();
890 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
891 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
892 unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
893 unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
894
895 // Special casing for cross-bank copies of s1s. We can technically represent
896 // a 1-bit value with any size of register. The minimum size for a GPR is 32
897 // bits. So, we need to put the FPR on 32 bits as well.
898 //
899 // FIXME: I'm not sure if this case holds true outside of copies. If it does,
900 // then we can pull it into the helpers that get the appropriate class for a
901 // register bank. Or make a new helper that carries along some constraint
902 // information.
903 if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
904 SrcSize = DstSize = 32;
905
906 return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
907 getMinClassForRegBank(DstRegBank, DstSize, true)};
908}
909
910// FIXME: We need some sort of API in RBI/TRI to allow generic code to
911// constrain operands of simple instructions given a TargetRegisterClass
912// and LLT
913static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI,
914 const RegisterBankInfo &RBI) {
915 for (MachineOperand &MO : I.operands()) {
916 if (!MO.isReg())
917 continue;
918 Register Reg = MO.getReg();
919 if (!Reg)
920 continue;
921 if (Reg.isPhysical())
922 continue;
923 LLT Ty = MRI.getType(Reg);
924 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
925 const TargetRegisterClass *RC =
926 RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
927 if (!RC) {
928 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
929 RC = getRegClassForTypeOnBank(Ty, RB);
930 if (!RC) {
931 LLVM_DEBUG(
932 dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n");
933 break;
934 }
935 }
936 RBI.constrainGenericRegister(Reg, *RC, MRI);
937 }
938
939 return true;
940}
941
942static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
943 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
944 const RegisterBankInfo &RBI) {
945 Register DstReg = I.getOperand(0).getReg();
946 Register SrcReg = I.getOperand(1).getReg();
947 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
948 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
949
950 // Find the correct register classes for the source and destination registers.
951 const TargetRegisterClass *SrcRC;
952 const TargetRegisterClass *DstRC;
953 std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
954
955 if (!DstRC) {
956 LLVM_DEBUG(dbgs() << "Unexpected dest size "
957 << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
958 return false;
959 }
960
961 // Is this a copy? If so, then we may need to insert a subregister copy.
962 if (I.isCopy()) {
963 // Yes. Check if there's anything to fix up.
964 if (!SrcRC) {
965 LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
966 return false;
967 }
968
969 unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
970 unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
971 unsigned SubReg;
972
973 // If the source bank doesn't support a subregister copy small enough,
974 // then we first need to copy to the destination bank.
975 if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
976 const TargetRegisterClass *DstTempRC =
977 getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
978 getSubRegForClass(DstRC, TRI, SubReg);
979
980 MachineIRBuilder MIB(I);
981 auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
982 copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
983 } else if (SrcSize > DstSize) {
984 // If the source register is bigger than the destination we need to
985 // perform a subregister copy.
986 const TargetRegisterClass *SubRegRC =
987 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
988 getSubRegForClass(SubRegRC, TRI, SubReg);
989 copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
990 } else if (DstSize > SrcSize) {
991 // If the destination register is bigger than the source we need to do
992 // a promotion using SUBREG_TO_REG.
993 const TargetRegisterClass *PromotionRC =
994 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
995 getSubRegForClass(SrcRC, TRI, SubReg);
996
997 Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
998 BuildMI(*I.getParent(), I, I.getDebugLoc(),
999 TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
1000 .addImm(0)
1001 .addUse(SrcReg)
1002 .addImm(SubReg);
1003 MachineOperand &RegOp = I.getOperand(1);
1004 RegOp.setReg(PromoteReg);
1005 }
1006
1007 // If the destination is a physical register, then there's nothing to
1008 // change, so we're done.
1009 if (DstReg.isPhysical())
1010 return true;
1011 }
1012
1013 // No need to constrain SrcReg. It will get constrained when we hit another
1014 // of its use or its defs. Copies do not have constraints.
1015 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
1016 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
1017 << " operand\n");
1018 return false;
1019 }
1020
1021 // If this is a GPR ZEXT that we want to just reduce down into a copy.
1022 // The sizes will be mismatched with the source < 32b but that's ok.
1023 if (I.getOpcode() == TargetOpcode::G_ZEXT) {
1024 I.setDesc(TII.get(AArch64::COPY));
1025 assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
1026 return selectCopy(I, TII, MRI, TRI, RBI);
1027 }
1028
1029 I.setDesc(TII.get(AArch64::COPY));
1030 return true;
1031}
1032
1033static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
1034 if (!DstTy.isScalar() || !SrcTy.isScalar())
1035 return GenericOpc;
1036
1037 const unsigned DstSize = DstTy.getSizeInBits();
1038 const unsigned SrcSize = SrcTy.getSizeInBits();
1039
1040 switch (DstSize) {
1041 case 32:
1042 switch (SrcSize) {
1043 case 32:
1044 switch (GenericOpc) {
1045 case TargetOpcode::G_SITOFP:
1046 return AArch64::SCVTFUWSri;
1047 case TargetOpcode::G_UITOFP:
1048 return AArch64::UCVTFUWSri;
1049 case TargetOpcode::G_FPTOSI:
1050 return AArch64::FCVTZSUWSr;
1051 case TargetOpcode::G_FPTOUI:
1052 return AArch64::FCVTZUUWSr;
1053 default:
1054 return GenericOpc;
1055 }
1056 case 64:
1057 switch (GenericOpc) {
1058 case TargetOpcode::G_SITOFP:
1059 return AArch64::SCVTFUXSri;
1060 case TargetOpcode::G_UITOFP:
1061 return AArch64::UCVTFUXSri;
1062 case TargetOpcode::G_FPTOSI:
1063 return AArch64::FCVTZSUWDr;
1064 case TargetOpcode::G_FPTOUI:
1065 return AArch64::FCVTZUUWDr;
1066 default:
1067 return GenericOpc;
1068 }
1069 default:
1070 return GenericOpc;
1071 }
1072 case 64:
1073 switch (SrcSize) {
1074 case 32:
1075 switch (GenericOpc) {
1076 case TargetOpcode::G_SITOFP:
1077 return AArch64::SCVTFUWDri;
1078 case TargetOpcode::G_UITOFP:
1079 return AArch64::UCVTFUWDri;
1080 case TargetOpcode::G_FPTOSI:
1081 return AArch64::FCVTZSUXSr;
1082 case TargetOpcode::G_FPTOUI:
1083 return AArch64::FCVTZUUXSr;
1084 default:
1085 return GenericOpc;
1086 }
1087 case 64:
1088 switch (GenericOpc) {
1089 case TargetOpcode::G_SITOFP:
1090 return AArch64::SCVTFUXDri;
1091 case TargetOpcode::G_UITOFP:
1092 return AArch64::UCVTFUXDri;
1093 case TargetOpcode::G_FPTOSI:
1094 return AArch64::FCVTZSUXDr;
1095 case TargetOpcode::G_FPTOUI:
1096 return AArch64::FCVTZUUXDr;
1097 default:
1098 return GenericOpc;
1099 }
1100 default:
1101 return GenericOpc;
1102 }
1103 default:
1104 return GenericOpc;
1105 };
1106 return GenericOpc;
1107}
1108
1109MachineInstr *
1110AArch64InstructionSelector::emitSelect(Register Dst, Register True,
1111 Register False, AArch64CC::CondCode CC,
1112 MachineIRBuilder &MIB) const {
1113 MachineRegisterInfo &MRI = *MIB.getMRI();
1114 assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
1115 RBI.getRegBank(True, MRI, TRI)->getID() &&
1116 "Expected both select operands to have the same regbank?");
1117 LLT Ty = MRI.getType(True);
1118 if (Ty.isVector())
1119 return nullptr;
1120 const unsigned Size = Ty.getSizeInBits();
1121 assert((Size == 32 || Size == 64) &&
1122 "Expected 32 bit or 64 bit select only?");
1123 const bool Is32Bit = Size == 32;
1124 if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
1125 unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1126 auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1127 constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
1128 return &*FCSel;
1129 }
1130
1131 // By default, we'll try and emit a CSEL.
1132 unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1133 bool Optimized = false;
1134 auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
1135 &Optimized](Register &Reg, Register &OtherReg,
1136 bool Invert) {
1137 if (Optimized)
1138 return false;
1139
1140 // Attempt to fold:
1141 //
1142 // %sub = G_SUB 0, %x
1143 // %select = G_SELECT cc, %reg, %sub
1144 //
1145 // Into:
1146 // %select = CSNEG %reg, %x, cc
1147 Register MatchReg;
1148 if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
1149 Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1150 Reg = MatchReg;
1151 if (Invert) {
1152 CC = AArch64CC::getInvertedCondCode(CC);
1153 std::swap(Reg, OtherReg);
1154 }
1155 return true;
1156 }
1157
1158 // Attempt to fold:
1159 //
1160 // %xor = G_XOR %x, -1
1161 // %select = G_SELECT cc, %reg, %xor
1162 //
1163 // Into:
1164 // %select = CSINV %reg, %x, cc
1165 if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
1166 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1167 Reg = MatchReg;
1168 if (Invert) {
1169 CC = AArch64CC::getInvertedCondCode(CC);
1170 std::swap(Reg, OtherReg);
1171 }
1172 return true;
1173 }
1174
1175 // Attempt to fold:
1176 //
1177 // %add = G_ADD %x, 1
1178 // %select = G_SELECT cc, %reg, %add
1179 //
1180 // Into:
1181 // %select = CSINC %reg, %x, cc
1182 if (mi_match(Reg, MRI,
1183 m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),
1184 m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {
1185 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1186 Reg = MatchReg;
1187 if (Invert) {
1188 CC = AArch64CC::getInvertedCondCode(CC);
1189 std::swap(Reg, OtherReg);
1190 }
1191 return true;
1192 }
1193
1194 return false;
1195 };
1196
1197 // Helper lambda which tries to use CSINC/CSINV for the instruction when its
1198 // true/false values are constants.
1199 // FIXME: All of these patterns already exist in tablegen. We should be
1200 // able to import these.
1201 auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
1202 &Optimized]() {
1203 if (Optimized)
1204 return false;
1205 auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI);
1206 auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI);
1207 if (!TrueCst && !FalseCst)
1208 return false;
1209
1210 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1211 if (TrueCst && FalseCst) {
1212 int64_t T = TrueCst->Value.getSExtValue();
1213 int64_t F = FalseCst->Value.getSExtValue();
1214
1215 if (T == 0 && F == 1) {
1216 // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
1217 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1218 True = ZReg;
1219 False = ZReg;
1220 return true;
1221 }
1222
1223 if (T == 0 && F == -1) {
1224 // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
1225 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1226 True = ZReg;
1227 False = ZReg;
1228 return true;
1229 }
1230 }
1231
1232 if (TrueCst) {
1233 int64_t T = TrueCst->Value.getSExtValue();
1234 if (T == 1) {
1235 // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
1236 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1237 True = False;
1238 False = ZReg;
1239 CC = AArch64CC::getInvertedCondCode(CC);
1240 return true;
1241 }
1242
1243 if (T == -1) {
1244 // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
1245 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1246 True = False;
1247 False = ZReg;
1248 CC = AArch64CC::getInvertedCondCode(CC);
1249 return true;
1250 }
1251 }
1252
1253 if (FalseCst) {
1254 int64_t F = FalseCst->Value.getSExtValue();
1255 if (F == 1) {
1256 // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
1257 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1258 False = ZReg;
1259 return true;
1260 }
1261
1262 if (F == -1) {
1263 // G_SELECT cc, t, -1 -> CSINV t, zreg, cc
1264 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1265 False = ZReg;
1266 return true;
1267 }
1268 }
1269 return false;
1270 };
1271
1272 Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
1273 Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
1274 Optimized |= TryOptSelectCst();
1275 auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1276 constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
1277 return &*SelectInst;
1278}
1279
1280static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
1281 switch (P) {
1282 default:
1283 llvm_unreachable("Unknown condition code!");
1284 case CmpInst::ICMP_NE:
1285 return AArch64CC::NE;
1286 case CmpInst::ICMP_EQ:
1287 return AArch64CC::EQ;
1288 case CmpInst::ICMP_SGT:
1289 return AArch64CC::GT;
1290 case CmpInst::ICMP_SGE:
1291 return AArch64CC::GE;
1292 case CmpInst::ICMP_SLT:
1293 return AArch64CC::LT;
1294 case CmpInst::ICMP_SLE:
1295 return AArch64CC::LE;
1296 case CmpInst::ICMP_UGT:
1297 return AArch64CC::HI;
1298 case CmpInst::ICMP_UGE:
1299 return AArch64CC::HS;
1300 case CmpInst::ICMP_ULT:
1301 return AArch64CC::LO;
1302 case CmpInst::ICMP_ULE:
1303 return AArch64CC::LS;
1304 }
1305}
1306
1307/// changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
1308static void changeFPCCToORAArch64CC(CmpInst::Predicate CC,
1309 AArch64CC::CondCode &CondCode,
1310 AArch64CC::CondCode &CondCode2) {
1311 CondCode2 = AArch64CC::AL;
1312 switch (CC) {
1313 default:
1314 llvm_unreachable("Unknown FP condition!");
1315 case CmpInst::FCMP_OEQ:
1316 CondCode = AArch64CC::EQ;
1317 break;
1318 case CmpInst::FCMP_OGT:
1319 CondCode = AArch64CC::GT;
1320 break;
1321 case CmpInst::FCMP_OGE:
1322 CondCode = AArch64CC::GE;
1323 break;
1324 case CmpInst::FCMP_OLT:
1325 CondCode = AArch64CC::MI;
1326 break;
1327 case CmpInst::FCMP_OLE:
1328 CondCode = AArch64CC::LS;
1329 break;
1330 case CmpInst::FCMP_ONE:
1331 CondCode = AArch64CC::MI;
1332 CondCode2 = AArch64CC::GT;
1333 break;
1334 case CmpInst::FCMP_ORD:
1335 CondCode = AArch64CC::VC;
1336 break;
1337 case CmpInst::FCMP_UNO:
1338 CondCode = AArch64CC::VS;
1339 break;
1340 case CmpInst::FCMP_UEQ:
1341 CondCode = AArch64CC::EQ;
1342 CondCode2 = AArch64CC::VS;
1343 break;
1344 case CmpInst::FCMP_UGT:
1345 CondCode = AArch64CC::HI;
1346 break;
1347 case CmpInst::FCMP_UGE:
1348 CondCode = AArch64CC::PL;
1349 break;
1350 case CmpInst::FCMP_ULT:
1351 CondCode = AArch64CC::LT;
1352 break;
1353 case CmpInst::FCMP_ULE:
1354 CondCode = AArch64CC::LE;
1355 break;
1356 case CmpInst::FCMP_UNE:
1357 CondCode = AArch64CC::NE;
1358 break;
1359 }
1360}
1361
1362/// Convert an IR fp condition code to an AArch64 CC.
1363/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
1364/// should be AND'ed instead of OR'ed.
1365static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC,
1366 AArch64CC::CondCode &CondCode,
1367 AArch64CC::CondCode &CondCode2) {
1368 CondCode2 = AArch64CC::AL;
1369 switch (CC) {
1370 default:
1371 changeFPCCToORAArch64CC(CC, CondCode, CondCode2);
1372 assert(CondCode2 == AArch64CC::AL);
1373 break;
1374 case CmpInst::FCMP_ONE:
1375 // (a one b)
1376 // == ((a olt b) || (a ogt b))
1377 // == ((a ord b) && (a une b))
1378 CondCode = AArch64CC::VC;
1379 CondCode2 = AArch64CC::NE;
1380 break;
1381 case CmpInst::FCMP_UEQ:
1382 // (a ueq b)
1383 // == ((a uno b) || (a oeq b))
1384 // == ((a ule b) && (a uge b))
1385 CondCode = AArch64CC::PL;
1386 CondCode2 = AArch64CC::LE;
1387 break;
1388 }
1389}
1390
1391/// Return a register which can be used as a bit to test in a TB(N)Z.
1392static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1393 MachineRegisterInfo &MRI) {
1394 assert(Reg.isValid() && "Expected valid register!");
1395 bool HasZext = false;
1396 while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1397 unsigned Opc = MI->getOpcode();
1398
1399 if (!MI->getOperand(0).isReg() ||
1400 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1401 break;
1402
1403 // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
1404 //
1405 // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1406 // on the truncated x is the same as the bit number on x.
1407 if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1408 Opc == TargetOpcode::G_TRUNC) {
1409 if (Opc == TargetOpcode::G_ZEXT)
1410 HasZext = true;
1411
1412 Register NextReg = MI->getOperand(1).getReg();
1413 // Did we find something worth folding?
1414 if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
1415 break;
1416
1417 // NextReg is worth folding. Keep looking.
1418 Reg = NextReg;
1419 continue;
1420 }
1421
1422 // Attempt to find a suitable operation with a constant on one side.
1423 std::optional<uint64_t> C;
1424 Register TestReg;
1425 switch (Opc) {
1426 default:
1427 break;
1428 case TargetOpcode::G_AND:
1429 case TargetOpcode::G_XOR: {
1430 TestReg = MI->getOperand(1).getReg();
1431 Register ConstantReg = MI->getOperand(2).getReg();
1432 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1433 if (!VRegAndVal) {
1434 // AND commutes, check the other side for a constant.
1435 // FIXME: Can we canonicalize the constant so that it's always on the
1436 // same side at some point earlier?
1437 std::swap(ConstantReg, TestReg);
1438 VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1439 }
1440 if (VRegAndVal) {
1441 if (HasZext)
1442 C = VRegAndVal->Value.getZExtValue();
1443 else
1444 C = VRegAndVal->Value.getSExtValue();
1445 }
1446 break;
1447 }
1448 case TargetOpcode::G_ASHR:
1449 case TargetOpcode::G_LSHR:
1450 case TargetOpcode::G_SHL: {
1451 TestReg = MI->getOperand(1).getReg();
1452 auto VRegAndVal =
1453 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1454 if (VRegAndVal)
1455 C = VRegAndVal->Value.getSExtValue();
1456 break;
1457 }
1458 }
1459
1460 // Didn't find a constant or viable register. Bail out of the loop.
1461 if (!C || !TestReg.isValid())
1462 break;
1463
1464 // We found a suitable instruction with a constant. Check to see if we can
1465 // walk through the instruction.
1466 Register NextReg;
1467 unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
1468 switch (Opc) {
1469 default:
1470 break;
1471 case TargetOpcode::G_AND:
1472 // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1473 if ((*C >> Bit) & 1)
1474 NextReg = TestReg;
1475 break;
1476 case TargetOpcode::G_SHL:
1477 // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
1478 // the type of the register.
1479 if (*C <= Bit && (Bit - *C) < TestRegSize) {
1480 NextReg = TestReg;
1481 Bit = Bit - *C;
1482 }
1483 break;
1484 case TargetOpcode::G_ASHR:
1485 // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
1486 // in x
1487 NextReg = TestReg;
1488 Bit = Bit + *C;
1489 if (Bit >= TestRegSize)
1490 Bit = TestRegSize - 1;
1491 break;
1492 case TargetOpcode::G_LSHR:
1493 // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1494 if ((Bit + *C) < TestRegSize) {
1495 NextReg = TestReg;
1496 Bit = Bit + *C;
1497 }
1498 break;
1499 case TargetOpcode::G_XOR:
1500 // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1501 // appropriate.
1502 //
1503 // e.g. If x' = xor x, c, and the b-th bit is set in c then
1504 //
1505 // tbz x', b -> tbnz x, b
1506 //
1507 // Because x' only has the b-th bit set if x does not.
1508 if ((*C >> Bit) & 1)
1509 Invert = !Invert;
1510 NextReg = TestReg;
1511 break;
1512 }
1513
1514 // Check if we found anything worth folding.
1515 if (!NextReg.isValid())
1516 return Reg;
1517 Reg = NextReg;
1518 }
1519
1520 return Reg;
1521}
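// Worked example (not from the original source) of the bit renumbering
// above, assuming we start out testing bit 3 of %y:
//   %y = G_SHL %x, 2   -> test bit 1 of %x       (3 - 2, still in range)
//   %y = G_LSHR %x, 4  -> test bit 7 of %x       (3 + 4, if 7 < bit width)
//   %y = G_ASHR %x, 70 -> test the sign bit of %x (clamped to width - 1)
//   %y = G_XOR %x, c   -> if bit 3 of c is set, keep bit 3 but flip TBZ<->TBNZ
//   %y = G_AND %x, m   -> if bit 3 of m is set, test bit 3 of %x directly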
1522
1523MachineInstr *AArch64InstructionSelector::emitTestBit(
1524 Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1525 MachineIRBuilder &MIB) const {
1526 assert(TestReg.isValid());
1527 assert(ProduceNonFlagSettingCondBr &&
1528 "Cannot emit TB(N)Z with speculation tracking!");
1529 MachineRegisterInfo &MRI = *MIB.getMRI();
1530
1531 // Attempt to optimize the test bit by walking over instructions.
1532 TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1533 LLT Ty = MRI.getType(TestReg);
1534 unsigned Size = Ty.getSizeInBits();
1535 assert(!Ty.isVector() && "Expected a scalar!");
1536 assert(Bit < 64 && "Bit is too large!");
1537
1538 // When the test register is a 64-bit register, we have to narrow to make
1539 // TBNZW work.
1540 bool UseWReg = Bit < 32;
1541 unsigned NecessarySize = UseWReg ? 32 : 64;
1542 if (Size != NecessarySize)
1543 TestReg = moveScalarRegClass(
1544 TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1545 MIB);
1546
1547 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1548 {AArch64::TBZW, AArch64::TBNZW}};
1549 unsigned Opc = OpcTable[UseWReg][IsNegative];
1550 auto TestBitMI =
1551 MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1552 constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1553 return &*TestBitMI;
1554}
1555
1556bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1557 MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1558 MachineIRBuilder &MIB) const {
1559 assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
1560 // Given something like this:
1561 //
1562 // %x = ...Something...
1563 // %one = G_CONSTANT i64 1
1564 // %zero = G_CONSTANT i64 0
1565 // %and = G_AND %x, %one
1566 // %cmp = G_ICMP intpred(ne), %and, %zero
1567 // %cmp_trunc = G_TRUNC %cmp
1568 // G_BRCOND %cmp_trunc, %bb.3
1569 //
1570 // We want to try and fold the AND into the G_BRCOND and produce either a
1571 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1572 //
1573 // In this case, we'd get
1574 //
1575 // TBNZ %x %bb.3
1576 //
1577
1578 // Check if the AND has a constant on its RHS which we can use as a mask.
1579 // If it's a power of 2, then it's the same as checking a specific bit.
1580 // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1581 auto MaybeBit = getIConstantVRegValWithLookThrough(
1582 AndInst.getOperand(2).getReg(), *MIB.getMRI());
1583 if (!MaybeBit)
1584 return false;
1585
1586 int32_t Bit = MaybeBit->Value.exactLogBase2();
1587 if (Bit < 0)
1588 return false;
1589
1590 Register TestReg = AndInst.getOperand(1).getReg();
1591
1592 // Emit a TB(N)Z.
1593 emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1594 return true;
1595}
1596
1597MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1598 bool IsNegative,
1599 MachineBasicBlock *DestMBB,
1600 MachineIRBuilder &MIB) const {
1601 assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
1602 MachineRegisterInfo &MRI = *MIB.getMRI();
1603 assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
1604 AArch64::GPRRegBankID &&
1605 "Expected GPRs only?");
1606 auto Ty = MRI.getType(CompareReg);
1607 unsigned Width = Ty.getSizeInBits();
1608 assert(!Ty.isVector() && "Expected scalar only?");
1609 assert(Width <= 64 && "Expected width to be at most 64?");
1610 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1611 {AArch64::CBNZW, AArch64::CBNZX}};
1612 unsigned Opc = OpcTable[IsNegative][Width == 64];
1613 auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
1614 constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
1615 return &*BranchMI;
1616}
1617
1618bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1619 MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1620 assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
1621 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1622 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1623 // totally clean. Some of them require two branches to implement.
1624 auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
1625 emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
1626 Pred);
1627 AArch64CC::CondCode CC1, CC2;
1628 changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2);
1629 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1630 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
1631 if (CC2 != AArch64CC::AL)
1632 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
1633 I.eraseFromParent();
1634 return true;
1635}
1636
1637bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1638 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1639 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1640 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1641 // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
1642 //
1643 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1644 // instructions will not be produced, as they are conditional branch
1645 // instructions that do not set flags.
1646 if (!ProduceNonFlagSettingCondBr)
1647 return false;
1648
1649 MachineRegisterInfo &MRI = *MIB.getMRI();
1650 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1651 auto Pred =
1652 static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
1653 Register LHS = ICmp.getOperand(2).getReg();
1654 Register RHS = ICmp.getOperand(3).getReg();
1655
1656 // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
1657 auto VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1658 MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1659
1660 // When we can emit a TB(N)Z, prefer that.
1661 //
1662 // Handle non-commutative condition codes first.
1663 // Note that we don't want to do this when we have a G_AND because it can
1664 // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1665 if (VRegAndVal && !AndInst) {
1666 int64_t C = VRegAndVal->Value.getSExtValue();
1667
1668 // When we have a greater-than comparison, we can just test if the msb is
1669 // zero.
1670 if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1671 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1672 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1673 I.eraseFromParent();
1674 return true;
1675 }
1676
1677 // When we have a less than comparison, we can just test if the msb is not
1678 // zero.
1679 if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1680 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1681 emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
1682 I.eraseFromParent();
1683 return true;
1684 }
1685
1686 // Inversely, if we have a signed greater-than-or-equal comparison to zero,
1687 // we can test if the msb is zero.
1688 if (C == 0 && Pred == CmpInst::ICMP_SGE) {
1689 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1690 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1691 I.eraseFromParent();
1692 return true;
1693 }
1694 }
1695
1696 // Attempt to handle commutative condition codes. Right now, that's only
1697 // eq/ne.
1698 if (ICmpInst::isEquality(Pred)) {
1699 if (!VRegAndVal) {
1700 std::swap(RHS, LHS);
1701 VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1702 AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1703 }
1704
1705 if (VRegAndVal && VRegAndVal->Value == 0) {
1706 // If there's a G_AND feeding into this branch, try to fold it away by
1707 // emitting a TB(N)Z instead.
1708 //
1709 // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
1710 // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
1711 // would be redundant.
1712 if (AndInst &&
1713 tryOptAndIntoCompareBranch(
1714 *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
1715 I.eraseFromParent();
1716 return true;
1717 }
1718
1719 // Otherwise, try to emit a CB(N)Z instead.
1720 auto LHSTy = MRI.getType(LHS);
1721 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1722 emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
1723 I.eraseFromParent();
1724 return true;
1725 }
1726 }
1727 }
1728
1729 return false;
1730}
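// Illustrative sketch (not from the original source) of the branch folds
// above for a 64-bit %x:
//   G_BRCOND (G_ICMP slt %x, 0),  %bb  ->  TBNZ %x, 63, %bb   (msb set)
//   G_BRCOND (G_ICMP sgt %x, -1), %bb  ->  TBZ  %x, 63, %bb   (msb clear)
//   G_BRCOND (G_ICMP eq  %x, 0),  %bb  ->  CBZ  %x, %bb
//   G_BRCOND (G_ICMP ne  %x, 0),  %bb  ->  CBNZ %x, %bb
// and when %x is itself a single-use G_AND with a power-of-two mask, the
// equality cases become a TB(N)Z on that mask's bit instead.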
1731
1732bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1733 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1734 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1735 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1736 if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
1737 return true;
1738
1739 // Couldn't optimize. Emit a compare + a Bcc.
1740 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1741 auto PredOp = ICmp.getOperand(1);
1742 emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
1743 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
1744 static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
1745 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1746 I.eraseFromParent();
1747 return true;
1748}
1749
1750bool AArch64InstructionSelector::selectCompareBranch(
1751 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
1752 Register CondReg = I.getOperand(0).getReg();
1753 MachineInstr *CCMI = MRI.getVRegDef(CondReg);
1754 // Try to select the G_BRCOND using whatever is feeding the condition if
1755 // possible.
1756 unsigned CCMIOpc = CCMI->getOpcode();
1757 if (CCMIOpc == TargetOpcode::G_FCMP)
1758 return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
1759 if (CCMIOpc == TargetOpcode::G_ICMP)
1760 return selectCompareBranchFedByICmp(I, *CCMI, MIB);
1761
1762 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1763 // instructions will not be produced, as they are conditional branch
1764 // instructions that do not set flags.
1765 if (ProduceNonFlagSettingCondBr) {
1766 emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
1767 I.getOperand(1).getMBB(), MIB);
1768 I.eraseFromParent();
1769 return true;
1770 }
1771
1772 // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
1773 auto TstMI =
1774 MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
1775 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
1776 auto Bcc = MIB.buildInstr(AArch64::Bcc)
1777 .addImm(AArch64CC::EQ)
1778 .addMBB(I.getOperand(1).getMBB());
1779 I.eraseFromParent();
1780 return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
1781}
1782
1783/// Returns the element immediate value of a vector shift operand if found.
1784/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1785static std::optional<int64_t> getVectorShiftImm(Register Reg,
1786 MachineRegisterInfo &MRI) {
1787 assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1788 MachineInstr *OpMI = MRI.getVRegDef(Reg);
1789 return getAArch64VectorSplatScalar(*OpMI, MRI);
1790}
1791
1792/// Matches and returns the shift immediate value for a SHL instruction given
1793/// a shift operand.
1794static std::optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg,
1795 MachineRegisterInfo &MRI) {
1796 std::optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1797 if (!ShiftImm)
1798 return std::nullopt;
1799 // Check the immediate is in range for a SHL.
1800 int64_t Imm = *ShiftImm;
1801 if (Imm < 0)
1802 return std::nullopt;
1803 switch (SrcTy.getElementType().getSizeInBits()) {
1804 default:
1805 LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
1806 return std::nullopt;
1807 case 8:
1808 if (Imm > 7)
1809 return std::nullopt;
1810 break;
1811 case 16:
1812 if (Imm > 15)
1813 return std::nullopt;
1814 break;
1815 case 32:
1816 if (Imm > 31)
1817 return std::nullopt;
1818 break;
1819 case 64:
1820 if (Imm > 63)
1821 return std::nullopt;
1822 break;
1823 }
1824 return Imm;
1825}
1826
1827bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
1828 MachineRegisterInfo &MRI) {
1829 assert(I.getOpcode() == TargetOpcode::G_SHL);
1830 Register DstReg = I.getOperand(0).getReg();
1831 const LLT Ty = MRI.getType(DstReg);
1832 Register Src1Reg = I.getOperand(1).getReg();
1833 Register Src2Reg = I.getOperand(2).getReg();
1834
1835 if (!Ty.isVector())
1836 return false;
1837
1838 // Check if we have a vector of constants on RHS that we can select as the
1839 // immediate form.
1840 std::optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1841
1842 unsigned Opc = 0;
1843 if (Ty == LLT::fixed_vector(2, 64)) {
1844 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1845 } else if (Ty == LLT::fixed_vector(4, 32)) {
1846 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1847 } else if (Ty == LLT::fixed_vector(2, 32)) {
1848 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1849 } else if (Ty == LLT::fixed_vector(4, 16)) {
1850 Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1851 } else if (Ty == LLT::fixed_vector(8, 16)) {
1852 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1853 } else if (Ty == LLT::fixed_vector(16, 8)) {
1854 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1855 } else if (Ty == LLT::fixed_vector(8, 8)) {
1856 Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1857 } else {
1858 LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1859 return false;
1860 }
1861
1862 auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1863 if (ImmVal)
1864 Shl.addImm(*ImmVal);
1865 else
1866 Shl.addUse(Src2Reg);
1867 constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
1868 I.eraseFromParent();
1869 return true;
1870}
1871
1872bool AArch64InstructionSelector::selectVectorAshrLshr(
1873 MachineInstr &I, MachineRegisterInfo &MRI) {
1874 assert(I.getOpcode() == TargetOpcode::G_ASHR ||
1875 I.getOpcode() == TargetOpcode::G_LSHR);
1876 Register DstReg = I.getOperand(0).getReg();
1877 const LLT Ty = MRI.getType(DstReg);
1878 Register Src1Reg = I.getOperand(1).getReg();
1879 Register Src2Reg = I.getOperand(2).getReg();
1880
1881 if (!Ty.isVector())
1882 return false;
1883
1884 bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1885
1886 // We expect the immediate case to be lowered in the PostLegalCombiner to
1887 // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1888
1889 // There is no vector shift-right-by-register instruction; instead, the
1890 // shift-left-by-register instructions take a signed shift amount, where a
1891 // negative amount specifies a right shift.
1892
1893 unsigned Opc = 0;
1894 unsigned NegOpc = 0;
1895 const TargetRegisterClass *RC =
1896 getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID));
1897 if (Ty == LLT::fixed_vector(2, 64)) {
1898 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1899 NegOpc = AArch64::NEGv2i64;
1900 } else if (Ty == LLT::fixed_vector(4, 32)) {
1901 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1902 NegOpc = AArch64::NEGv4i32;
1903 } else if (Ty == LLT::fixed_vector(2, 32)) {
1904 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1905 NegOpc = AArch64::NEGv2i32;
1906 } else if (Ty == LLT::fixed_vector(4, 16)) {
1907 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1908 NegOpc = AArch64::NEGv4i16;
1909 } else if (Ty == LLT::fixed_vector(8, 16)) {
1910 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1911 NegOpc = AArch64::NEGv8i16;
1912 } else if (Ty == LLT::fixed_vector(16, 8)) {
1913 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1914 NegOpc = AArch64::NEGv16i8;
1915 } else if (Ty == LLT::fixed_vector(8, 8)) {
1916 Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1917 NegOpc = AArch64::NEGv8i8;
1918 } else {
1919 LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1920 return false;
1921 }
1922
1923 auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1924 constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1925 auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1926 constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1927 I.eraseFromParent();
1928 return true;
1929}
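// Illustrative sketch (not from the original source): for
//   %r:fpr(<4 x s32>) = G_ASHR %v, %amt
// the code above emits a per-lane negation of the shift amount followed by a
// signed shift-left-by-register, e.g.
//   %neg = NEGv4i32 %amt
//   %r   = SSHLv4i32 %v, %neg
// so a right shift by 3 becomes a left shift by -3 in each lane; G_LSHR uses
// USHL instead of SSHL.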
1930
1931bool AArch64InstructionSelector::selectVaStartAAPCS(
1932 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1933 return false;
1934}
1935
1936bool AArch64InstructionSelector::selectVaStartDarwin(
1937 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1938 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1939 Register ListReg = I.getOperand(0).getReg();
1940
1941 Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1942
1943 int FrameIdx = FuncInfo->getVarArgsStackIndex();
1944 if (MF.getSubtarget<AArch64Subtarget>().isCallingConvWin64(
1945 MF.getFunction().getCallingConv())) {
1946 FrameIdx = FuncInfo->getVarArgsGPRSize() > 0
1947 ? FuncInfo->getVarArgsGPRIndex()
1948 : FuncInfo->getVarArgsStackIndex();
1949 }
1950
1951 auto MIB =
1952 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
1953 .addDef(ArgsAddrReg)
1954 .addFrameIndex(FrameIdx)
1955 .addImm(0)
1956 .addImm(0);
1957
1958 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1959
1960 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
1961 .addUse(ArgsAddrReg)
1962 .addUse(ListReg)
1963 .addImm(0)
1964 .addMemOperand(*I.memoperands_begin());
1965
1966 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1967 I.eraseFromParent();
1968 return true;
1969}
1970
1971void AArch64InstructionSelector::materializeLargeCMVal(
1972 MachineInstr &I, const Value *V, unsigned OpFlags) {
1973 MachineBasicBlock &MBB = *I.getParent();
1974 MachineFunction &MF = *MBB.getParent();
1975 MachineRegisterInfo &MRI = MF.getRegInfo();
1976
1977 auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
1978 MovZ->addOperand(MF, I.getOperand(1));
1979 MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
1980 AArch64II::MO_NC);
1981 MovZ->addOperand(MF, MachineOperand::CreateImm(0));
1982 constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
1983
1984 auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
1985 Register ForceDstReg) {
1986 Register DstReg = ForceDstReg
1987 ? ForceDstReg
1988 : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1989 auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
1990 if (auto *GV = dyn_cast<GlobalValue>(V)) {
1991 MovI->addOperand(MF, MachineOperand::CreateGA(
1992 GV, MovZ->getOperand(1).getOffset(), Flags));
1993 } else {
1994 MovI->addOperand(
1995 MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
1996 MovZ->getOperand(1).getOffset(), Flags));
1997 }
1998 MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
1999 constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
2000 return DstReg;
2001 };
2002 Register DstReg = BuildMovK(MovZ.getReg(0),
2003 AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
2004 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
2005 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
2006}
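// Illustrative sketch (not from the original source; the relocation-flag
// notation in brackets is informal): for a global @g this builds the usual
// four-instruction wide-immediate sequence,
//   %r0  = MOVZXi @g[g0, nc], 0       // bits [15:0]
//   %r1  = MOVKXi %r0, @g[g1, nc], 16 // bits [31:16]
//   %r2  = MOVKXi %r1, @g[g2, nc], 32 // bits [47:32]
//   %dst = MOVKXi %r2, @g[g3], 48     // bits [63:48]
// i.e. each MOVK patches one 16-bit chunk of the 64-bit address into place.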
2007
2008bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
2009 MachineBasicBlock &MBB = *I.getParent();
2010 MachineFunction &MF = *MBB.getParent();
2011 MachineRegisterInfo &MRI = MF.getRegInfo();
2012
2013 switch (I.getOpcode()) {
2014 case TargetOpcode::G_STORE: {
2015 bool Changed = contractCrossBankCopyIntoStore(I, MRI);
2016 MachineOperand &SrcOp = I.getOperand(0);
2017 if (MRI.getType(SrcOp.getReg()).isPointer()) {
2018 // Allow matching with imported patterns for stores of pointers. Unlike
2019 // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
2020 // and constrain.
2021 auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp);
2022 Register NewSrc = Copy.getReg(0);
2023 SrcOp.setReg(NewSrc);
2024 RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
2025 Changed = true;
2026 }
2027 return Changed;
2028 }
2029 case TargetOpcode::G_PTR_ADD:
2030 return convertPtrAddToAdd(I, MRI);
2031 case TargetOpcode::G_LOAD: {
2032 // For scalar loads of pointers, we try to convert the dest type from p0
2033 // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
2034 // conversion, this should be ok because all users should have been
2035 // selected already, so the type doesn't matter for them.
2036 Register DstReg = I.getOperand(0).getReg();
2037 const LLT DstTy = MRI.getType(DstReg);
2038 if (!DstTy.isPointer())
2039 return false;
2040 MRI.setType(DstReg, LLT::scalar(64));
2041 return true;
2042 }
2043 case AArch64::G_DUP: {
2044 // Convert the type from p0 to s64 to help selection.
2045 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2046 if (!DstTy.getElementType().isPointer())
2047 return false;
2048 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
2049 MRI.setType(I.getOperand(0).getReg(),
2050 DstTy.changeElementType(LLT::scalar(64)));
2051 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2052 I.getOperand(1).setReg(NewSrc.getReg(0));
2053 return true;
2054 }
2055 case TargetOpcode::G_UITOFP:
2056 case TargetOpcode::G_SITOFP: {
2057 // If both source and destination regbanks are FPR, then convert the opcode
2058 // to G_SITOF/G_UITOF so that the importer can select it to an FPR variant.
2059 // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
2060 // copy.
2061 Register SrcReg = I.getOperand(1).getReg();
2062 LLT SrcTy = MRI.getType(SrcReg);
2063 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2064 if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
2065 return false;
2066
2067 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
2068 if (I.getOpcode() == TargetOpcode::G_SITOFP)
2069 I.setDesc(TII.get(AArch64::G_SITOF));
2070 else
2071 I.setDesc(TII.get(AArch64::G_UITOF));
2072 return true;
2073 }
2074 return false;
2075 }
2076 default:
2077 return false;
2078 }
2079}
2080
2081/// This lowering tries to look for G_PTR_ADD instructions and then converts
2082/// them to a standard G_ADD with a COPY on the source.
2083///
2084/// The motivation behind this is to expose the add semantics to the imported
2085/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
2086/// because the selector works bottom up, uses before defs. By the time we
2087/// end up trying to select a G_PTR_ADD, we should have already attempted to
2088/// fold this into addressing modes and were therefore unsuccessful.
2089bool AArch64InstructionSelector::convertPtrAddToAdd(
2090 MachineInstr &I, MachineRegisterInfo &MRI) {
2091 assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
2092 Register DstReg = I.getOperand(0).getReg();
2093 Register AddOp1Reg = I.getOperand(1).getReg();
2094 const LLT PtrTy = MRI.getType(DstReg);
2095 if (PtrTy.getAddressSpace() != 0)
2096 return false;
2097
2098 const LLT CastPtrTy =
2099 PtrTy.isVector() ? LLT::fixed_vector(2, 64) : LLT::scalar(64);
2100 auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
2101 // Set regbanks on the registers.
2102 if (PtrTy.isVector())
2103 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
2104 else
2105 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
2106
2107 // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
2108 // %dst(intty) = G_ADD %intbase, off
2109 I.setDesc(TII.get(TargetOpcode::G_ADD));
2110 MRI.setType(DstReg, CastPtrTy);
2111 I.getOperand(1).setReg(PtrToInt.getReg(0));
2112 if (!select(*PtrToInt)) {
2113 LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
2114 return false;
2115 }
2116
2117 // Also take the opportunity here to try to do some optimization.
2118 // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
2119 Register NegatedReg;
2120 if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
2121 return true;
2122 I.getOperand(2).setReg(NegatedReg);
2123 I.setDesc(TII.get(TargetOpcode::G_SUB));
2124 return true;
2125}
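// Illustrative sketch (not from the original source):
//   %dst:gpr(p0) = G_PTR_ADD %base(p0), %off(s64)
// is rewritten above as
//   %int:gpr(s64) = G_PTRTOINT %base(p0)
//   %dst:gpr(s64) = G_ADD %int, %off
// and if %off happens to be a negation (%off = G_SUB 0, %n), the add is
// further turned into %dst = G_SUB %int, %n.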
2126
2127bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
2128 MachineRegisterInfo &MRI) {
2129 // We try to match the immediate variant of LSL, which is actually an alias
2130 // for a special case of UBFM. Otherwise, we fall back to the imported
2131 // selector which will match the register variant.
2132 assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
2133 const auto &MO = I.getOperand(2);
2134 auto VRegAndVal = getIConstantVRegVal(MO.getReg(), MRI);
2135 if (!VRegAndVal)
2136 return false;
2137
2138 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2139 if (DstTy.isVector())
2140 return false;
2141 bool Is64Bit = DstTy.getSizeInBits() == 64;
2142 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2143 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2144
2145 if (!Imm1Fn || !Imm2Fn)
2146 return false;
2147
2148 auto NewI =
2149 MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2150 {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
2151
2152 for (auto &RenderFn : *Imm1Fn)
2153 RenderFn(NewI);
2154 for (auto &RenderFn : *Imm2Fn)
2155 RenderFn(NewI);
2156
2157 I.eraseFromParent();
2158 return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
2159}
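// Worked example (not from the original source), assuming the two immediate
// renderers follow the standard "LSL #imm is an alias of UBFM" encoding: for
// a 64-bit
//   %d = G_SHL %s, 4
// they should produce immr = (64 - 4) % 64 = 60 and imms = 63 - 4 = 59, i.e.
//   %d = UBFMXri %s, 60, 59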
2160
2161bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2162 MachineInstr &I, MachineRegisterInfo &MRI) {
2163 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
2164 // If we're storing a scalar, it doesn't matter what register bank that
2165 // scalar is on. All that matters is the size.
2166 //
2167 // So, if we see something like this (with a 32-bit scalar as an example):
2168 //
2169 // %x:gpr(s32) = ... something ...
2170 // %y:fpr(s32) = COPY %x:gpr(s32)
2171 // G_STORE %y:fpr(s32)
2172 //
2173 // We can fix this up into something like this:
2174 //
2175 // G_STORE %x:gpr(s32)
2176 //
2177 // And then continue the selection process normally.
2178 Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
2179 if (!DefDstReg.isValid())
2180 return false;
2181 LLT DefDstTy = MRI.getType(DefDstReg);
2182 Register StoreSrcReg = I.getOperand(0).getReg();
2183 LLT StoreSrcTy = MRI.getType(StoreSrcReg);
2184
2185 // If we get something strange like a physical register, then we shouldn't
2186 // go any further.
2187 if (!DefDstTy.isValid())
2188 return false;
2189
2190 // Are the source and dst types the same size?
2191 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2192 return false;
2193
2194 if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
2195 RBI.getRegBank(DefDstReg, MRI, TRI))
2196 return false;
2197
2198 // We have a cross-bank copy, which is entering a store. Let's fold it.
2199 I.getOperand(0).setReg(DefDstReg);
2200 return true;
2201}
2202
2203bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
2204 assert(I.getParent() && "Instruction should be in a basic block!");
2205 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2206
2207 MachineBasicBlock &MBB = *I.getParent();
2208 MachineFunction &MF = *MBB.getParent();
2209 MachineRegisterInfo &MRI = MF.getRegInfo();
2210
2211 switch (I.getOpcode()) {
2212 case AArch64::G_DUP: {
2213 // Before selecting a DUP instruction, check if it is better selected as a
2214 // MOV or load from a constant pool.
2215 Register Src = I.getOperand(1).getReg();
2216 auto ValAndVReg = getIConstantVRegValWithLookThrough(Src, MRI);
2217 if (!ValAndVReg)
2218 return false;
2219 LLVMContext &Ctx = MF.getFunction().getContext();
2220 Register Dst = I.getOperand(0).getReg();
2221 auto *CV = ConstantDataVector::getSplat(
2222 MRI.getType(Dst).getNumElements(),
2223 ConstantInt::get(Type::getIntNTy(Ctx, MRI.getType(Src).getSizeInBits()),
2224 ValAndVReg->Value));
2225 if (!emitConstantVector(Dst, CV, MIB, MRI))
2226 return false;
2227 I.eraseFromParent();
2228 return true;
2229 }
2230 case TargetOpcode::G_SEXT:
2231 // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
2232 // over a normal extend.
2233 if (selectUSMovFromExtend(I, MRI))
2234 return true;
2235 return false;
2236 case TargetOpcode::G_BR:
2237 return false;
2238 case TargetOpcode::G_SHL:
2239 return earlySelectSHL(I, MRI);
2240 case TargetOpcode::G_CONSTANT: {
2241 bool IsZero = false;
2242 if (I.getOperand(1).isCImm())
2243 IsZero = I.getOperand(1).getCImm()->isZero();
2244 else if (I.getOperand(1).isImm())
2245 IsZero = I.getOperand(1).getImm() == 0;
2246
2247 if (!IsZero)
2248 return false;
2249
2250 Register DefReg = I.getOperand(0).getReg();
2251 LLT Ty = MRI.getType(DefReg);
2252 if (Ty.getSizeInBits() == 64) {
2253 I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
2254 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
2255 } else if (Ty.getSizeInBits() == 32) {
2256 I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
2257 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
2258 } else
2259 return false;
2260
2261 I.setDesc(TII.get(TargetOpcode::COPY));
2262 return true;
2263 }
2264
2265 case TargetOpcode::G_ADD: {
2266 // Check if this is being fed by a G_ICMP on either side.
2267 //
2268 // (cmp pred, x, y) + z
2269 //
2270 // In the above case, when the cmp is true, we increment z by 1. So, we can
2271 // fold the add into the cset for the cmp by using cinc.
2272 //
2273 // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
2274 Register AddDst = I.getOperand(0).getReg();
2275 Register AddLHS = I.getOperand(1).getReg();
2276 Register AddRHS = I.getOperand(2).getReg();
2277 // Only handle scalars.
2278 LLT Ty = MRI.getType(AddLHS);
2279 if (Ty.isVector())
2280 return false;
2281 // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
2282 // bits.
2283 unsigned Size = Ty.getSizeInBits();
2284 if (Size != 32 && Size != 64)
2285 return false;
2286 auto MatchCmp = [&](Register Reg) -> MachineInstr * {
2287 if (!MRI.hasOneNonDBGUse(Reg))
2288 return nullptr;
2289 // If the LHS of the add is 32 bits, then we want to fold a 32-bit
2290 // compare.
2291 if (Size == 32)
2292 return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI);
2293 // We model scalar compares using 32-bit destinations right now.
2294 // If it's a 64-bit compare, it'll have 64-bit sources.
2295 Register ZExt;
2296 if (!mi_match(Reg, MRI,
2297 m_OneNonDBGUse(m_GZExt(m_OneNonDBGUse(m_Reg(ZExt))))))
2298 return nullptr;
2299 auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI);
2300 if (!Cmp ||
2301 MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
2302 return nullptr;
2303 return Cmp;
2304 };
2305 // Try to match
2306 // z + (cmp pred, x, y)
2307 MachineInstr *Cmp = MatchCmp(AddRHS);
2308 if (!Cmp) {
2309 // (cmp pred, x, y) + z
2310 std::swap(AddLHS, AddRHS);
2311 Cmp = MatchCmp(AddRHS);
2312 if (!Cmp)
2313 return false;
2314 }
2315 auto &PredOp = Cmp->getOperand(1);
2316 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
2317 const AArch64CC::CondCode InvCC =
2318 changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
2319 MIB.setInstrAndDebugLoc(I);
2320 emitIntegerCompare(/*LHS=*/Cmp->getOperand(2),
2321 /*RHS=*/Cmp->getOperand(3), PredOp, MIB);
2322 emitCSINC(/*Dst=*/AddDst, /*Src =*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB);
2323 I.eraseFromParent();
2324 return true;
2325 }
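// Illustrative sketch (not from the original source): for
//   %c:gpr(s32) = G_ICMP eq %x:gpr(s32), %y
//   %a:gpr(s32) = G_ADD %z, %c
// the code above emits the compare and then a CSINC on the inverted
// condition, e.g.
//   SUBSWrr %x, %y   (or an equivalent flag-setting compare)
//   %a = CSINCWr %z, %z, ne
// which yields %z when x != y and %z + 1 when x == y, i.e. a "cinc".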
2326 case TargetOpcode::G_OR: {
2327 // Look for operations that take the lower `Width=Size-ShiftImm` bits of
2328 // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
2329 // shifting and masking that we can replace with a BFI (encoded as a BFM).
2330 Register Dst = I.getOperand(0).getReg();
2331 LLT Ty = MRI.getType(Dst);
2332
2333 if (!Ty.isScalar())
2334 return false;
2335
2336 unsigned Size = Ty.getSizeInBits();
2337 if (Size != 32 && Size != 64)
2338 return false;
2339
2340 Register ShiftSrc;
2341 int64_t ShiftImm;
2342 Register MaskSrc;
2343 int64_t MaskImm;
2344 if (!mi_match(
2345 Dst, MRI,
2346 m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))),
2347 m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm))))))
2348 return false;
2349
2350 if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
2351 return false;
2352
2353 int64_t Immr = Size - ShiftImm;
2354 int64_t Imms = Size - ShiftImm - 1;
2355 unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2356 emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
2357 I.eraseFromParent();
2358 return true;
2359 }
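// Worked example (not from the original source) for the BFI fold above with
// Size = 32, ShiftImm = 8 and MaskImm = 0xff:
//   %d = G_OR (G_SHL %hi, 8), (G_AND %lo, 0xff)
// passes the checks (0xff == (1 << 8) - 1), so Immr = 24 and Imms = 23 and we
// emit
//   %d = BFMWri %lo, %hi, 24, 23
// which keeps bits [7:0] of %lo and inserts bits [23:0] of %hi starting at
// bit 8 (the BFI %lo, %hi, #8, #24 alias).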
2360 case TargetOpcode::G_FENCE: {
2361 if (I.getOperand(1).getImm() == 0)
2362 BuildMI(MBB, I, MIMetadata(I), TII.get(TargetOpcode::MEMBARRIER));
2363 else
2364 BuildMI(MBB, I, MIMetadata(I), TII.get(AArch64::DMB))
2365 .addImm(I.getOperand(0).getImm() == 4 ? 0x9 : 0xb);
2366 I.eraseFromParent();
2367 return true;
2368 }
2369 default:
2370 return false;
2371 }
2372}
2373
2374bool AArch64InstructionSelector::select(MachineInstr &I) {
2375 assert(I.getParent() && "Instruction should be in a basic block!");
2376 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2377
2378 MachineBasicBlock &MBB = *I.getParent();
2379 MachineFunction &MF = *MBB.getParent();
2380 MachineRegisterInfo &MRI = MF.getRegInfo();
2381
2382 const AArch64Subtarget *Subtarget = &MF.getSubtarget<AArch64Subtarget>();
2383 if (Subtarget->requiresStrictAlign()) {
2384 // We don't support this feature yet.
2385 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
2386 return false;
2387 }
2388
2389 MIB.setInstrAndDebugLoc(I);
2390
2391 unsigned Opcode = I.getOpcode();
2392 // G_PHI requires same handling as PHI
2393 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2394 // Certain non-generic instructions also need some special handling.
2395
2396 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2397 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2398
2399 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2400 const Register DefReg = I.getOperand(0).getReg();
2401 const LLT DefTy = MRI.getType(DefReg);
2402
2403 const RegClassOrRegBank &RegClassOrBank =
2404 MRI.getRegClassOrRegBank(DefReg);
2405
2406 const TargetRegisterClass *DefRC
2407 = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
2408 if (!DefRC) {
2409 if (!DefTy.isValid()) {
2410 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
2411 return false;
2412 }
2413 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
2414 DefRC = getRegClassForTypeOnBank(DefTy, RB);
2415 if (!DefRC) {
2416 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
2417 return false;
2418 }
2419 }
2420
2421 I.setDesc(TII.get(TargetOpcode::PHI));
2422
2423 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
2424 }
2425
2426 if (I.isCopy())
2427 return selectCopy(I, TII, MRI, TRI, RBI);
2428
2429 if (I.isDebugInstr())
2430 return selectDebugInstr(I, MRI, RBI);
2431
2432 return true;
2433 }
2434
2435
2436 if (I.getNumOperands() != I.getNumExplicitOperands()) {
2437 LLVM_DEBUG(
2438 dbgs() << "Generic instruction has unexpected implicit operands\n");
2439 return false;
2440 }
2441
2442 // Try to do some lowering before we start instruction selecting. These
2443 // lowerings are purely transformations on the input G_MIR and so selection
2444 // must continue after any modification of the instruction.
2445 if (preISelLower(I)) {
2446 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2447 }
2448
2449 // There may be patterns where the importer can't deal with them optimally,
2450 // but does select it to a suboptimal sequence so our custom C++ selection
2451 // code later never has a chance to work on it. Therefore, we have an early
2452 // selection attempt here to give priority to certain selection routines
2453 // over the imported ones.
2454 if (earlySelect(I))
2455 return true;
2456
2457 if (selectImpl(I, *CoverageInfo))
2458 return true;
2459
2460 LLT Ty =
2461 I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
2462
2463 switch (Opcode) {
2464 case TargetOpcode::G_SBFX:
2465 case TargetOpcode::G_UBFX: {
2466 static const unsigned OpcTable[2][2] = {
2467 {AArch64::UBFMWri, AArch64::UBFMXri},
2468 {AArch64::SBFMWri, AArch64::SBFMXri}};
2469 bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2470 unsigned Size = Ty.getSizeInBits();
2471 unsigned Opc = OpcTable[IsSigned][Size == 64];
2472 auto Cst1 =
2473 getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
2474 assert(Cst1 && "Should have gotten a constant for src 1?");
2475 auto Cst2 =
2476 getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
2477 assert(Cst2 && "Should have gotten a constant for src 2?");
2478 auto LSB = Cst1->Value.getZExtValue();
2479 auto Width = Cst2->Value.getZExtValue();
2480 auto BitfieldInst =
2481 MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
2482 .addImm(LSB)
2483 .addImm(LSB + Width - 1);
2484 I.eraseFromParent();
2485 return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
2486 }
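// Worked example (not from the original source): for
//   %d:gpr(s32) = G_UBFX %x, 8, 4
// we get LSB = 8 and Width = 4, so the bitfield move is emitted as
//   %d = UBFMWri %x, 8, 11
// i.e. it extracts bits [11:8] of %x into the low 4 bits of %d (G_SBFX picks
// SBFMWri/SBFMXri and sign-extends instead).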
2487 case TargetOpcode::G_BRCOND:
2488 return selectCompareBranch(I, MF, MRI);
2489
2490 case TargetOpcode::G_BRINDIRECT: {
2491 I.setDesc(TII.get(AArch64::BR));
2492 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2493 }
2494
2495 case TargetOpcode::G_BRJT:
2496 return selectBrJT(I, MRI);
2497
2498 case AArch64::G_ADD_LOW: {
2499 // This op may have been separated from its ADRP companion by the localizer
2500 // or some other code motion pass. Given that many CPUs will try to
2501 // macro fuse these operations anyway, select this into a MOVaddr pseudo
2502 // which will later be expanded into an ADRP+ADD pair after scheduling.
2503 MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
2504 if (BaseMI->getOpcode() != AArch64::ADRP) {
2505 I.setDesc(TII.get(AArch64::ADDXri));
2506 I.addOperand(MachineOperand::CreateImm(0));
2507 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2508 }
2509 assert(TM.getCodeModel() == CodeModel::Small &&
2510 "Expected small code model");
2511 auto Op1 = BaseMI->getOperand(1);
2512 auto Op2 = I.getOperand(2);
2513 auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
2514 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2515 Op1.getTargetFlags())
2516 .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
2517 Op2.getTargetFlags());
2518 I.eraseFromParent();
2519 return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
2520 }
2521
2522 case TargetOpcode::G_BSWAP: {
2523 // Handle vector types for G_BSWAP directly.
2524 Register DstReg = I.getOperand(0).getReg();
2525 LLT DstTy = MRI.getType(DstReg);
2526
2527 // We should only get vector types here; everything else is handled by the
2528 // importer right now.
2529 if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
2530 LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
2531 return false;
2532 }
2533
2534 // Only handle 4 and 2 element vectors for now.
2535 // TODO: 16-bit elements.
2536 unsigned NumElts = DstTy.getNumElements();
2537 if (NumElts != 4 && NumElts != 2) {
2538 LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
2539 return false;
2540 }
2541
2542 // Choose the correct opcode for the supported types. Right now, that's
2543 // v2s32, v4s32, and v2s64.
2544 unsigned Opc = 0;
2545 unsigned EltSize = DstTy.getElementType().getSizeInBits();
2546 if (EltSize == 32)
2547 Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
2548 : AArch64::REV32v16i8;
2549 else if (EltSize == 64)
2550 Opc = AArch64::REV64v16i8;
2551
2552 // We should always get something by the time we get here...
2553 assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
2554
2555 I.setDesc(TII.get(Opc));
2556 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2557 }
2558
2559 case TargetOpcode::G_FCONSTANT:
2560 case TargetOpcode::G_CONSTANT: {
2561 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2562
2563 const LLT s8 = LLT::scalar(8);
2564 const LLT s16 = LLT::scalar(16);
2565 const LLT s32 = LLT::scalar(32);
2566 const LLT s64 = LLT::scalar(64);
2567 const LLT s128 = LLT::scalar(128);
2568 const LLT p0 = LLT::pointer(0, 64);
2569
2570 const Register DefReg = I.getOperand(0).getReg();
2571 const LLT DefTy = MRI.getType(DefReg);
2572 const unsigned DefSize = DefTy.getSizeInBits();
2573 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2574
2575 // FIXME: Redundant check, but even less readable when factored out.
2576 if (isFP) {
2577 if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
2578 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2579 << " constant, expected: " << s16 << " or " << s32
2580 << " or " << s64 << " or " << s128 << '\n');
2581 return false;
2582 }
2583
2584 if (RB.getID() != AArch64::FPRRegBankID) {
2585 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2586 << " constant on bank: " << RB
2587 << ", expected: FPR\n");
2588 return false;
2589 }
2590
2591 // The case when we have 0.0 is covered by tablegen. Reject it here so we
2592 // can be sure tablegen works correctly and isn't rescued by this code.
2593 // 0.0 is not covered by tablegen for FP128. So we will handle this
2594 // scenario in the code here.
2595 if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
2596 return false;
2597 } else {
2598 // s32 and s64 are covered by tablegen.
2599 if (Ty != p0 && Ty != s8 && Ty != s16) {
2600 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2601 << " constant, expected: " << s32 << ", " << s64
2602 << ", or " << p0 << '\n');
2603 return false;
2604 }
2605
2606 if (RB.getID() != AArch64::GPRRegBankID) {
2607 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2608 << " constant on bank: " << RB
2609 << ", expected: GPR\n");
2610 return false;
2611 }
2612 }
2613
2614 if (isFP) {
2615 const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(DefTy, RB);
2616 // For 16, 64, and 128b values, emit a constant pool load.
2617 switch (DefSize) {
2618 default:
2619 llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
2620 case 32:
2621 // For s32, use a cp load if we have optsize/minsize.
2622 if (!shouldOptForSize(&MF))
2623 break;
2624 [[fallthrough]];
2625 case 16:
2626 case 64:
2627 case 128: {
2628 auto *FPImm = I.getOperand(1).getFPImm();
2629 auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2630 if (!LoadMI) {
2631 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2632 return false;
2633 }
2634 MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2635 I.eraseFromParent();
2636 return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2637 }
2638 }
2639
2640 // Either emit a FMOV, or emit a copy to emit a normal mov.
2641 assert(DefSize == 32 &&
2642 "Expected constant pool loads for all sizes other than 32!");
2643 const Register DefGPRReg =
2644 MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2645 MachineOperand &RegOp = I.getOperand(0);
2646 RegOp.setReg(DefGPRReg);
2647 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2648 MIB.buildCopy({DefReg}, {DefGPRReg});
2649
2650 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2651 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2652 return false;
2653 }
2654
2655 MachineOperand &ImmOp = I.getOperand(1);
2656 // FIXME: Is going through int64_t always correct?
2657 ImmOp.ChangeToImmediate(
2658 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2659 } else if (I.getOperand(1).isCImm()) {
2660 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
2661 I.getOperand(1).ChangeToImmediate(Val);
2662 } else if (I.getOperand(1).isImm()) {
2663 uint64_t Val = I.getOperand(1).getImm();
2664 I.getOperand(1).ChangeToImmediate(Val);
2665 }
2666
2667 const unsigned MovOpc =
2668 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2669 I.setDesc(TII.get(MovOpc));
2670 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2671 return true;
2672 }
2673 case TargetOpcode::G_EXTRACT: {
2674 Register DstReg = I.getOperand(0).getReg();
2675 Register SrcReg = I.getOperand(1).getReg();
2676 LLT SrcTy = MRI.getType(SrcReg);
2677 LLT DstTy = MRI.getType(DstReg);
2678 (void)DstTy;
2679 unsigned SrcSize = SrcTy.getSizeInBits();
2680
2681 if (SrcTy.getSizeInBits() > 64) {
2682 // This should be an extract of an s128, which is like a vector extract.
2683 if (SrcTy.getSizeInBits() != 128)
2684 return false;
2685 // Only support extracting 64 bits from an s128 at the moment.
2686 if (DstTy.getSizeInBits() != 64)
2687 return false;
2688
2689 unsigned Offset = I.getOperand(2).getImm();
2690 if (Offset % 64 != 0)
2691 return false;
2692
2693 // Check we have the right regbank always.
2694 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2695 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2696 assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!");
2697
2698 if (SrcRB.getID() == AArch64::GPRRegBankID) {
2699 auto NewI =
2700 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
2701 .addUse(SrcReg, 0,
2702 Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2703 constrainOperandRegClass(MF, TRI, MRI, TII, RBI, *NewI,
2704 AArch64::GPR64RegClass, NewI->getOperand(0));
2705 I.eraseFromParent();
2706 return true;
2707 }
2708
2709 // Emit the same code as a vector extract.
2710 // Offset must be a multiple of 64.
2711 unsigned LaneIdx = Offset / 64;
2712 MachineInstr *Extract = emitExtractVectorElt(
2713 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2714 if (!Extract)
2715 return false;
2716 I.eraseFromParent();
2717 return true;
2718 }
2719
2720 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2721 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2722 Ty.getSizeInBits() - 1);
2723
2724 if (SrcSize < 64) {
2725 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2726 "unexpected G_EXTRACT types");
2727 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2728 }
2729
2730 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2731 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2732 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2733 .addReg(DstReg, 0, AArch64::sub_32);
2734 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2735 AArch64::GPR32RegClass, MRI);
2736 I.getOperand(0).setReg(DstReg);
2737
2738 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2739 }
2740
2741 case TargetOpcode::G_INSERT: {
2742 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2743 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2744 unsigned DstSize = DstTy.getSizeInBits();
2745 // Larger inserts are vectors, same-size ones should be something else by
2746 // now (split up or turned into COPYs).
2747 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2748 return false;
2749
2750 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2751 unsigned LSB = I.getOperand(3).getImm();
2752 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
2753 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2754 MachineInstrBuilder(MF, I).addImm(Width - 1);
2755
2756 if (DstSize < 64) {
2757 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2758 "unexpected G_INSERT types");
2759 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2760 }
2761
2762 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2763 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2764 TII.get(AArch64::SUBREG_TO_REG))
2765 .addDef(SrcReg)
2766 .addImm(0)
2767 .addUse(I.getOperand(2).getReg())
2768 .addImm(AArch64::sub_32);
2769 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2770 AArch64::GPR32RegClass, MRI);
2771 I.getOperand(2).setReg(SrcReg);
2772
2773 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2774 }
2775 case TargetOpcode::G_FRAME_INDEX: {
2776 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2777 if (Ty != LLT::pointer(0, 64)) {
2778 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2779 << ", expected: " << LLT::pointer(0, 64) << '\n');
2780 return false;
2781 }
2782 I.setDesc(TII.get(AArch64::ADDXri));
2783
2784 // MOs for a #0 shifted immediate.
2785 I.addOperand(MachineOperand::CreateImm(0));
2786 I.addOperand(MachineOperand::CreateImm(0));
2787
2788 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2789 }
2790
2791 case TargetOpcode::G_GLOBAL_VALUE: {
2792 auto GV = I.getOperand(1).getGlobal();
2793 if (GV->isThreadLocal())
2794 return selectTLSGlobalValue(I, MRI);
2795
2796 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
2797 if (OpFlags & AArch64II::MO_GOT) {
2798 I.setDesc(TII.get(AArch64::LOADgot));
2799 I.getOperand(1).setTargetFlags(OpFlags);
2800 } else if (TM.getCodeModel() == CodeModel::Large) {
2801 // Materialize the global using movz/movk instructions.
2802 materializeLargeCMVal(I, GV, OpFlags);
2803 I.eraseFromParent();
2804 return true;
2805 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2806 I.setDesc(TII.get(AArch64::ADR));
2807 I.getOperand(1).setTargetFlags(OpFlags);
2808 } else {
2809 I.setDesc(TII.get(AArch64::MOVaddr));
2810 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2811 MachineInstrBuilder MIB(MF, I);
2812 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2813 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2814 }
2815 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2816 }
2817
2818 case TargetOpcode::G_ZEXTLOAD:
2819 case TargetOpcode::G_LOAD:
2820 case TargetOpcode::G_STORE: {
2821 GLoadStore &LdSt = cast<GLoadStore>(I);
2822 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2823 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
2824
2825 if (PtrTy != LLT::pointer(0, 64)) {
2826 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
2827 << ", expected: " << LLT::pointer(0, 64) << '\n');
2828 return false;
2829 }
2830
2831 uint64_t MemSizeInBytes = LdSt.getMemSize();
2832 unsigned MemSizeInBits = LdSt.getMemSizeInBits();
2833 AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
2834
2835 // Need special instructions for atomics that affect ordering.
2836 if (Order != AtomicOrdering::NotAtomic &&
2837 Order != AtomicOrdering::Unordered &&
2838 Order != AtomicOrdering::Monotonic) {
2839 assert(!isa<GZExtLoad>(LdSt));
2840 if (MemSizeInBytes > 64)
2841 return false;
2842
2843 if (isa<GLoad>(LdSt)) {
2844 static constexpr unsigned LDAPROpcodes[] = {
2845 AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
2846 static constexpr unsigned LDAROpcodes[] = {
2847 AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
2848 ArrayRef<unsigned> Opcodes =
2849 STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent
2850 ? LDAPROpcodes
2851 : LDAROpcodes;
2852 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2853 } else {
2854 static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
2855 AArch64::STLRW, AArch64::STLRX};
2856 Register ValReg = LdSt.getReg(0);
2857 if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
2858 // Emit a subreg copy of 32 bits.
2859 Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2860 MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
2861 .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32);
2862 I.getOperand(0).setReg(NewVal);
2863 }
2864 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2865 }
2866 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2867 return true;
2868 }
2869
2870#ifndef NDEBUG
2871 const Register PtrReg = LdSt.getPointerReg();
2872 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2873 // Check that the pointer register is valid.
2874 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
2875 "Load/Store pointer operand isn't a GPR");
2876 assert(MRI.getType(PtrReg).isPointer() &&
2877 "Load/Store pointer operand isn't a pointer");
2878#endif
2879
2880 const Register ValReg = LdSt.getReg(0);
2881 const LLT ValTy = MRI.getType(ValReg);
2882 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
2883
2884 // The code below doesn't support truncating stores, so we need to split it
2885 // again.
2886 if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2887 unsigned SubReg;
2888 LLT MemTy = LdSt.getMMO().getMemoryType();
2889 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
2890 if (!getSubRegForClass(RC, TRI, SubReg))
2891 return false;
2892
2893 // Generate a subreg copy.
2894 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
2895 .addReg(ValReg, 0, SubReg)
2896 .getReg(0);
2897 RBI.constrainGenericRegister(Copy, *RC, MRI);
2898 LdSt.getOperand(0).setReg(Copy);
2899 } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2900 // If this is an any-extending load from the FPR bank, split it into a regular
2901 // load + extend.
2902 if (RB.getID() == AArch64::FPRRegBankID) {
2903 unsigned SubReg;
2904 LLT MemTy = LdSt.getMMO().getMemoryType();
2905 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
2906 if (!getSubRegForClass(RC, TRI, SubReg))
2907 return false;
2908 Register OldDst = LdSt.getReg(0);
2909 Register NewDst =
2910 MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType());
2911 LdSt.getOperand(0).setReg(NewDst);
2912 MRI.setRegBank(NewDst, RB);
2913 // Generate a SUBREG_TO_REG to extend it.
2914 MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));
2915 MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
2916 .addImm(0)
2917 .addUse(NewDst)
2918 .addImm(SubReg);
2919 auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB);
2920 RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
2921 MIB.setInstr(LdSt);
2922 }
2923 }
2924
2925 // Helper lambda for partially selecting I. Either returns the original
2926 // instruction with an updated opcode, or a new instruction.
2927 auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
2928 bool IsStore = isa<GStore>(I);
2929 const unsigned NewOpc =
2930 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
2931 if (NewOpc == I.getOpcode())
2932 return nullptr;
2933 // Check if we can fold anything into the addressing mode.
2934 auto AddrModeFns =
2935 selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
2936 if (!AddrModeFns) {
2937 // Can't fold anything. Use the original instruction.
2938 I.setDesc(TII.get(NewOpc));
2939 I.addOperand(MachineOperand::CreateImm(0));
2940 return &I;
2941 }
2942
2943 // Folded something. Create a new instruction and return it.
2944 auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
2945 Register CurValReg = I.getOperand(0).getReg();
2946 IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
2947 NewInst.cloneMemRefs(I);
2948 for (auto &Fn : *AddrModeFns)
2949 Fn(NewInst);
2950 I.eraseFromParent();
2951 return &*NewInst;
2952 };
2953
2954 MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
2955 if (!LoadStore)
2956 return false;
2957
2958 // If we're storing a 0, use WZR/XZR.
2959 if (Opcode == TargetOpcode::G_STORE) {
2960 auto CVal = getIConstantVRegValWithLookThrough(
2961 LoadStore->getOperand(0).getReg(), MRI);
2962 if (CVal && CVal->Value == 0) {
2963 switch (LoadStore->getOpcode()) {
2964 case AArch64::STRWui:
2965 case AArch64::STRHHui:
2966 case AArch64::STRBBui:
2967 LoadStore->getOperand(0).setReg(AArch64::WZR);
2968 break;
2969 case AArch64::STRXui:
2970 LoadStore->getOperand(0).setReg(AArch64::XZR);
2971 break;
2972 }
2973 }
2974 }
2975
2976 if (IsZExtLoad) {
2977 // The zextload from a smaller type to i32 should be handled by the
2978 // importer.
2979 if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
2980 return false;
2981 // If we have a ZEXTLOAD then change the load's type to be a narrower reg
2982 // and zero_extend with SUBREG_TO_REG.
2983 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2984 Register DstReg = LoadStore->getOperand(0).getReg();
2985 LoadStore->getOperand(0).setReg(LdReg);
2986
2987 MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
2988 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
2989 .addImm(0)
2990 .addUse(LdReg)
2991 .addImm(AArch64::sub_32);
2992 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2993 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
2994 MRI);
2995 }
2996 return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2997 }
2998
2999 case TargetOpcode::G_SMULH:
3000 case TargetOpcode::G_UMULH: {
3001 // Reject the various things we don't support yet.
3002 if (unsupportedBinOp(I, RBI, MRI, TRI))
3003 return false;
3004
3005 const Register DefReg = I.getOperand(0).getReg();
3006 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
3007
3008 if (RB.getID() != AArch64::GPRRegBankID) {
3009 LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
3010 return false;
3011 }
3012
3013 if (Ty != LLT::scalar(64)) {
3014 LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
3015 << ", expected: " << LLT::scalar(64) << '\n');
3016 return false;
3017 }
3018
3019 unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
3020 : AArch64::UMULHrr;
3021 I.setDesc(TII.get(NewOpc));
3022
3023 // Now that we selected an opcode, we need to constrain the register
3024 // operands to use appropriate classes.
3025 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3026 }
3027 case TargetOpcode::G_LSHR:
3028 case TargetOpcode::G_ASHR:
3029 if (MRI.getType(I.getOperand(0).getReg()).isVector())
3030 return selectVectorAshrLshr(I, MRI);
3031 [[fallthrough]];
3032 case TargetOpcode::G_SHL:
3033 if (Opcode == TargetOpcode::G_SHL &&
3034 MRI.getType(I.getOperand(0).getReg()).isVector())
3035 return selectVectorSHL(I, MRI);
3036
3037 // These shifts were legalized to have 64 bit shift amounts because we
3038 // want to take advantage of the selection patterns that assume the
3039 // immediates are s64s, however, selectBinaryOp will assume both operands
3040 // will have the same bit size.
3041 {
3042 Register SrcReg = I.getOperand(1).getReg();
3043 Register ShiftReg = I.getOperand(2).getReg();
3044 const LLT ShiftTy = MRI.getType(ShiftReg);
3045 const LLT SrcTy = MRI.getType(SrcReg);
3046 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
3047 ShiftTy.getSizeInBits() == 64) {
3048 assert(!ShiftTy.isVector() && "unexpected vector shift ty");
3049 // Insert a subregister copy to implement a 64->32 trunc
3050 auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
3051 .addReg(ShiftReg, 0, AArch64::sub_32);
3052 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
3053 I.getOperand(2).setReg(Trunc.getReg(0));
3054 }
3055 }
3056 [[fallthrough]];
3057 case TargetOpcode::G_OR: {
3058 // Reject the various things we don't support yet.
3059 if (unsupportedBinOp(I, RBI, MRI, TRI))
3060 return false;
3061
3062 const unsigned OpSize = Ty.getSizeInBits();
3063
3064 const Register DefReg = I.getOperand(0).getReg();
3065 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
3066
3067 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
3068 if (NewOpc == I.getOpcode())
3069 return false;
3070
3071 I.setDesc(TII.get(NewOpc));
3072 // FIXME: Should the type be always reset in setDesc?
3073
3074 // Now that we selected an opcode, we need to constrain the register
3075 // operands to use appropriate classes.
3076 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3077 }
3078
3079 case TargetOpcode::G_PTR_ADD: {
3080 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
3081 I.eraseFromParent();
3082 return true;
3083 }
3084 case TargetOpcode::G_SADDO:
3085 case TargetOpcode::G_UADDO:
3086 case TargetOpcode::G_SSUBO:
3087 case TargetOpcode::G_USUBO: {
3088 // Emit the operation and get the correct condition code.
3089 auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(),
3090 I.getOperand(2), I.getOperand(3), MIB);
3091
3092 // Now, put the overflow result in the register given by the first operand
3093 // to the overflow op. CSINC increments the result when the predicate is
3094 // false, so to get the increment when it's true, we need to use the
3095 // inverse. In this case, we want to increment when carry is set.
3096 Register ZReg = AArch64::WZR;
3097 emitCSINC(/*Dst=*/I.getOperand(1).getReg(), /*Src1=*/ZReg, /*Src2=*/ZReg,
3098 getInvertedCondCode(OpAndCC.second), MIB);
3099 I.eraseFromParent();
3100 return true;
3101 }
3102
3103 case TargetOpcode::G_PTRMASK: {
3104 Register MaskReg = I.getOperand(2).getReg();
3105 std::optional<int64_t> MaskVal = getIConstantVRegSExtVal(MaskReg, MRI);
3106 // TODO: Implement arbitrary cases
3107 if (!MaskVal || !isShiftedMask_64(*MaskVal))
3108 return false;
3109
3110 uint64_t Mask = *MaskVal;
3111 I.setDesc(TII.get(AArch64::ANDXri));
3112 I.getOperand(2).ChangeToImmediate(
3113 AArch64_AM::encodeLogicalImmediate(Mask, 64));
3114
3115 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3116 }
3117 case TargetOpcode::G_PTRTOINT:
3118 case TargetOpcode::G_TRUNC: {
3119 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3120 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3121
3122 const Register DstReg = I.getOperand(0).getReg();
3123 const Register SrcReg = I.getOperand(1).getReg();
3124
3125 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3126 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3127
3128 if (DstRB.getID() != SrcRB.getID()) {
3129 LLVM_DEBUG(
3130 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
3131 return false;
3132 }
3133
3134 if (DstRB.getID() == AArch64::GPRRegBankID) {
3135 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3136 if (!DstRC)
3137 return false;
3138
3139 const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(SrcTy, SrcRB);
3140 if (!SrcRC)
3141 return false;
3142
3143 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
3144 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
3145 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
3146 return false;
3147 }
3148
3149 if (DstRC == SrcRC) {
3150 // Nothing to be done
3151 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
3152 SrcTy == LLT::scalar(64)) {
3153 llvm_unreachable("TableGen can import this case");
3154 return false;
3155 } else if (DstRC == &AArch64::GPR32RegClass &&
3156 SrcRC == &AArch64::GPR64RegClass) {
3157 I.getOperand(1).setSubReg(AArch64::sub_32);
3158 } else {
3159 LLVM_DEBUG(
3160 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
3161 return false;
3162 }
3163
3164 I.setDesc(TII.get(TargetOpcode::COPY));
3165 return true;
3166 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
3167 if (DstTy == LLT::fixed_vector(4, 16) &&
3168 SrcTy == LLT::fixed_vector(4, 32)) {
3169 I.setDesc(TII.get(AArch64::XTNv4i16));
3170 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3171 return true;
3172 }
3173
3174 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
3175 MachineInstr *Extract = emitExtractVectorElt(
3176 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
3177 if (!Extract)
3178 return false;
3179 I.eraseFromParent();
3180 return true;
3181 }
3182
3183 // We might have a vector G_PTRTOINT, in which case just emit a COPY.
3184 if (Opcode == TargetOpcode::G_PTRTOINT) {
3185 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
3186 I.setDesc(TII.get(TargetOpcode::COPY));
3187 return selectCopy(I, TII, MRI, TRI, RBI);
3188 }
3189 }
3190
3191 return false;
3192 }
3193
3194 case TargetOpcode::G_ANYEXT: {
3195 if (selectUSMovFromExtend(I, MRI))
3196 return true;
3197
3198 const Register DstReg = I.getOperand(0).getReg();
3199 const Register SrcReg = I.getOperand(1).getReg();
3200
3201 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
3202 if (RBDst.getID() != AArch64::GPRRegBankID) {
3203 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
3204 << ", expected: GPR\n");
3205 return false;
3206 }
3207
3208 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
3209 if (RBSrc.getID() != AArch64::GPRRegBankID) {
3210 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
3211 << ", expected: GPR\n");
3212 return false;
3213 }
3214
3215 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
3216
3217 if (DstSize == 0) {
3218 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
3219 return false;
3220 }
3221
3222 if (DstSize != 64 && DstSize > 32) {
3223 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
3224 << ", expected: 32 or 64\n");
3225 return false;
3226 }
3227 // At this point G_ANYEXT is just like a plain COPY, but we need
3228 // to explicitly form the 64-bit value if any.
3229 if (DstSize > 32) {
3230 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
3231 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
3232 .addDef(ExtSrc)
3233 .addImm(0)
3234 .addUse(SrcReg)
3235 .addImm(AArch64::sub_32);
3236 I.getOperand(1).setReg(ExtSrc);
3237 }
3238 return selectCopy(I, TII, MRI, TRI, RBI);
3239 }
3240
3241 case TargetOpcode::G_ZEXT:
3242 case TargetOpcode::G_SEXT_INREG:
3243 case TargetOpcode::G_SEXT: {
3244 if (selectUSMovFromExtend(I, MRI))
3245 return true;
3246
3247 unsigned Opcode = I.getOpcode();
3248 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3249 const Register DefReg = I.getOperand(0).getReg();
3250 Register SrcReg = I.getOperand(1).getReg();
3251 const LLT DstTy = MRI.getType(DefReg);
3252 const LLT SrcTy = MRI.getType(SrcReg);
3253 unsigned DstSize = DstTy.getSizeInBits();
3254 unsigned SrcSize = SrcTy.getSizeInBits();
3255
3256 // SEXT_INREG has the same src reg size as dst, the size of the value to be
3257 // extended is encoded in the imm.
3258 if (Opcode == TargetOpcode::G_SEXT_INREG)
3259 SrcSize = I.getOperand(2).getImm();
3260
3261 if (DstTy.isVector())
3262 return false; // Should be handled by imported patterns.
3263
3264 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
3265 AArch64::GPRRegBankID &&
3266 "Unexpected ext regbank");
3267
3268 MachineInstr *ExtI;
3269
3270 // First check if we're extending the result of a load which has a dest type
3271 // smaller than 32 bits, then this zext is redundant. GPR32 is the smallest
3272 // GPR register on AArch64 and all loads which are smaller automatically
3273 // zero-extend the upper bits. E.g.
3274 // %v(s8) = G_LOAD %p, :: (load 1)
3275 // %v2(s32) = G_ZEXT %v(s8)
3276 if (!IsSigned) {
3277 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
3278 bool IsGPR =
3279 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
3280 if (LoadMI && IsGPR) {
3281 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
3282 unsigned BytesLoaded = MemOp->getSize();
3283 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
3284 return selectCopy(I, TII, MRI, TRI, RBI);
3285 }
3286
3287 // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
3288 // + SUBREG_TO_REG.
3289 if (IsGPR && SrcSize == 32 && DstSize == 64) {
3290 Register SubregToRegSrc =
3291 MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3292 const Register ZReg = AArch64::WZR;
3293 MIB.buildInstr(AArch64::ORRWrs, {SubregToRegSrc}, {ZReg, SrcReg})
3294 .addImm(0);
3295
3296 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
3297 .addImm(0)
3298 .addUse(SubregToRegSrc)
3299 .addImm(AArch64::sub_32);
3300
3301 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
3302 MRI)) {
3303 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
3304 return false;
3305 }
3306
3307 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3308 MRI)) {
3309 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
3310 return false;
3311 }
3312
3313 I.eraseFromParent();
3314 return true;
3315 }
3316 }
3317
3318 if (DstSize == 64) {
3319 if (Opcode != TargetOpcode::G_SEXT_INREG) {
3320 // FIXME: Can we avoid manually doing this?
3321 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3322 MRI)) {
3323 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
3324 << " operand\n");
3325 return false;
3326 }
3327 SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
3328 {&AArch64::GPR64RegClass}, {})
3329 .addImm(0)
3330 .addUse(SrcReg)
3331 .addImm(AArch64::sub_32)
3332 .getReg(0);
3333 }
3334
3335 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3336 {DefReg}, {SrcReg})
3337 .addImm(0)
3338 .addImm(SrcSize - 1);
3339 } else if (DstSize <= 32) {
3340 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3341 {DefReg}, {SrcReg})
3342 .addImm(0)
3343 .addImm(SrcSize - 1);
3344 } else {
3345 return false;
3346 }
3347
3348 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
3349 I.eraseFromParent();
3350 return true;
3351 }
3352
3353 case TargetOpcode::G_SITOFP:
3354 case TargetOpcode::G_UITOFP:
3355 case TargetOpcode::G_FPTOSI:
3356 case TargetOpcode::G_FPTOUI: {
3357 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
3358 SrcTy = MRI.getType(I.getOperand(1).getReg());
3359 const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
3360 if (NewOpc == Opcode)
3361 return false;
3362
3363 I.setDesc(TII.get(NewOpc));
3364 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3365 I.setFlags(MachineInstr::NoFPExcept);
3366
3367 return true;
3368 }
3369
3370 case TargetOpcode::G_FREEZE:
3371 return selectCopy(I, TII, MRI, TRI, RBI);
3372
3373 case TargetOpcode::G_INTTOPTR:
3374 // The importer is currently unable to import pointer types since they
3375 // didn't exist in SelectionDAG.
3376 return selectCopy(I, TII, MRI, TRI, RBI);
3377
3378 case TargetOpcode::G_BITCAST:
3379 // Imported SelectionDAG rules can handle every bitcast except those that
3380 // bitcast from a type to the same type. Ideally, these shouldn't occur
3381 // but we might not run an optimizer that deletes them. The other exception
3382 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3383 // of them.
3384 return selectCopy(I, TII, MRI, TRI, RBI);
3385
3386 case TargetOpcode::G_SELECT: {
3387 auto &Sel = cast<GSelect>(I);
3388 const Register CondReg = Sel.getCondReg();
3389 const Register TReg = Sel.getTrueReg();
3390 const Register FReg = Sel.getFalseReg();
3391
3392 if (tryOptSelect(Sel))
3393 return true;
3394
3395 // Make sure to use an unused vreg instead of wzr, so that the peephole
3396 // optimizations will be able to optimize these.
3397 Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3398 auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3399 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
3400 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3401 if (!emitSelect(Sel.getReg(0), TReg, FReg, AArch64CC::NE, MIB))
3402 return false;
3403 Sel.eraseFromParent();
3404 return true;
3405 }
3406 case TargetOpcode::G_ICMP: {
3407 if (Ty.isVector())
3408 return selectVectorICmp(I, MRI);
3409
3410 if (Ty != LLT::scalar(32)) {
3411 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
3412 << ", expected: " << LLT::scalar(32) << '\n');
3413 return false;
3414 }
3415
3416 auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3417 const AArch64CC::CondCode InvCC =
3418 changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
3419 emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), MIB);
3420 emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR,
3421 /*Src2=*/AArch64::WZR, InvCC, MIB);
3422 I.eraseFromParent();
3423 return true;
3424 }
3425
3426 case TargetOpcode::G_FCMP: {
3427 CmpInst::Predicate Pred =
3428 static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3429 if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
3430 Pred) ||
3431 !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
3432 return false;
3433 I.eraseFromParent();
3434 return true;
3435 }
3436 case TargetOpcode::G_VASTART:
3437 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3438 : selectVaStartAAPCS(I, MF, MRI);
3439 case TargetOpcode::G_INTRINSIC:
3440 return selectIntrinsic(I, MRI);
3441 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3442 return selectIntrinsicWithSideEffects(I, MRI);
3443 case TargetOpcode::G_IMPLICIT_DEF: {
3444 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
3445 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3446 const Register DstReg = I.getOperand(0).getReg();
3447 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3448 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3449 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
3450 return true;
3451 }
3452 case TargetOpcode::G_BLOCK_ADDR: {
3453 if (TM.getCodeModel() == CodeModel::Large) {
3454 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
3455 I.eraseFromParent();
3456 return true;
3457 } else {
3458 I.setDesc(TII.get(AArch64::MOVaddrBA));
3459 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
3460 I.getOperand(0).getReg())
3461 .addBlockAddress(I.getOperand(1).getBlockAddress(),
3462 /* Offset */ 0, AArch64II::MO_PAGE)
3463 .addBlockAddress(
3464 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
3465 AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3466 I.eraseFromParent();
3467 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3468 }
3469 }
3470 case AArch64::G_DUP: {
3471 // When the scalar of G_DUP is an s8/s16 gpr, they can't be selected by
3472 // imported patterns. Do it manually here. Avoiding generating s16 gpr is
3473 // difficult because at RBS we may end up pessimizing the fpr case if we
3474 // decided to add an anyextend to fix this. Manual selection is the most
3475 // robust solution for now.
3476 if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3477 AArch64::GPRRegBankID)
3478 return false; // We expect the fpr regbank case to be imported.
3479 LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3480 if (VecTy == LLT::fixed_vector(8, 8))
3481 I.setDesc(TII.get(AArch64::DUPv8i8gpr));
3482 else if (VecTy == LLT::fixed_vector(16, 8))
3483 I.setDesc(TII.get(AArch64::DUPv16i8gpr));
3484 else if (VecTy == LLT::fixed_vector(4, 16))
3485 I.setDesc(TII.get(AArch64::DUPv4i16gpr));
3486 else if (VecTy == LLT::fixed_vector(8, 16))
3487 I.setDesc(TII.get(AArch64::DUPv8i16gpr));
3488 else
3489 return false;
3490 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3491 }
3492 case TargetOpcode::G_INTRINSIC_TRUNC:
3493 return selectIntrinsicTrunc(I, MRI);
3494 case TargetOpcode::G_INTRINSIC_ROUND:
3495 return selectIntrinsicRound(I, MRI);
3496 case TargetOpcode::G_BUILD_VECTOR:
3497 return selectBuildVector(I, MRI);
3498 case TargetOpcode::G_MERGE_VALUES:
3499 return selectMergeValues(I, MRI);
3500 case TargetOpcode::G_UNMERGE_VALUES:
3501 return selectUnmergeValues(I, MRI);
3502 case TargetOpcode::G_SHUFFLE_VECTOR:
3503 return selectShuffleVector(I, MRI);
3504 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3505 return selectExtractElt(I, MRI);
3506 case TargetOpcode::G_INSERT_VECTOR_ELT:
3507 return selectInsertElt(I, MRI);
3508 case TargetOpcode::G_CONCAT_VECTORS:
3509 return selectConcatVectors(I, MRI);
3510 case TargetOpcode::G_JUMP_TABLE:
3511 return selectJumpTable(I, MRI);
3512 case TargetOpcode::G_VECREDUCE_FADD:
3513 case TargetOpcode::G_VECREDUCE_ADD:
3514 return selectReduction(I, MRI);
3515 case TargetOpcode::G_MEMCPY:
3516 case TargetOpcode::G_MEMCPY_INLINE:
3517 case TargetOpcode::G_MEMMOVE:
3518 case TargetOpcode::G_MEMSET:
3519 assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
3520 return selectMOPS(I, MRI);
3521 }
3522
3523 return false;
3524}
3525
3526bool AArch64InstructionSelector::selectReduction(MachineInstr &I,
3527 MachineRegisterInfo &MRI) {
3528 Register VecReg = I.getOperand(1).getReg();
3529 LLT VecTy = MRI.getType(VecReg);
3530 if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) {
3531 // For <2 x i32> ADDPv2i32 generates an FPR64 value, so we need to emit
3532 // a subregister copy afterwards.
3533 if (VecTy == LLT::fixed_vector(2, 32)) {
3534 Register DstReg = I.getOperand(0).getReg();
3535 auto AddP = MIB.buildInstr(AArch64::ADDPv2i32, {&AArch64::FPR64RegClass},
3536 {VecReg, VecReg});
3537 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3538 .addReg(AddP.getReg(0), 0, AArch64::ssub)
3539 .getReg(0);
3540 RBI.constrainGenericRegister(Copy, AArch64::FPR32RegClass, MRI);
3541 I.eraseFromParent();
3542 return constrainSelectedInstRegOperands(*AddP, TII, TRI, RBI);
3543 }
3544
3545 unsigned Opc = 0;
3546 if (VecTy == LLT::fixed_vector(16, 8))
3547 Opc = AArch64::ADDVv16i8v;
3548 else if (VecTy == LLT::fixed_vector(8, 16))
3549 Opc = AArch64::ADDVv8i16v;
3550 else if (VecTy == LLT::fixed_vector(4, 32))
3551 Opc = AArch64::ADDVv4i32v;
3552 else if (VecTy == LLT::fixed_vector(2, 64))
3553 Opc = AArch64::ADDPv2i64p;
3554 else {
3555 LLVM_DEBUG(dbgs() << "Unhandled type for add reduction");
3556 return false;
3557 }
3558 I.setDesc(TII.get(Opc));
3559 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3560 }
3561
3562 if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) {
3563 unsigned Opc = 0;
3564 if (VecTy == LLT::fixed_vector(2, 32))
3565 Opc = AArch64::FADDPv2i32p;
3566 else if (VecTy == LLT::fixed_vector(2, 64))
3567 Opc = AArch64::FADDPv2i64p;
3568 else {
3569 LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction");
3570 return false;
3571 }
3572 I.setDesc(TII.get(Opc));
3573 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3574 }
3575 return false;
3576}
3577
3578bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
3579 MachineRegisterInfo &MRI) {
3580 unsigned Mopcode;
1. 'Mopcode' declared without an initial value
3581 switch (GI.getOpcode()) {
2. 'Default' branch taken. Execution continues on line 3595
3582 case TargetOpcode::G_MEMCPY:
3583 case TargetOpcode::G_MEMCPY_INLINE:
3584 Mopcode = AArch64::MOPSMemoryCopyPseudo;
3585 break;
3586 case TargetOpcode::G_MEMMOVE:
3587 Mopcode = AArch64::MOPSMemoryMovePseudo;
3588 break;
3589 case TargetOpcode::G_MEMSET:
3590 // For tagged memset see llvm.aarch64.mops.memset.tag
3591 Mopcode = AArch64::MOPSMemorySetPseudo;
3592 break;
3593 }
3594
3595 auto &DstPtr = GI.getOperand(0);
3596 auto &SrcOrVal = GI.getOperand(1);
3597 auto &Size = GI.getOperand(2);
3598
3599 // Create copies of the registers that can be clobbered.
3600 const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg());
3601 const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg());
3602 const Register SizeCopy = MRI.cloneVirtualRegister(Size.getReg());
3603
3604 const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
3. The left operand of '==' is a garbage value
3605 const auto &SrcValRegClass =
3606 IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
3607
3608 // Constrain to specific registers
3609 RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI);
3610 RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI);
3611 RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI);
3612
3613 MIB.buildCopy(DstPtrCopy, DstPtr);
3614 MIB.buildCopy(SrcValCopy, SrcOrVal);
3615 MIB.buildCopy(SizeCopy, Size);
3616
3617 // New instruction uses the copied registers because it must update them.
3618 // The defs are not used since they don't exist in G_MEM*. They are still
3619 // tied.
3620 // Note: order of operands is different from G_MEMSET, G_MEMCPY, G_MEMMOVE
3621 Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
3622 Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3623 if (IsSet) {
3624 MIB.buildInstr(Mopcode, {DefDstPtr, DefSize},
3625 {DstPtrCopy, SizeCopy, SrcValCopy});
3626 } else {
3627 Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass);
3628 MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
3629 {DstPtrCopy, SrcValCopy, SizeCopy});
3630 }
3631
3632 GI.eraseFromParent();
3633 return true;
3634}
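
Why the analyzer flags line 3604: the switch on GI.getOpcode() at line 3581 has no default case and Mopcode is declared uninitialized at line 3580, so on the analyzer's assumed 'Default' path (path note 2) nothing writes Mopcode before the comparison Mopcode == AArch64::MOPSMemorySetPseudo, hence the garbage-value report. In practice selectMOPS is only reached from the G_MEMCPY/G_MEMCPY_INLINE/G_MEMMOVE/G_MEMSET cases of the select() switch above, so the path is unreachable; the sketch below is one hypothetical way to make that invariant visible to the analyzer. It is only an illustration under that assumption, not the actual upstream LLVM fix.

    // Hypothetical sketch: an unreachable default keeps Mopcode provably
    // initialized on every path the analyzer considers, so the later
    // comparison can no longer read an uninitialized value.
    unsigned Mopcode;
    switch (GI.getOpcode()) {
    case TargetOpcode::G_MEMCPY:
    case TargetOpcode::G_MEMCPY_INLINE:
      Mopcode = AArch64::MOPSMemoryCopyPseudo;
      break;
    case TargetOpcode::G_MEMMOVE:
      Mopcode = AArch64::MOPSMemoryMovePseudo;
      break;
    case TargetOpcode::G_MEMSET:
      // For tagged memset see llvm.aarch64.mops.memset.tag
      Mopcode = AArch64::MOPSMemorySetPseudo;
      break;
    default:
      llvm_unreachable("selectMOPS called with an unexpected opcode");
    }
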
3635
3636bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3637 MachineRegisterInfo &MRI) {
3638 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
3639 Register JTAddr = I.getOperand(0).getReg();
3640 unsigned JTI = I.getOperand(1).getIndex();
3641 Register Index = I.getOperand(2).getReg();
3642
3643 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3644 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3645
3646 MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
3647 auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
3648 {TargetReg, ScratchReg}, {JTAddr, Index})
3649 .addJumpTableIndex(JTI);
3650 // Build the indirect branch.
3651 MIB.buildInstr(AArch64::BR, {}, {TargetReg});
3652 I.eraseFromParent();
3653 return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
3654}
3655
3656bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
3657 MachineRegisterInfo &MRI) {
3658 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
3659 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
3660
3661 Register DstReg = I.getOperand(0).getReg();
3662 unsigned JTI = I.getOperand(1).getIndex();
3663 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
3664 auto MovMI =
3665 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3666 .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
3667 .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3668 I.eraseFromParent();
3669 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3670}
3671
3672bool AArch64InstructionSelector::selectTLSGlobalValue(
3673 MachineInstr &I, MachineRegisterInfo &MRI) {
3674 if (!STI.isTargetMachO())
3675 return false;
3676 MachineFunction &MF = *I.getParent()->getParent();
3677 MF.getFrameInfo().setAdjustsStack(true);
3678
3679 const auto &GlobalOp = I.getOperand(1);
3680 assert(GlobalOp.getOffset() == 0 &&
3681 "Shouldn't have an offset on TLS globals!");
3682 const GlobalValue &GV = *GlobalOp.getGlobal();
3683
3684 auto LoadGOT =
3685 MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3686 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
3687
3688 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3689 {LoadGOT.getReg(0)})
3690 .addImm(0);
3691
3692 MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
3693 // TLS calls preserve all registers except those that absolutely must be
3694 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3695 // silly).
3696 MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load})
3697 .addUse(AArch64::X0, RegState::Implicit)
3698 .addDef(AArch64::X0, RegState::Implicit)
3699 .addRegMask(TRI.getTLSCallPreservedMask());
3700
3701 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
3702 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3703 MRI);
3704 I.eraseFromParent();
3705 return true;
3706}
3707
3708bool AArch64InstructionSelector::selectIntrinsicTrunc(
3709 MachineInstr &I, MachineRegisterInfo &MRI) const {
3710 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3711
3712 // Select the correct opcode.
3713 unsigned Opc = 0;
3714 if (!SrcTy.isVector()) {
3715 switch (SrcTy.getSizeInBits()) {
3716 default:
3717 case 16:
3718 Opc = AArch64::FRINTZHr;
3719 break;
3720 case 32:
3721 Opc = AArch64::FRINTZSr;
3722 break;
3723 case 64:
3724 Opc = AArch64::FRINTZDr;
3725 break;
3726 }
3727 } else {
3728 unsigned NumElts = SrcTy.getNumElements();
3729 switch (SrcTy.getElementType().getSizeInBits()) {
3730 default:
3731 break;
3732 case 16:
3733 if (NumElts == 4)
3734 Opc = AArch64::FRINTZv4f16;
3735 else if (NumElts == 8)
3736 Opc = AArch64::FRINTZv8f16;
3737 break;
3738 case 32:
3739 if (NumElts == 2)
3740 Opc = AArch64::FRINTZv2f32;
3741 else if (NumElts == 4)
3742 Opc = AArch64::FRINTZv4f32;
3743 break;
3744 case 64:
3745 if (NumElts == 2)
3746 Opc = AArch64::FRINTZv2f64;
3747 break;
3748 }
3749 }
3750
3751 if (!Opc) {
3752 // Didn't get an opcode above, bail.
3753 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
3754 return false;
3755 }
3756
3757 // Legalization would have set us up perfectly for this; we just need to
3758 // set the opcode and move on.
3759 I.setDesc(TII.get(Opc));
3760 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3761}
3762
3763bool AArch64InstructionSelector::selectIntrinsicRound(
3764 MachineInstr &I, MachineRegisterInfo &MRI) const {
3765 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3766
3767 // Select the correct opcode.
3768 unsigned Opc = 0;
3769 if (!SrcTy.isVector()) {
3770 switch (SrcTy.getSizeInBits()) {
3771 default:
3772 case 16:
3773 Opc = AArch64::FRINTAHr;
3774 break;
3775 case 32:
3776 Opc = AArch64::FRINTASr;
3777 break;
3778 case 64:
3779 Opc = AArch64::FRINTADr;
3780 break;
3781 }
3782 } else {
3783 unsigned NumElts = SrcTy.getNumElements();
3784 switch (SrcTy.getElementType().getSizeInBits()) {
3785 default:
3786 break;
3787 case 16:
3788 if (NumElts == 4)
3789 Opc = AArch64::FRINTAv4f16;
3790 else if (NumElts == 8)
3791 Opc = AArch64::FRINTAv8f16;
3792 break;
3793 case 32:
3794 if (NumElts == 2)
3795 Opc = AArch64::FRINTAv2f32;
3796 else if (NumElts == 4)
3797 Opc = AArch64::FRINTAv4f32;
3798 break;
3799 case 64:
3800 if (NumElts == 2)
3801 Opc = AArch64::FRINTAv2f64;
3802 break;
3803 }
3804 }
3805
3806 if (!Opc) {
3807 // Didn't get an opcode above, bail.
3808 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
3809 return false;
3810 }
3811
3812 // Legalization would have set us up perfectly for this; we just need to
3813 // set the opcode and move on.
3814 I.setDesc(TII.get(Opc));
3815 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3816}
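The two handlers above differ only in opcode choice: G_INTRINSIC_TRUNC maps to the FRINTZ family (round toward zero) and G_INTRINSIC_ROUND to FRINTA (round to nearest, ties away from zero). In scalar C++ terms those are the std::trunc and std::round rounding modes, as the small sanity check below illustrates; it is an analogy for the rounding behaviour, not a statement about the instruction encodings.

    #include <cassert>
    #include <cmath>

    int main() {
      // FRINTZ-like behaviour: round toward zero.
      assert(std::trunc(2.7) == 2.0 && std::trunc(-2.7) == -2.0);
      // FRINTA-like behaviour: round to nearest, ties away from zero.
      assert(std::round(2.5) == 3.0 && std::round(-2.5) == -3.0);
      return 0;
    }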
3817
3818bool AArch64InstructionSelector::selectVectorICmp(
3819 MachineInstr &I, MachineRegisterInfo &MRI) {
3820 Register DstReg = I.getOperand(0).getReg();
3821 LLT DstTy = MRI.getType(DstReg);
3822 Register SrcReg = I.getOperand(2).getReg();
3823 Register Src2Reg = I.getOperand(3).getReg();
3824 LLT SrcTy = MRI.getType(SrcReg);
3825
3826 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
3827 unsigned NumElts = DstTy.getNumElements();
3828
3829 // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
3830 // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
3831 // Third index is cc opcode:
3832 // 0 == eq
3833 // 1 == ugt
3834 // 2 == uge
3835 // 3 == ult
3836 // 4 == ule
3837 // 5 == sgt
3838 // 6 == sge
3839 // 7 == slt
3840 // 8 == sle
3841 // ne is done by negating 'eq' result.
3842
3843 // This table below assumes that for some comparisons the operands will be
3844 // commuted.
3845 // ult op == commute + ugt op
3846 // ule op == commute + uge op
3847 // slt op == commute + sgt op
3848 // sle op == commute + sge op
3849 unsigned PredIdx = 0;
3850 bool SwapOperands = false;
3851 CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
3852 switch (Pred) {
3853 case CmpInst::ICMP_NE:
3854 case CmpInst::ICMP_EQ:
3855 PredIdx = 0;
3856 break;
3857 case CmpInst::ICMP_UGT:
3858 PredIdx = 1;
3859 break;
3860 case CmpInst::ICMP_UGE:
3861 PredIdx = 2;
3862 break;
3863 case CmpInst::ICMP_ULT:
3864 PredIdx = 3;
3865 SwapOperands = true;
3866 break;
3867 case CmpInst::ICMP_ULE:
3868 PredIdx = 4;
3869 SwapOperands = true;
3870 break;
3871 case CmpInst::ICMP_SGT:
3872 PredIdx = 5;
3873 break;
3874 case CmpInst::ICMP_SGE:
3875 PredIdx = 6;
3876 break;
3877 case CmpInst::ICMP_SLT:
3878 PredIdx = 7;
3879 SwapOperands = true;
3880 break;
3881 case CmpInst::ICMP_SLE:
3882 PredIdx = 8;
3883 SwapOperands = true;
3884 break;
3885 default:
3886 llvm_unreachable("Unhandled icmp predicate");
3887 return false;
3888 }
3889
3890 // This table obviously should be tablegen'd when we have our GISel native
3891 // tablegen selector.
3892
3893 static const unsigned OpcTable[4][4][9] = {
3894 {
3895 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3896 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3897 0 /* invalid */},
3898 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3899 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3900 0 /* invalid */},
3901 {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
3902 AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
3903 AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
3904 {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
3905 AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
3906 AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
3907 },
3908 {
3909 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3910 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3911 0 /* invalid */},
3912 {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
3913 AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
3914 AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
3915 {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
3916 AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
3917 AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
3918 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3919 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3920 0 /* invalid */}
3921 },
3922 {
3923 {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
3924 AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
3925 AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
3926 {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
3927 AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
3928 AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
3929 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3930 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3931 0 /* invalid */},
3932 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3933 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3934 0 /* invalid */}
3935 },
3936 {
3937 {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
3938 AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
3939 AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
3940 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3941 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3942 0 /* invalid */},
3943 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3944 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3945 0 /* invalid */},
3946 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3947 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3948 0 /* invalid */}
3949 },
3950 };
3951 unsigned EltIdx = Log2_32(SrcEltSize / 8);
3952 unsigned NumEltsIdx = Log2_32(NumElts / 2);
3953 unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
3954 if (!Opc) {
3955 LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
3956 return false;
3957 }
3958
3959 const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3960 const TargetRegisterClass *SrcRC =
3961 getRegClassForTypeOnBank(SrcTy, VecRB, true);
3962 if (!SrcRC) {
3963 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3964 return false;
3965 }
3966
3967 unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
3968 if (SrcTy.getSizeInBits() == 128)
3969 NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
3970
3971 if (SwapOperands)
3972 std::swap(SrcReg, Src2Reg);
3973
3974 auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
3975 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3976
3977 // Invert if we had a 'ne' cc.
3978 if (NotOpc) {
3979 Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
3980 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3981 } else {
3982 MIB.buildCopy(DstReg, Cmp.getReg(0));
3983 }
3984 RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
3985 I.eraseFromParent();
3986 return true;
3987}
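The comments in selectVectorICmp describe how the three indices into OpcTable are formed; a worked example makes the arithmetic concrete. The helper below stands in for llvm::Log2_32 (assumed to be floor-log2) and everything else is plain C++, so this is an illustration of the indexing, not selector code.

    #include <cassert>
    #include <cstdio>

    // Local stand-in for llvm::Log2_32 (floor of log2).
    static unsigned log2u(unsigned V) {
      unsigned L = 0;
      while (V >>= 1)
        ++L;
      return L;
    }

    int main() {
      // Example: G_ICMP sge on <4 x s32>.
      unsigned SrcEltSize = 32, NumElts = 4;
      unsigned EltIdx = log2u(SrcEltSize / 8);   // 32-bit elements -> 2
      unsigned NumEltsIdx = log2u(NumElts / 2);  // 4 elements      -> 1
      unsigned PredIdx = 6;                      // sge slot, per the comment block
      assert(EltIdx == 2 && NumEltsIdx == 1);
      // OpcTable[2][1][6] in the listing is AArch64::CMGEv4i32, i.e. a signed
      // >= compare on a 4 x 32-bit vector. slt on the same type uses PredIdx 7
      // with SwapOperands = true and lands on CMGTv4i32 with commuted operands;
      // ne reuses the eq entry (CMEQv4i32) and is inverted with NOTv16i8.
      std::printf("indices: [%u][%u][%u]\n", EltIdx, NumEltsIdx, PredIdx);
      return 0;
    }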
3988
3989MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3990 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3991 MachineIRBuilder &MIRBuilder) const {
3992 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3993
3994 auto BuildFn = [&](unsigned SubregIndex) {
3995 auto Ins =
3996 MIRBuilder
3997 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
3998 .addImm(SubregIndex);
3999 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
4000 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
4001 return &*Ins;
4002 };
4003
4004 switch (EltSize) {
4005 case 8:
4006 return BuildFn(AArch64::bsub);
4007 case 16:
4008 return BuildFn(AArch64::hsub);
4009 case 32:
4010 return BuildFn(AArch64::ssub);
4011 case 64:
4012 return BuildFn(AArch64::dsub);
4013 default:
4014 return nullptr;
4015 }
4016}
4017
4018bool AArch64InstructionSelector::selectMergeValues(
4019 MachineInstr &I, MachineRegisterInfo &MRI) {
4020 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
4021 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
4022 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
4023 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
4024 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
4025
4026 if (I.getNumOperands() != 3)
4027 return false;
4028
4029 // Merging 2 s64s into an s128.
4030 if (DstTy == LLT::scalar(128)) {
4031 if (SrcTy.getSizeInBits() != 64)
4032 return false;
4033 Register DstReg = I.getOperand(0).getReg();
4034 Register Src1Reg = I.getOperand(1).getReg();
4035 Register Src2Reg = I.getOperand(2).getReg();
4036 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
4037 MachineInstr *InsMI = emitLaneInsert(std::nullopt, Tmp.getReg(0), Src1Reg,
4038 /* LaneIdx */ 0, RB, MIB);
4039 if (!InsMI)
4040 return false;
4041 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
4042 Src2Reg, /* LaneIdx */ 1, RB, MIB);
4043 if (!Ins2MI)
4044 return false;
4045 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
4046 constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
4047 I.eraseFromParent();
4048 return true;
4049 }
4050
4051 if (RB.getID() != AArch64::GPRRegBankID)
4052 return false;
4053
4054 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
4055 return false;
4056
4057 auto *DstRC = &AArch64::GPR64RegClass;
4058 Register SubToRegDef = MRI.createVirtualRegister(DstRC);
4059 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
4060 TII.get(TargetOpcode::SUBREG_TO_REG))
4061 .addDef(SubToRegDef)
4062 .addImm(0)
4063 .addUse(I.getOperand(1).getReg())
4064 .addImm(AArch64::sub_32);
4065 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
4066 // Need to anyext the second scalar before we can use bfm
4067 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
4068 TII.get(TargetOpcode::SUBREG_TO_REG))
4069 .addDef(SubToRegDef2)
4070 .addImm(0)
4071 .addUse(I.getOperand(2).getReg())
4072 .addImm(AArch64::sub_32);
4073 MachineInstr &BFM =
4074 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
4075 .addDef(I.getOperand(0).getReg())
4076 .addUse(SubToRegDef)
4077 .addUse(SubToRegDef2)
4078 .addImm(32)
4079 .addImm(31);
4080 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
4081 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
4082 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
4083 I.eraseFromParent();
4084 return true;
4085}
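For the GPR path at the end of selectMergeValues (two s32 values packed into an s64), each input is first zero-extended with SUBREG_TO_REG and the BFMXri with immr=32, imms=31 then inserts 32 bits of the second input at bit 32 of the first; if I am reading the BFI alias of BFM correctly, the net effect is the ordinary 32/32 pack sketched below. This is an illustration of the intended result, not LLVM code.

    #include <cassert>
    #include <cstdint>

    // Expected result of merging two 32-bit scalars into one 64-bit value:
    // the first operand becomes the low half, the second the high half.
    static uint64_t mergeS32Pair(uint32_t Lo, uint32_t Hi) {
      return static_cast<uint64_t>(Lo) | (static_cast<uint64_t>(Hi) << 32);
    }

    int main() {
      // BFM Xd, Xn, #32, #31 reads as BFI Xd, Xn, #32, #32: insert 32 bits of
      // Xn at bit 32 of Xd, leaving the low 32 bits (the first operand) alone.
      assert(mergeS32Pair(0x11111111u, 0x22222222u) == 0x2222222211111111ull);
      return 0;
    }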
4086
4087static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
4088 const unsigned EltSize) {
4089 // Choose a lane copy opcode and subregister based off of the size of the
4090 // vector's elements.
4091 switch (EltSize) {
4092 case 8:
4093 CopyOpc = AArch64::DUPi8;
4094 ExtractSubReg = AArch64::bsub;
4095 break;
4096 case 16:
4097 CopyOpc = AArch64::DUPi16;
4098 ExtractSubReg = AArch64::hsub;
4099 break;
4100 case 32:
4101 CopyOpc = AArch64::DUPi32;
4102 ExtractSubReg = AArch64::ssub;
4103 break;
4104 case 64:
4105 CopyOpc = AArch64::DUPi64;
4106 ExtractSubReg = AArch64::dsub;
4107 break;
4108 default:
4109 // Unknown size, bail out.
4110 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
4111 return false;
4112 }
4113 return true;
4114}
4115
4116MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
4117 std::optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
4118 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
4119 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4120 unsigned CopyOpc = 0;
4121 unsigned ExtractSubReg = 0;
4122 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
4123 LLVM_DEBUG(
4124 dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
4125 return nullptr;
4126 }
4127
4128 const TargetRegisterClass *DstRC =
4129 getRegClassForTypeOnBank(ScalarTy, DstRB, true);
4130 if (!DstRC) {
4131 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
4132 return nullptr;
4133 }
4134
4135 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
4136 const LLT &VecTy = MRI.getType(VecReg);
4137 const TargetRegisterClass *VecRC =
4138 getRegClassForTypeOnBank(VecTy, VecRB, true);
4139 if (!VecRC) {
4140 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
4141 return nullptr;
4142 }
4143
4144 // The register that we're going to copy into.
4145 Register InsertReg = VecReg;
4146 if (!DstReg)
4147 DstReg = MRI.createVirtualRegister(DstRC);
4148 // If the lane index is 0, we just use a subregister COPY.
4149 if (LaneIdx == 0) {
4150 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
4151 .addReg(VecReg, 0, ExtractSubReg);
4152 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
4153 return &*Copy;
4154 }
4155
4156 // Lane copies require 128-bit wide registers. If we're dealing with an
4157 // unpacked vector, then we need to move up to that width. Insert an implicit
4158 // def and a subregister insert to get us there.
4159 if (VecTy.getSizeInBits() != 128) {
4160 MachineInstr *ScalarToVector = emitScalarToVector(
4161 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
4162 if (!ScalarToVector)
4163 return nullptr;
4164 InsertReg = ScalarToVector->getOperand(0).getReg();
4165 }
4166
4167 MachineInstr *LaneCopyMI =
4168 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
4169 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
4170
4171 // Make sure that we actually constrain the initial copy.
4172 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
4173 return LaneCopyMI;
4174}
4175
4176bool AArch64InstructionSelector::selectExtractElt(
4177 MachineInstr &I, MachineRegisterInfo &MRI) {
4178 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
4179 "unexpected opcode!");
4180 Register DstReg = I.getOperand(0).getReg();
4181 const LLT NarrowTy = MRI.getType(DstReg);
4182 const Register SrcReg = I.getOperand(1).getReg();
4183 const LLT WideTy = MRI.getType(SrcReg);
4184 (void)WideTy;
4185 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
4186 "source register size too small!");
4187 assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
4188
4189 // Need the lane index to determine the correct copy opcode.
4190 MachineOperand &LaneIdxOp = I.getOperand(2);
4191 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
4192
4193 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
4194 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
4195 return false;
4196 }
4197
4198 // Find the index to extract from.
4199 auto VRegAndVal = getIConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
4200 if (!VRegAndVal)
4201 return false;
4202 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
4203
4204
4205 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
4206 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
4207 LaneIdx, MIB);
4208 if (!Extract)
4209 return false;
4210
4211 I.eraseFromParent();
4212 return true;
4213}
4214
4215bool AArch64InstructionSelector::selectSplitVectorUnmerge(
4216 MachineInstr &I, MachineRegisterInfo &MRI) {
4217 unsigned NumElts = I.getNumOperands() - 1;
4218 Register SrcReg = I.getOperand(NumElts).getReg();
4219 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4220 const LLT SrcTy = MRI.getType(SrcReg);
4221
4222 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
4223 if (SrcTy.getSizeInBits() > 128) {
4224 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
4225 return false;
4226 }
4227
4228 // We implement a split vector operation by treating the sub-vectors as
4229 // scalars and extracting them.
4230 const RegisterBank &DstRB =
4231 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
4232 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
4233 Register Dst = I.getOperand(OpIdx).getReg();
4234 MachineInstr *Extract =
4235 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
4236 if (!Extract)
4237 return false;
4238 }
4239 I.eraseFromParent();
4240 return true;
4241}
4242
4243bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
4244 MachineRegisterInfo &MRI) {
4245 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
4246 "unexpected opcode");
4247
4248 // TODO: Handle unmerging into GPRs and from scalars to scalars.
4249 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
4250 AArch64::FPRRegBankID ||
4251 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
4252 AArch64::FPRRegBankID) {
4253 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
4254 "currently unsupported.\n");
4255 return false;
4256 }
4257
4258 // The last operand is the vector source register, and every other operand is
4259 // a register to unpack into.
4260 unsigned NumElts = I.getNumOperands() - 1;
4261 Register SrcReg = I.getOperand(NumElts).getReg();
4262 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4263 const LLT WideTy = MRI.getType(SrcReg);
4264 (void)WideTy;
4265 assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
4266 "can only unmerge from vector or s128 types!");
4267 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
4268 "source register size too small!");
4269
4270 if (!NarrowTy.isScalar())
4271 return selectSplitVectorUnmerge(I, MRI);
4272
4273 // Choose a lane copy opcode and subregister based off of the size of the
4274 // vector's elements.
4275 unsigned CopyOpc = 0;
4276 unsigned ExtractSubReg = 0;
4277 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
4278 return false;
4279
4280 // Set up for the lane copies.
4281 MachineBasicBlock &MBB = *I.getParent();
4282
4283 // Stores the registers we'll be copying from.
4284 SmallVector<Register, 4> InsertRegs;
4285
4286 // We'll use the first register twice, so we only need NumElts-1 registers.
4287 unsigned NumInsertRegs = NumElts - 1;
4288
4289 // If our elements fit into exactly 128 bits, then we can copy from the source
4290 // directly. Otherwise, we need to do a bit of setup with some subregister
4291 // inserts.
4292 if (NarrowTy.getSizeInBits() * NumElts == 128) {
4293 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
4294 } else {
4295 // No. We have to perform subregister inserts. For each insert, create an
4296 // implicit def and a subregister insert, and save the register we create.
4297 const TargetRegisterClass *RC = getRegClassForTypeOnBank(
4298 LLT::fixed_vector(NumElts, WideTy.getScalarSizeInBits()),
4299 *RBI.getRegBank(SrcReg, MRI, TRI));
4300 unsigned SubReg = 0;
4301 bool Found = getSubRegForClass(RC, TRI, SubReg);
4302 (void)Found;
4303 assert(Found && "expected to find last operand's subeg idx");
4304 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
4305 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4306 MachineInstr &ImpDefMI =
4307 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
4308 ImpDefReg);
4309
4310 // Now, create the subregister insert from SrcReg.
4311 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4312 MachineInstr &InsMI =
4313 *BuildMI(MBB, I, I.getDebugLoc(),
4314 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
4315 .addUse(ImpDefReg)
4316 .addUse(SrcReg)
4317 .addImm(SubReg);
4318
4319 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
4320 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
4321
4322 // Save the register so that we can copy from it after.
4323 InsertRegs.push_back(InsertReg);
4324 }
4325 }
4326
4327 // Now that we've created any necessary subregister inserts, we can
4328 // create the copies.
4329 //
4330 // Perform the first copy separately as a subregister copy.
4331 Register CopyTo = I.getOperand(0).getReg();
4332 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
4333 .addReg(InsertRegs[0], 0, ExtractSubReg);
4334 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
4335
4336 // Now, perform the remaining copies as vector lane copies.
4337 unsigned LaneIdx = 1;
4338 for (Register InsReg : InsertRegs) {
4339 Register CopyTo = I.getOperand(LaneIdx).getReg();
4340 MachineInstr &CopyInst =
4341 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
4342 .addUse(InsReg)
4343 .addImm(LaneIdx);
4344 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
4345 ++LaneIdx;
4346 }
4347
4348 // Separately constrain the first copy's destination. Because of the
4349 // limitation in constrainOperandRegClass, we can't guarantee that this will
4350 // actually be constrained. So, do it ourselves using the second operand.
4351 const TargetRegisterClass *RC =
4352 MRI.getRegClassOrNull(I.getOperand(1).getReg());
4353 if (!RC) {
4354 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
4355 return false;
4356 }
4357
4358 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
4359 I.eraseFromParent();
4360 return true;
4361}
4362
4363bool AArch64InstructionSelector::selectConcatVectors(
4364 MachineInstr &I, MachineRegisterInfo &MRI) {
4365 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4366 "Unexpected opcode");
4367 Register Dst = I.getOperand(0).getReg();
4368 Register Op1 = I.getOperand(1).getReg();
4369 Register Op2 = I.getOperand(2).getReg();
4370 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
4371 if (!ConcatMI)
4372 return false;
4373 I.eraseFromParent();
4374 return true;
4375}
4376
4377unsigned
4378AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
4379 MachineFunction &MF) const {
4380 Type *CPTy = CPVal->getType();
4381 Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
4382
4383 MachineConstantPool *MCP = MF.getConstantPool();
4384 return MCP->getConstantPoolIndex(CPVal, Alignment);
4385}
4386
4387MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4388 const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
4389 const TargetRegisterClass *RC;
4390 unsigned Opc;
4391 bool IsTiny = TM.getCodeModel() == CodeModel::Tiny;
4392 unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
4393 switch (Size) {
4394 case 16:
4395 RC = &AArch64::FPR128RegClass;
4396 Opc = IsTiny ? AArch64::LDRQl : AArch64::LDRQui;
4397 break;
4398 case 8:
4399 RC = &AArch64::FPR64RegClass;
4400 Opc = IsTiny ? AArch64::LDRDl : AArch64::LDRDui;
4401 break;
4402 case 4:
4403 RC = &AArch64::FPR32RegClass;
4404 Opc = IsTiny ? AArch64::LDRSl : AArch64::LDRSui;
4405 break;
4406 case 2:
4407 RC = &AArch64::FPR16RegClass;
4408 Opc = AArch64::LDRHui;
4409 break;
4410 default:
4411 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
4412 << *CPVal->getType());
4414 }
4415
4416 MachineInstr *LoadMI = nullptr;
4417 auto &MF = MIRBuilder.getMF();
4418 unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
4419 if (IsTiny && (Size == 16 || Size == 8 || Size == 4)) {
4420 // Use load(literal) for tiny code model.
4421 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {}).addConstantPoolIndex(CPIdx);
4422 } else {
4423 auto Adrp =
4424 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
4425 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
4426
4427 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {Adrp})
4428 .addConstantPoolIndex(
4429 CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4430
4431 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
4432 }
4433
4434 MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
4435 LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo,
4436 MachineMemOperand::MOLoad,
4437 Size, Align(Size)));
4438 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
4439 return LoadMI;
4440}
4441
4442/// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
4443/// size and RB.
4444static std::pair<unsigned, unsigned>
4445getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
4446 unsigned Opc, SubregIdx;
4447 if (RB.getID() == AArch64::GPRRegBankID) {
4448 if (EltSize == 16) {
4449 Opc = AArch64::INSvi16gpr;
4450 SubregIdx = AArch64::ssub;
4451 } else if (EltSize == 32) {
4452 Opc = AArch64::INSvi32gpr;
4453 SubregIdx = AArch64::ssub;
4454 } else if (EltSize == 64) {
4455 Opc = AArch64::INSvi64gpr;
4456 SubregIdx = AArch64::dsub;
4457 } else {
4458 llvm_unreachable("invalid elt size!");
4459 }
4460 } else {
4461 if (EltSize == 8) {
4462 Opc = AArch64::INSvi8lane;
4463 SubregIdx = AArch64::bsub;
4464 } else if (EltSize == 16) {
4465 Opc = AArch64::INSvi16lane;
4466 SubregIdx = AArch64::hsub;
4467 } else if (EltSize == 32) {
4468 Opc = AArch64::INSvi32lane;
4469 SubregIdx = AArch64::ssub;
4470 } else if (EltSize == 64) {
4471 Opc = AArch64::INSvi64lane;
4472 SubregIdx = AArch64::dsub;
4473 } else {
4474 llvm_unreachable("invalid elt size!");
4475 }
4476 }
4477 return std::make_pair(Opc, SubregIdx);
4478}
4479
4480MachineInstr *AArch64InstructionSelector::emitInstr(
4481 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4482 std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4483 const ComplexRendererFns &RenderFns) const {
4484 assert(Opcode && "Expected an opcode?");
4485 assert(!isPreISelGenericOpcode(Opcode) &&
4486 "Function should only be used to produce selected instructions!");
4487 auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
4488 if (RenderFns)
4489 for (auto &Fn : *RenderFns)
4490 Fn(MI);
4491 constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
4492 return &*MI;
4493}
4494
4495MachineInstr *AArch64InstructionSelector::emitAddSub(
4496 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4497 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
4498 MachineIRBuilder &MIRBuilder) const {
4499 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4500 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4501 auto Ty = MRI.getType(LHS.getReg());
4502 assert(!Ty.isVector() && "Expected a scalar or pointer?");
4503 unsigned Size = Ty.getSizeInBits();
4504 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
4505 bool Is32Bit = Size == 32;
4506
4507 // INSTRri form with positive arithmetic immediate.
4508 if (auto Fns = selectArithImmed(RHS))
4509 return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
4510 MIRBuilder, Fns);
4511
4512 // INSTRri form with negative arithmetic immediate.
4513 if (auto Fns = selectNegArithImmed(RHS))
4514 return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
4515 MIRBuilder, Fns);
4516
4517 // INSTRrx form.
4518 if (auto Fns = selectArithExtendedRegister(RHS))
4519 return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
4520 MIRBuilder, Fns);
4521
4522 // INSTRrs form.
4523 if (auto Fns = selectShiftedRegister(RHS))
4524 return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
4525 MIRBuilder, Fns);
4526 return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
4527 MIRBuilder);
4528}
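emitAddSub tries the operand forms in a fixed priority order (positive arithmetic immediate, negated immediate, extended register, shifted register) and only falls back to the plain register-register encoding when nothing folds. The sketch below shows that "first matcher that returns a value wins" shape in isolation; the matcher names and the 0..4095 range check are simplifications, not the selector's ComplexRendererFns (the real immediate test also allows a 12-bit left shift).

    #include <cstdio>
    #include <optional>

    // Hypothetical operand forms standing in for the ri/rs/rx/rr encodings.
    enum class Form { Imm, NegImm, ExtReg, ShiftReg, RegReg };

    // Each matcher either recognises its form or declines with std::nullopt.
    static std::optional<Form> matchImm(int RHS) {
      if (RHS >= 0 && RHS < 4096)
        return Form::Imm;
      return std::nullopt;
    }
    static std::optional<Form> matchNegImm(int RHS) {
      if (RHS < 0 && -RHS < 4096)
        return Form::NegImm;
      return std::nullopt;
    }
    static std::optional<Form> matchExtReg(int) { return std::nullopt; }   // placeholder
    static std::optional<Form> matchShiftReg(int) { return std::nullopt; } // placeholder

    static Form pickForm(int RHS) {
      if (auto F = matchImm(RHS))      return *F;  // INSTRri, positive immediate
      if (auto F = matchNegImm(RHS))   return *F;  // flipped opcode, e.g. ADD -> SUBri
      if (auto F = matchExtReg(RHS))   return *F;  // INSTRrx
      if (auto F = matchShiftReg(RHS)) return *F;  // INSTRrs
      return Form::RegReg;                         // INSTRrr fallback
    }

    int main() {
      std::printf("%d %d %d\n", static_cast<int>(pickForm(42)),
                  static_cast<int>(pickForm(-7)),
                  static_cast<int>(pickForm(1 << 20)));
      return 0;
    }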
4529
4530MachineInstr *
4531AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4532 MachineOperand &RHS,
4533 MachineIRBuilder &MIRBuilder) const {
4534 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4535 {{AArch64::ADDXri, AArch64::ADDWri},
4536 {AArch64::ADDXrs, AArch64::ADDWrs},
4537 {AArch64::ADDXrr, AArch64::ADDWrr},
4538 {AArch64::SUBXri, AArch64::SUBWri},
4539 {AArch64::ADDXrx, AArch64::ADDWrx}}};
4540 return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
4541}
4542
4543MachineInstr *
4544AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4545 MachineOperand &RHS,
4546 MachineIRBuilder &MIRBuilder) const {
4547 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4548 {{AArch64::ADDSXri, AArch64::ADDSWri},
4549 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4550 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4551 {AArch64::SUBSXri, AArch64::SUBSWri},
4552 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4553 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4554}
4555
4556MachineInstr *
4557AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4558 MachineOperand &RHS,
4559 MachineIRBuilder &MIRBuilder) const {
4560 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4561 {{AArch64::SUBSXri, AArch64::SUBSWri},
4562 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4563 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4564 {AArch64::ADDSXri, AArch64::ADDSWri},
4565 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4566 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4567}
4568
4569MachineInstr *
4570AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4571 MachineIRBuilder &MIRBuilder) const {
4572 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4573 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
4574 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4575 return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4576}
4577
4578MachineInstr *
4579AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4580 MachineIRBuilder &MIRBuilder) const {
4581 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4582 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4583 LLT Ty = MRI.getType(LHS.getReg());
4584 unsigned RegSize = Ty.getSizeInBits();
4585 bool Is32Bit = (RegSize == 32);
4586 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4587 {AArch64::ANDSXrs, AArch64::ANDSWrs},
4588 {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4589 // ANDS needs a logical immediate for its immediate form. Check if we can
4590 // fold one in.
4591 if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
4592 int64_t Imm = ValAndVReg->Value.getSExtValue();
4593
4594 if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
4595 auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
4596 TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
4597 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
4598 return &*TstMI;
4599 }
4600 }
4601
4602 if (auto Fns = selectLogicalShiftedRegister(RHS))
4603 return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
4604 return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
4605}
4606
4607MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4608 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4609 MachineIRBuilder &MIRBuilder) const {
4610 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4611 assert(Predicate.isPredicate() && "Expected predicate?");
4612 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4613 LLT CmpTy = MRI.getType(LHS.getReg());
4614 assert(!CmpTy.isVector() && "Expected scalar or pointer");
4615 unsigned Size = CmpTy.getSizeInBits();
4616 (void)Size;
4617 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
4618 // Fold the compare into a cmn or tst if possible.
4619 if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4620 return FoldCmp;
4621 auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
4622 return emitSUBS(Dst, LHS, RHS, MIRBuilder);
4623}
4624
4625MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4626 Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
4627 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4628#ifndef NDEBUG
4629 LLT Ty = MRI.getType(Dst);
4630 assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
4631 "Expected a 32-bit scalar register?");
4632#endif
4633 const Register ZReg = AArch64::WZR;
4634 AArch64CC::CondCode CC1, CC2;
4635 changeFCMPPredToAArch64CC(Pred, CC1, CC2);
4636 auto InvCC1 = AArch64CC::getInvertedCondCode(CC1);
4637 if (CC2 == AArch64CC::AL)
4638 return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1,
4639 MIRBuilder);
4640 const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
4641 Register Def1Reg = MRI.createVirtualRegister(RC);
4642 Register Def2Reg = MRI.createVirtualRegister(RC);
4643 auto InvCC2 = AArch64CC::getInvertedCondCode(CC2);
4644 emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, MIRBuilder);
4645 emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC2, MIRBuilder);
4646 auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4647 constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
4648 return &*OrMI;
4649}
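emitCSetForFCmp materialises the boolean with CSINC wzr, wzr under the inverted condition (which is what the CSET alias expands to) and, when changeFCMPPredToAArch64CC reports a second condition, ORs two such bits together. The tiny model below shows why inverting the condition gives the right polarity; it models CSINC with plain integers rather than NZCV flags.

    #include <cassert>

    // Model of CSINC Rd, Rn, Rm, cond: Rd = cond ? Rn : Rm + 1.
    static unsigned csinc(unsigned Rn, unsigned Rm, bool CondHolds) {
      return CondHolds ? Rn : Rm + 1;
    }

    int main() {
      // CSET Rd, cc == CSINC Rd, zr, zr, invert(cc):
      // if cc holds, invert(cc) fails and we get zr + 1 == 1;
      // if cc fails, invert(cc) holds and we copy zr == 0.
      for (bool CC : {false, true})
        assert(csinc(/*zr=*/0, /*zr=*/0, /*invert(cc) holds=*/!CC) == (CC ? 1u : 0u));
      return 0;
    }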
4650
4651MachineInstr *AArch64InstructionSelector::emitFPCompare(
4652 Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
4653 std::optional<CmpInst::Predicate> Pred) const {
4654 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4655 LLT Ty = MRI.getType(LHS);
4656 if (Ty.isVector())
4657 return nullptr;
4658 unsigned OpSize = Ty.getSizeInBits();
4659 if (OpSize != 32 && OpSize != 64)
4660 return nullptr;
4661
4662 // If this is a compare against +0.0, then we don't have
4663 // to explicitly materialize a constant.
4664 const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
4665 bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4666
4667 auto IsEqualityPred = [](CmpInst::Predicate P) {
4668 return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
4669 P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
4670 };
4671 if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4672 // Try commutating the operands.
4673 const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
4674 if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
4675 ShouldUseImm = true;
4676 std::swap(LHS, RHS);
4677 }
4678 }
4679 unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
4680 {AArch64::FCMPSri, AArch64::FCMPDri}};
4681 unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64];
4682
4683 // Partially build the compare. Decide if we need to add a use for the
4684 // third operand based off whether or not we're comparing against 0.0.
4685 auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
4686 CmpMI.setMIFlags(MachineInstr::NoFPExcept);
4687 if (!ShouldUseImm)
4688 CmpMI.addUse(RHS);
4689 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4690 return &*CmpMI;
4691}
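emitFPCompare drops the second register operand only when the RHS is known to be exactly +0.0, and for pure (in)equality predicates it will also commute a +0.0 LHS into that slot, since equality is symmetric. Below is a compact sketch of that decision; knownConstant stands in for getConstantFPVRegVal and the whole thing is an illustration, not the selector's types.

    #include <cmath>
    #include <cstdio>
    #include <optional>

    // Stand-in for looking up a floating-point constant behind a vreg.
    static std::optional<double> knownConstant(const double *MaybeVal) {
      if (MaybeVal)
        return *MaybeVal;
      return std::nullopt;
    }

    static bool isPositiveZero(double V) { return V == 0.0 && !std::signbit(V); }

    // Decide whether the compare can use the "against #0.0" form, possibly
    // swapping operands when the predicate is a plain (in)equality.
    static bool shouldUseImmForm(const double *LHS, const double *RHS,
                                 bool IsEqualityPred, bool &Swapped) {
      Swapped = false;
      if (auto R = knownConstant(RHS); R && isPositiveZero(*R))
        return true;
      if (IsEqualityPred) {
        if (auto L = knownConstant(LHS); L && isPositiveZero(*L)) {
          Swapped = true;  // commute so the zero ends up in the immediate slot
          return true;
        }
      }
      return false;
    }

    int main() {
      double Zero = 0.0, NegZero = -0.0, X = 1.5;
      bool Swapped;
      std::printf("%d\n", shouldUseImmForm(&X, &Zero, false, Swapped));    // 1
      std::printf("%d\n", shouldUseImmForm(&Zero, &X, true, Swapped));     // 1, swapped
      std::printf("%d\n", shouldUseImmForm(&X, &NegZero, false, Swapped)); // 0: -0.0 is not folded
      return 0;
    }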
4692
4693MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4694 std::optional<Register> Dst, Register Op1, Register Op2,
4695 MachineIRBuilder &MIRBuilder) const {
4696 // We implement a vector concat by:
4697 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
4698 // 2. Insert the upper vector into the destination's upper element
4699 // TODO: some of this code is common with G_BUILD_VECTOR handling.
4700 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4701
4702 const LLT Op1Ty = MRI.getType(Op1);
4703 const LLT Op2Ty = MRI.getType(Op2);
4704
4705 if (Op1Ty != Op2Ty) {
4706 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4707 return nullptr;
4708 }
4709 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4710
4711 if (Op1Ty.getSizeInBits() >= 128) {
4712 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4713 return nullptr;
4714 }
4715
4716 // At the moment we just support 64 bit vector concats.
4717 if (Op1Ty.getSizeInBits() != 64) {
4718 LLVM_DEBUG(dbgs() << "Vector concat supported for 64b vectors");
4719 return nullptr;
4720 }
4721
4722 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
4723 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
4724 const TargetRegisterClass *DstRC =
4725 getRegClassForTypeOnBank(Op1Ty.multiplyElements(2), FPRBank);
4726
4727 MachineInstr *WidenedOp1 =
4728 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
4729 MachineInstr *WidenedOp2 =
4730 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
4731 if (!WidenedOp1 || !WidenedOp2) {
4732 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4733 return nullptr;
4734 }
4735
4736 // Now do the insert of the upper element.
4737 unsigned InsertOpc, InsSubRegIdx;
4738 std::tie(InsertOpc, InsSubRegIdx) =
4739 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
4740
4741 if (!Dst)
4742 Dst = MRI.createVirtualRegister(DstRC);
4743 auto InsElt =
4744 MIRBuilder
4745 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
4746 .addImm(1) /* Lane index */
4747 .addUse(WidenedOp2->getOperand(0).getReg())
4748 .addImm(0);
4749 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4750 return &*InsElt;
4751}
4752
4753MachineInstr *
4754AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
4755 Register Src2, AArch64CC::CondCode Pred,
4756 MachineIRBuilder &MIRBuilder) const {
4757 auto &MRI = *MIRBuilder.getMRI();
4758 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Dst);
4759 // If we used a register class, then this won't necessarily have an LLT.
4760 // Compute the size based off whether or not we have a class or bank.
4761 unsigned Size;
4762 if (const auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>())
4763 Size = TRI.getRegSizeInBits(*RC);
4764 else
4765 Size = MRI.getType(Dst).getSizeInBits();
4766 // Some opcodes use s1.
4767 assert(Size <= 64 && "Expected 64 bits or less only!");
4768 static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
4769 unsigned Opc = OpcTable[Size == 64];
4770 auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
4771 constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI);
4772 return &*CSINC;
4773}
4774
4775std::pair<MachineInstr *, AArch64CC::CondCode>
4776AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4777 MachineOperand &LHS,
4778 MachineOperand &RHS,
4779 MachineIRBuilder &MIRBuilder) const {
4780 switch (Opcode) {
4781 default:
4782 llvm_unreachable("Unexpected opcode!")::llvm::llvm_unreachable_internal("Unexpected opcode!", "llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4782)
;
4783 case TargetOpcode::G_SADDO:
4784 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4785 case TargetOpcode::G_UADDO:
4786 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4787 case TargetOpcode::G_SSUBO:
4788 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4789 case TargetOpcode::G_USUBO:
4790 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4791 }
4792}
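
The switch in emitOverflowOp pairs each overflow opcode with a flag-setting instruction and the condition code that reads the overflow result back out of NZCV. A small sketch of that mapping as plain C++, using placeholder enums rather than the real TargetOpcode/AArch64CC constants:

  #include <utility>

  enum class OverflowOp { SAddO, UAddO, SSubO, USubO };
  enum class FlagInst { Adds, Subs };
  enum class Cond { VS, HS, LO }; // V set / carry set / carry clear

  // Mirrors emitOverflowOp: signed ops check V, unsigned add checks carry set,
  // unsigned subtract checks carry clear (borrow).
  std::pair<FlagInst, Cond> overflowLowering(OverflowOp Op) {
    switch (Op) {
    case OverflowOp::SAddO: return {FlagInst::Adds, Cond::VS};
    case OverflowOp::UAddO: return {FlagInst::Adds, Cond::HS};
    case OverflowOp::SSubO: return {FlagInst::Subs, Cond::VS};
    case OverflowOp::USubO: return {FlagInst::Subs, Cond::LO};
    }
    __builtin_unreachable();
  }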
4793
4794/// Returns true if @p Val is a tree of AND/OR/CMP operations that can be
4795/// expressed as a conjunction.
4796/// \param CanNegate Set to true if we can negate the whole sub-tree just by
4797/// changing the conditions on the CMP tests.
4798/// (this means we can call emitConjunctionRec() with
4799/// Negate==true on this sub-tree)
4800/// \param MustBeFirst Set to true if this subtree needs to be negated and we
4801/// cannot do the negation naturally. We are required to
4802/// emit the subtree first in this case.
4803/// \param WillNegate Is true if we are called when the result of this
4804/// subexpression must be negated. This happens when the
4805/// outer expression is an OR. We can use this fact to know
4806/// that we have a double negation (or (or ...) ...) that
4807/// can be implemented for free.
4808static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
4809 bool WillNegate, MachineRegisterInfo &MRI,
4810 unsigned Depth = 0) {
4811 if (!MRI.hasOneNonDBGUse(Val))
4812 return false;
4813 MachineInstr *ValDef = MRI.getVRegDef(Val);
4814 unsigned Opcode = ValDef->getOpcode();
4815 if (isa<GAnyCmp>(ValDef)) {
4816 CanNegate = true;
4817 MustBeFirst = false;
4818 return true;
4819 }
4820 // Protect against exponential runtime and stack overflow.
4821 if (Depth > 6)
4822 return false;
4823 if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
4824 bool IsOR = Opcode == TargetOpcode::G_OR;
4825 Register O0 = ValDef->getOperand(1).getReg();
4826 Register O1 = ValDef->getOperand(2).getReg();
4827 bool CanNegateL;
4828 bool MustBeFirstL;
4829 if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, MRI, Depth + 1))
4830 return false;
4831 bool CanNegateR;
4832 bool MustBeFirstR;
4833 if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, MRI, Depth + 1))
4834 return false;
4835
4836 if (MustBeFirstL && MustBeFirstR)
4837 return false;
4838
4839 if (IsOR) {
4840 // For an OR expression we need to be able to naturally negate at least
4841 // one side or we cannot do the transformation at all.
4842 if (!CanNegateL && !CanNegateR)
4843 return false;
4844 // If the result of the OR will be negated and we can naturally negate
4845 // the leaves, then this sub-tree as a whole negates naturally.
4846 CanNegate = WillNegate && CanNegateL && CanNegateR;
4847 // If we cannot naturally negate the whole sub-tree, then this must be
4848 // emitted first.
4849 MustBeFirst = !CanNegate;
4850 } else {
4851 assert(Opcode == TargetOpcode::G_AND && "Must be G_AND");
4852 // We cannot naturally negate an AND operation.
4853 CanNegate = false;
4854 MustBeFirst = MustBeFirstL || MustBeFirstR;
4855 }
4856 return true;
4857 }
4858 return false;
4859}
4860
4861MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
4862 Register LHS, Register RHS, CmpInst::Predicate CC,
4863 AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC,
4864 MachineIRBuilder &MIB) const {
4865 // TODO: emit CMN as an optimization.
4866 auto &MRI = *MIB.getMRI();
4867 LLT OpTy = MRI.getType(LHS);
4868 assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64);
4869 unsigned CCmpOpc;
4870 std::optional<ValueAndVReg> C;
4871 if (CmpInst::isIntPredicate(CC)) {
4872 C = getIConstantVRegValWithLookThrough(RHS, MRI);
4873 if (C && C->Value.ult(32))
4874 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWi : AArch64::CCMPXi;
4875 else
4876 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
4877 } else {
4878 switch (OpTy.getSizeInBits()) {
4879 case 16:
4880 CCmpOpc = AArch64::FCCMPHrr;
4881 break;
4882 case 32:
4883 CCmpOpc = AArch64::FCCMPSrr;
4884 break;
4885 case 64:
4886 CCmpOpc = AArch64::FCCMPDrr;
4887 break;
4888 default:
4889 return nullptr;
4890 }
4891 }
4892 AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
4893 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
4894 auto CCmp =
4895 MIB.buildInstr(CCmpOpc, {}, {LHS});
4896 if (CCmpOpc == AArch64::CCMPWi || CCmpOpc == AArch64::CCMPXi)
4897 CCmp.addImm(C->Value.getZExtValue());
4898 else
4899 CCmp.addReg(RHS);
4900 CCmp.addImm(NZCV).addImm(Predicate);
4901 constrainSelectedInstRegOperands(*CCmp, TII, TRI, RBI);
4902 return &*CCmp;
4903}
4904
4905MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
4906 Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp,
4907 AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const {
4908 // We're at a tree leaf, produce a conditional comparison operation.
4909 auto &MRI = *MIB.getMRI();
4910 MachineInstr *ValDef = MRI.getVRegDef(Val);
4911 unsigned Opcode = ValDef->getOpcode();
4912 if (auto *Cmp = dyn_cast<GAnyCmp>(ValDef)) {
4913 Register LHS = Cmp->getLHSReg();
4914 Register RHS = Cmp->getRHSReg();
4915 CmpInst::Predicate CC = Cmp->getCond();
4916 if (Negate)
4917 CC = CmpInst::getInversePredicate(CC);
4918 if (isa<GICmp>(Cmp)) {
4919 OutCC = changeICMPPredToAArch64CC(CC);
4920 } else {
4921 // Handle special FP cases.
4922 AArch64CC::CondCode ExtraCC;
4923 changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
4924 // Some floating point conditions can't be tested with a single condition
4925 // code. Construct an additional comparison in this case.
4926 if (ExtraCC != AArch64CC::AL) {
4927 MachineInstr *ExtraCmp;
4928 if (!CCOp)
4929 ExtraCmp = emitFPCompare(LHS, RHS, MIB, CC);
4930 else
4931 ExtraCmp =
4932 emitConditionalComparison(LHS, RHS, CC, Predicate, ExtraCC, MIB);
4933 CCOp = ExtraCmp->getOperand(0).getReg();
4934 Predicate = ExtraCC;
4935 }
4936 }
4937
4938 // Produce a normal comparison if we are first in the chain
4939 if (!CCOp) {
4940 auto Dst = MRI.cloneVirtualRegister(LHS);
4941 if (isa<GICmp>(Cmp))
4942 return emitSUBS(Dst, Cmp->getOperand(2), Cmp->getOperand(3), MIB);
4943 return emitFPCompare(Cmp->getOperand(2).getReg(),
4944 Cmp->getOperand(3).getReg(), MIB);
4945 }
4946 // Otherwise produce a ccmp.
4947 return emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC, MIB);
4948 }
4949 assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree");
4950
4951 bool IsOR = Opcode == TargetOpcode::G_OR;
4952
4953 Register LHS = ValDef->getOperand(1).getReg();
4954 bool CanNegateL;
4955 bool MustBeFirstL;
4956 bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR, MRI);
4957 assert(ValidL && "Valid conjunction/disjunction tree");
4958 (void)ValidL;
4959
4960 Register RHS = ValDef->getOperand(2).getReg();
4961 bool CanNegateR;
4962 bool MustBeFirstR;
4963 bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR, MRI);
4964 assert(ValidR && "Valid conjunction/disjunction tree");
4965 (void)ValidR;
4966
4967 // Swap sub-tree that must come first to the right side.
4968 if (MustBeFirstL) {
4969 assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
4970 std::swap(LHS, RHS);
4971 std::swap(CanNegateL, CanNegateR);
4972 std::swap(MustBeFirstL, MustBeFirstR);
4973 }
4974
4975 bool NegateR;
4976 bool NegateAfterR;
4977 bool NegateL;
4978 bool NegateAfterAll;
4979 if (Opcode == TargetOpcode::G_OR) {
4980 // Swap the sub-tree that we can negate naturally to the left.
4981 if (!CanNegateL) {
4982 assert(CanNegateR && "at least one side must be negatable");
4983 assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
4984 assert(!Negate);
4985 std::swap(LHS, RHS);
4986 NegateR = false;
4987 NegateAfterR = true;
4988 } else {
4989 // Negate the left sub-tree if possible, otherwise negate the result.
4990 NegateR = CanNegateR;
4991 NegateAfterR = !CanNegateR;
4992 }
4993 NegateL = true;
4994 NegateAfterAll = !Negate;
4995 } else {
4996 assert(Opcode == TargetOpcode::G_AND &&
4997        "Valid conjunction/disjunction tree");
4998 assert(!Negate && "Valid conjunction/disjunction tree");
4999
5000 NegateL = false;
5001 NegateR = false;
5002 NegateAfterR = false;
5003 NegateAfterAll = false;
5004 }
5005
5006 // Emit sub-trees.
5007 AArch64CC::CondCode RHSCC;
5008 MachineInstr *CmpR =
5009 emitConjunctionRec(RHS, RHSCC, NegateR, CCOp, Predicate, MIB);
5010 if (NegateAfterR)
5011 RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
5012 MachineInstr *CmpL = emitConjunctionRec(
5013 LHS, OutCC, NegateL, CmpR->getOperand(0).getReg(), RHSCC, MIB);
5014 if (NegateAfterAll)
5015 OutCC = AArch64CC::getInvertedCondCode(OutCC);
5016 return CmpL;
5017}
5018
5019MachineInstr *AArch64InstructionSelector::emitConjunction(
5020 Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const {
5021 bool DummyCanNegate;
5022 bool DummyMustBeFirst;
5023 if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false,
5024 *MIB.getMRI()))
5025 return nullptr;
5026 return emitConjunctionRec(Val, OutCC, false, Register(), AArch64CC::AL, MIB);
5027}
5028
5029bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI,
5030 MachineInstr &CondMI) {
5031 AArch64CC::CondCode AArch64CC;
5032 MachineInstr *ConjMI = emitConjunction(SelI.getCondReg(), AArch64CC, MIB);
5033 if (!ConjMI)
5034 return false;
5035
5036 emitSelect(SelI.getReg(0), SelI.getTrueReg(), SelI.getFalseReg(), AArch64CC, MIB);
5037 SelI.eraseFromParent();
5038 return true;
5039}
5040
5041bool AArch64InstructionSelector::tryOptSelect(GSelect &I) {
5042 MachineRegisterInfo &MRI = *MIB.getMRI();
5043 // We want to recognize this pattern:
5044 //
5045 // $z = G_FCMP pred, $x, $y
5046 // ...
5047 // $w = G_SELECT $z, $a, $b
5048 //
5049 // Where the value of $z is *only* ever used by the G_SELECT (possibly with
5050 // some copies/truncs in between.)
5051 //
5052 // If we see this, then we can emit something like this:
5053 //
5054 // fcmp $x, $y
5055 // fcsel $w, $a, $b, pred
5056 //
5057 // Rather than emitting both of the rather long sequences in the standard
5058 // G_FCMP/G_SELECT select methods.
5059
5060 // First, check if the condition is defined by a compare.
5061 MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
5062
5063 // We can only fold if all of the defs have one use.
5064 Register CondDefReg = CondDef->getOperand(0).getReg();
5065 if (!MRI.hasOneNonDBGUse(CondDefReg)) {
5066 // Unless it's another select.
5067 for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
5068 if (CondDef == &UI)
5069 continue;
5070 if (UI.getOpcode() != TargetOpcode::G_SELECT)
5071 return false;
5072 }
5073 }
5074
5075 // Is the condition defined by a compare?
5076 unsigned CondOpc = CondDef->getOpcode();
5077 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) {
5078 if (tryOptSelectConjunction(I, *CondDef))
5079 return true;
5080 return false;
5081 }
5082
5083 AArch64CC::CondCode CondCode;
5084 if (CondOpc == TargetOpcode::G_ICMP) {
5085 auto Pred =
5086 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
5087 CondCode = changeICMPPredToAArch64CC(Pred);
5088 emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
5089 CondDef->getOperand(1), MIB);
5090 } else {
5091 // Get the condition code for the select.
5092 auto Pred =
5093 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
5094 AArch64CC::CondCode CondCode2;
5095 changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);
5096
5097 // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
5098 // instructions to emit the comparison.
5099 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
5100 // unnecessary.
5101 if (CondCode2 != AArch64CC::AL)
5102 return false;
5103
5104 if (!emitFPCompare(CondDef->getOperand(2).getReg(),
5105 CondDef->getOperand(3).getReg(), MIB)) {
5106 LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
5107 return false;
5108 }
5109 }
5110
5111 // Emit the select.
5112 emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
5113 I.getOperand(3).getReg(), CondCode, MIB);
5114 I.eraseFromParent();
5115 return true;
5116}
5117
5118MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
5119 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
5120 MachineIRBuilder &MIRBuilder) const {
5121 assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
5122        "Unexpected MachineOperand");
5123 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5124 // We want to find this sort of thing:
5125 // x = G_SUB 0, y
5126 // G_ICMP z, x
5127 //
5128 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
5129 // e.g:
5130 //
5131 // cmn z, y
5132
5133 // Check if the RHS or LHS of the G_ICMP is defined by a SUB
5134 MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
5135 MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
5136 auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());
5137 // Given this:
5138 //
5139 // x = G_SUB 0, y
5140 // G_ICMP x, z
5141 //
5142 // Produce this:
5143 //
5144 // cmn y, z
5145 if (isCMN(LHSDef, P, MRI))
5146 return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
5147
5148 // Same idea here, but with the RHS of the compare instead:
5149 //
5150 // Given this:
5151 //
5152 // x = G_SUB 0, y
5153 // G_ICMP z, x
5154 //
5155 // Produce this:
5156 //
5157 // cmn z, y
5158 if (isCMN(RHSDef, P, MRI))
5159 return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
5160
5161 // Given this:
5162 //
5163 // z = G_AND x, y
5164 // G_ICMP z, 0
5165 //
5166 // Produce this if the compare is signed:
5167 //
5168 // tst x, y
5169 if (!CmpInst::isUnsigned(P) && LHSDef &&
5170 LHSDef->getOpcode() == TargetOpcode::G_AND) {
5171 // Make sure that the RHS is 0.
5172 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
5173 if (!ValAndVReg || ValAndVReg->Value != 0)
5174 return nullptr;
5175
5176 return emitTST(LHSDef->getOperand(1),
5177 LHSDef->getOperand(2), MIRBuilder);
5178 }
5179
5180 return nullptr;
5181}
5182
5183bool AArch64InstructionSelector::selectShuffleVector(
5184 MachineInstr &I, MachineRegisterInfo &MRI) {
5185 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5186 Register Src1Reg = I.getOperand(1).getReg();
5187 const LLT Src1Ty = MRI.getType(Src1Reg);
5188 Register Src2Reg = I.getOperand(2).getReg();
5189 const LLT Src2Ty = MRI.getType(Src2Reg);
5190 ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
5191
5192 MachineBasicBlock &MBB = *I.getParent();
5193 MachineFunction &MF = *MBB.getParent();
5194 LLVMContext &Ctx = MF.getFunction().getContext();
5195
5196 // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
5197 // it's originated from a <1 x T> type. Those should have been lowered into
5198 // G_BUILD_VECTOR earlier.
5199 if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
5200 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
5201 return false;
5202 }
5203
5204 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
5205
5206 SmallVector<Constant *, 64> CstIdxs;
5207 for (int Val : Mask) {
5208 // For now, any undef indexes we'll just assume to be 0. This should be
5209 // optimized in future, e.g. to select DUP etc.
5210 Val = Val < 0 ? 0 : Val;
5211 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
5212 unsigned Offset = Byte + Val * BytesPerElt;
5213 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
5214 }
5215 }
5216
5217 // Use a constant pool to load the index vector for TBL.
5218 Constant *CPVal = ConstantVector::get(CstIdxs);
5219 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIB);
5220 if (!IndexLoad) {
5221 LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
5222 return false;
5223 }
5224
5225 if (DstTy.getSizeInBits() != 128) {
5226 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
5227 // This case can be done with TBL1.
5228 MachineInstr *Concat =
5229 emitVectorConcat(std::nullopt, Src1Reg, Src2Reg, MIB);
5230 if (!Concat) {
5231 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
5232 return false;
5233 }
5234
5235 // The constant pool load will be 64 bits, so need to convert to FPR128 reg.
5236 IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass,
5237 IndexLoad->getOperand(0).getReg(), MIB);
5238
5239 auto TBL1 = MIB.buildInstr(
5240 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
5241 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
5242 constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
5243
5244 auto Copy =
5245 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
5246 .addReg(TBL1.getReg(0), 0, AArch64::dsub);
5247 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
5248 I.eraseFromParent();
5249 return true;
5250 }
5251
5252 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
5253 // Q registers for regalloc.
5254 SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};
5255 auto RegSeq = createQTuple(Regs, MIB);
5256 auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
5257 {RegSeq, IndexLoad->getOperand(0)});
5258 constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
5259 I.eraseFromParent();
5260 return true;
5261}
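
The mask expansion at the top of selectShuffleVector is easy to verify in isolation: each element index in the shuffle mask becomes BytesPerElt consecutive byte indices for the TBL lookup table, and undef (negative) lanes are clamped to lane 0. A self-contained sketch of just that loop, with a hypothetical helper name:

  #include <cstdint>
  #include <vector>

  // For a <4 x s32> mask {1, -1, 4, 7} (BytesPerElt == 4) this produces
  // {4,5,6,7, 0,1,2,3, 16,17,18,19, 28,29,30,31}.
  std::vector<uint8_t> expandMaskToTblBytes(const std::vector<int> &Mask,
                                            unsigned BytesPerElt) {
    std::vector<uint8_t> Bytes;
    for (int Val : Mask) {
      if (Val < 0)
        Val = 0; // Undef lanes are treated as lane 0 for now, as above.
      for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte)
        Bytes.push_back(static_cast<uint8_t>(Byte + Val * BytesPerElt));
    }
    return Bytes;
  }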
5262
5263MachineInstr *AArch64InstructionSelector::emitLaneInsert(
5264 std::optional<Register> DstReg, Register SrcReg, Register EltReg,
5265 unsigned LaneIdx, const RegisterBank &RB,
5266 MachineIRBuilder &MIRBuilder) const {
5267 MachineInstr *InsElt = nullptr;
5268 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5269 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5270
5271 // Create a register to define with the insert if one wasn't passed in.
5272 if (!DstReg)
5273 DstReg = MRI.createVirtualRegister(DstRC);
5274
5275 unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
5276 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
5277
5278 if (RB.getID() == AArch64::FPRRegBankID) {
5279 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
5280 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5281 .addImm(LaneIdx)
5282 .addUse(InsSub->getOperand(0).getReg())
5283 .addImm(0);
5284 } else {
5285 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5286 .addImm(LaneIdx)
5287 .addUse(EltReg);
5288 }
5289
5290 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
5291 return InsElt;
5292}
5293
5294bool AArch64InstructionSelector::selectUSMovFromExtend(
5295 MachineInstr &MI, MachineRegisterInfo &MRI) {
5296 if (MI.getOpcode() != TargetOpcode::G_SEXT &&
5297 MI.getOpcode() != TargetOpcode::G_ZEXT &&
5298 MI.getOpcode() != TargetOpcode::G_ANYEXT)
5299 return false;
5300 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT;
5301 const Register DefReg = MI.getOperand(0).getReg();
5302 const LLT DstTy = MRI.getType(DefReg);
5303 unsigned DstSize = DstTy.getSizeInBits();
5304
5305 if (DstSize != 32 && DstSize != 64)
5306 return false;
5307
5308 MachineInstr *Extract = getOpcodeDef(TargetOpcode::G_EXTRACT_VECTOR_ELT,
5309 MI.getOperand(1).getReg(), MRI);
5310 int64_t Lane;
5311 if (!Extract || !mi_match(Extract->getOperand(2).getReg(), MRI, m_ICst(Lane)))
5312 return false;
5313 Register Src0 = Extract->getOperand(1).getReg();
5314
5315 const LLT &VecTy = MRI.getType(Src0);
5316
5317 if (VecTy.getSizeInBits() != 128) {
5318 const MachineInstr *ScalarToVector = emitScalarToVector(
5319 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);
5320 assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!");
5321 Src0 = ScalarToVector->getOperand(0).getReg();
5322 }
5323
5324 unsigned Opcode;
5325 if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32)
5326 Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
5327 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16)
5328 Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
5329 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8)
5330 Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
5331 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16)
5332 Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
5333 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8)
5334 Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
5335 else
5336 llvm_unreachable("Unexpected type combo for S/UMov!");
5337
5338 // We may need to generate one of these, depending on the type and sign of the
5339 // input:
5340 // DstReg = SMOV Src0, Lane;
5341 // NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32;
5342 MachineInstr *ExtI = nullptr;
5343 if (DstSize == 64 && !IsSigned) {
5344 Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
5345 MIB.buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane);
5346 ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
5347 .addImm(0)
5348 .addUse(NewReg)
5349 .addImm(AArch64::sub_32);
5350 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
5351 } else
5352 ExtI = MIB.buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane);
5353
5354 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
5355 MI.eraseFromParent();
5356 return true;
5357}
5358
5359bool AArch64InstructionSelector::selectInsertElt(MachineInstr &I,
5360 MachineRegisterInfo &MRI) {
5361 assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
5362
5363 // Get information on the destination.
5364 Register DstReg = I.getOperand(0).getReg();
5365 const LLT DstTy = MRI.getType(DstReg);
5366 unsigned VecSize = DstTy.getSizeInBits();
5367
5368 // Get information on the element we want to insert into the destination.
5369 Register EltReg = I.getOperand(2).getReg();
5370 const LLT EltTy = MRI.getType(EltReg);
5371 unsigned EltSize = EltTy.getSizeInBits();
5372 if (EltSize < 16 || EltSize > 64)
5373 return false; // Don't support all element types yet.
5374
5375 // Find the definition of the index. Bail out if it's not defined by a
5376 // G_CONSTANT.
5377 Register IdxReg = I.getOperand(3).getReg();
5378 auto VRegAndVal = getIConstantVRegValWithLookThrough(IdxReg, MRI);
5379 if (!VRegAndVal)
5380 return false;
5381 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
5382
5383 // Perform the lane insert.
5384 Register SrcReg = I.getOperand(1).getReg();
5385 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
5386
5387 if (VecSize < 128) {
5388 // If the vector we're inserting into is smaller than 128 bits, widen it
5389 // to 128 to do the insert.
5390 MachineInstr *ScalarToVec =
5391 emitScalarToVector(VecSize, &AArch64::FPR128RegClass, SrcReg, MIB);
5392 if (!ScalarToVec)
5393 return false;
5394 SrcReg = ScalarToVec->getOperand(0).getReg();
5395 }
5396
5397 // Create an insert into a new FPR128 register.
5398 // Note that if our vector is already 128 bits, we end up emitting an extra
5399 // register.
5400 MachineInstr *InsMI =
5401 emitLaneInsert(std::nullopt, SrcReg, EltReg, LaneIdx, EltRB, MIB);
5402
5403 if (VecSize < 128) {
5404 // If we had to widen to perform the insert, then we have to demote back to
5405 // the original size to get the result we want.
5406 Register DemoteVec = InsMI->getOperand(0).getReg();
5407 const TargetRegisterClass *RC =
5408 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DemoteVec, MRI, TRI));
5409 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5410 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
5411 return false;
5412 }
5413 unsigned SubReg = 0;
5414 if (!getSubRegForClass(RC, TRI, SubReg))
5415 return false;
5416 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
5417 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
5418                   << "\n");
5419 return false;
5420 }
5421 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
5422 .addReg(DemoteVec, 0, SubReg);
5423 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5424 } else {
5425 // No widening needed.
5426 InsMI->getOperand(0).setReg(DstReg);
5427 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
5428 }
5429
5430 I.eraseFromParent();
5431 return true;
5432}
5433
5434MachineInstr *
5435AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
5436 MachineIRBuilder &MIRBuilder,
5437 MachineRegisterInfo &MRI) {
5438 LLT DstTy = MRI.getType(Dst);
5439 unsigned DstSize = DstTy.getSizeInBits();
5440 if (CV->isNullValue()) {
5441 if (DstSize == 128) {
5442 auto Mov =
5443 MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
5444 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5445 return &*Mov;
5446 }
5447
5448 if (DstSize == 64) {
5449 auto Mov =
5450 MIRBuilder
5451 .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
5452 .addImm(0);
5453 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {})
5454 .addReg(Mov.getReg(0), 0, AArch64::dsub);
5455 RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);
5456 return &*Copy;
5457 }
5458 }
5459
5460 auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);
5461 if (!CPLoad) {
5462 LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");
5463 return nullptr;
5464 }
5465
5466 auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0));
5467 RBI.constrainGenericRegister(
5468 Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI);
5469 return &*Copy;
5470}
5471
5472bool AArch64InstructionSelector::tryOptConstantBuildVec(
5473 MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) {
5474 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5475 unsigned DstSize = DstTy.getSizeInBits();
5476 assert(DstSize <= 128 && "Unexpected build_vec type!");
5477 if (DstSize < 32)
5478 return false;
5479 // Check if we're building a constant vector, in which case we want to
5480 // generate a constant pool load instead of a vector insert sequence.
5481 SmallVector<Constant *, 16> Csts;
5482 for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
5483 // Try to find G_CONSTANT or G_FCONSTANT
5484 auto *OpMI =
5485 getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI);
5486 if (OpMI)
5487 Csts.emplace_back(
5488 const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm()));
5489 else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT,
5490 I.getOperand(Idx).getReg(), MRI)))
5491 Csts.emplace_back(
5492 const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm()));
5493 else
5494 return false;
5495 }
5496 Constant *CV = ConstantVector::get(Csts);
5497 if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI))
5498 return false;
5499 I.eraseFromParent();
5500 return true;
5501}
5502
5503bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
5504 MachineInstr &I, MachineRegisterInfo &MRI) {
5505 // Given:
5506 // %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
5507 //
5508 // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt.
5509 Register Dst = I.getOperand(0).getReg();
5510 Register EltReg = I.getOperand(1).getReg();
5511 LLT EltTy = MRI.getType(EltReg);
5512 // If the index isn't on the same bank as its elements, then this can't be a
5513 // SUBREG_TO_REG.
5514 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
5515 const RegisterBank &DstRB = *RBI.getRegBank(Dst, MRI, TRI);
5516 if (EltRB != DstRB)
5517 return false;
5518 if (any_of(make_range(I.operands_begin() + 2, I.operands_end()),
5519 [&MRI](const MachineOperand &Op) {
5520 return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(),
5521 MRI);
5522 }))
5523 return false;
5524 unsigned SubReg;
5525 const TargetRegisterClass *EltRC = getRegClassForTypeOnBank(EltTy, EltRB);
5526 if (!EltRC)
5527 return false;
5528 const TargetRegisterClass *DstRC =
5529 getRegClassForTypeOnBank(MRI.getType(Dst), DstRB);
5530 if (!DstRC)
5531 return false;
5532 if (!getSubRegForClass(EltRC, TRI, SubReg))
5533 return false;
5534 auto SubregToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})
5535 .addImm(0)
5536 .addUse(EltReg)
5537 .addImm(SubReg);
5538 I.eraseFromParent();
5539 constrainSelectedInstRegOperands(*SubregToReg, TII, TRI, RBI);
5540 return RBI.constrainGenericRegister(Dst, *DstRC, MRI);
5541}
5542
5543bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
5544 MachineRegisterInfo &MRI) {
5545 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5546 // Until we port more of the optimized selections, for now just use a vector
5547 // insert sequence.
5548 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5549 const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
5550 unsigned EltSize = EltTy.getSizeInBits();
5551
5552 if (tryOptConstantBuildVec(I, DstTy, MRI))
5553 return true;
5554 if (tryOptBuildVecToSubregToReg(I, MRI))
5555 return true;
5556
5557 if (EltSize != 8 && EltSize != 16 && EltSize != 32 && EltSize != 64)
5558 return false; // Don't support all element types yet.
5559 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
5560
5561 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5562 MachineInstr *ScalarToVec =
5563 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
5564 I.getOperand(1).getReg(), MIB);
5565 if (!ScalarToVec)
5566 return false;
5567
5568 Register DstVec = ScalarToVec->getOperand(0).getReg();
5569 unsigned DstSize = DstTy.getSizeInBits();
5570
5571 // Keep track of the last MI we inserted. Later on, we might be able to save
5572 // a copy using it.
5573 MachineInstr *PrevMI = nullptr;
5574 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
5575 // Note that if we don't do a subregister copy, we can end up making an
5576 // extra register.
5577 PrevMI = &*emitLaneInsert(std::nullopt, DstVec, I.getOperand(i).getReg(),
5578 i - 1, RB, MIB);
5579 DstVec = PrevMI->getOperand(0).getReg();
5580 }
5581
5582 // If DstTy's size in bits is less than 128, then emit a subregister copy
5583 // from DstVec to the last register we've defined.
5584 if (DstSize < 128) {
5585 // Force this to be FPR using the destination vector.
5586 const TargetRegisterClass *RC =
5587 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
5588 if (!RC)
5589 return false;
5590 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5591 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
5592 return false;
5593 }
5594
5595 unsigned SubReg = 0;
5596 if (!getSubRegForClass(RC, TRI, SubReg))
5597 return false;
5598 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
5599 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
5600                   << "\n");
5601 return false;
5602 }
5603
5604 Register Reg = MRI.createVirtualRegister(RC);
5605 Register DstReg = I.getOperand(0).getReg();
5606
5607 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, 0, SubReg);
5608 MachineOperand &RegOp = I.getOperand(1);
5609 RegOp.setReg(Reg);
5610 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5611 } else {
5612 // We don't need a subregister copy. Save a copy by re-using the
5613 // destination register on the final insert.
5614 assert(PrevMI && "PrevMI was null?");
5615 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
5616 constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
5617 }
5618
5619 I.eraseFromParent();
5620 return true;
5621}
5622
5623bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc,
5624 unsigned NumVecs,
5625 MachineInstr &I) {
5626 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5627 assert(Opc && "Expected an opcode?");
5628 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
5629 auto &MRI = *MIB.getMRI();
5630 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5631 unsigned Size = Ty.getSizeInBits();
5632 assert((Size == 64 || Size == 128) &&
5633        "Destination must be 64 bits or 128 bits?");
5634 unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
5635 auto Ptr = I.getOperand(I.getNumOperands() - 1).getReg();
5636 assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?");
5637 auto Load = MIB.buildInstr(Opc, {Ty}, {Ptr});
5638 Load.cloneMemRefs(I);
5639 constrainSelectedInstRegOperands(*Load, TII, TRI, RBI);
5640 Register SelectedLoadDst = Load->getOperand(0).getReg();
5641 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
5642 auto Vec = MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(Idx)}, {})
5643 .addReg(SelectedLoadDst, 0, SubReg + Idx);
5644 // Emit the subreg copies and immediately select them.
5645 // FIXME: We should refactor our copy code into an emitCopy helper and
5646 // clean up uses of this pattern elsewhere in the selector.
5647 selectCopy(*Vec, TII, MRI, TRI, RBI);
5648 }
5649 return true;
5650}
5651
5652bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
5653 MachineInstr &I, MachineRegisterInfo &MRI) {
5654 // Find the intrinsic ID.
5655 unsigned IntrinID = I.getIntrinsicID();
5656
5657 const LLT S8 = LLT::scalar(8);
5658 const LLT S16 = LLT::scalar(16);
5659 const LLT S32 = LLT::scalar(32);
5660 const LLT S64 = LLT::scalar(64);
5661 const LLT P0 = LLT::pointer(0, 64);
5662 // Select the instruction.
5663 switch (IntrinID) {
5664 default:
5665 return false;
5666 case Intrinsic::aarch64_ldxp:
5667 case Intrinsic::aarch64_ldaxp: {
5668 auto NewI = MIB.buildInstr(
5669 IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,
5670 {I.getOperand(0).getReg(), I.getOperand(1).getReg()},
5671 {I.getOperand(3)});
5672 NewI.cloneMemRefs(I);
5673 constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
5674 break;
5675 }
5676 case Intrinsic::trap:
5677 MIB.buildInstr(AArch64::BRK, {}, {}).addImm(1);
5678 break;
5679 case Intrinsic::debugtrap:
5680 MIB.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
5681 break;
5682 case Intrinsic::ubsantrap:
5683 MIB.buildInstr(AArch64::BRK, {}, {})
5684 .addImm(I.getOperand(1).getImm() | ('U' << 8));
5685 break;
5686 case Intrinsic::aarch64_neon_ld2: {
5687 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5688 unsigned Opc = 0;
5689 if (Ty == LLT::fixed_vector(8, S8))
5690 Opc = AArch64::LD2Twov8b;
5691 else if (Ty == LLT::fixed_vector(16, S8))
5692 Opc = AArch64::LD2Twov16b;
5693 else if (Ty == LLT::fixed_vector(4, S16))
5694 Opc = AArch64::LD2Twov4h;
5695 else if (Ty == LLT::fixed_vector(8, S16))
5696 Opc = AArch64::LD2Twov8h;
5697 else if (Ty == LLT::fixed_vector(2, S32))
5698 Opc = AArch64::LD2Twov2s;
5699 else if (Ty == LLT::fixed_vector(4, S32))
5700 Opc = AArch64::LD2Twov4s;
5701 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
5702 Opc = AArch64::LD2Twov2d;
5703 else if (Ty == S64 || Ty == P0)
5704 Opc = AArch64::LD1Twov1d;
5705 else
5706 llvm_unreachable("Unexpected type for ld2!");
5707 selectVectorLoadIntrinsic(Opc, 2, I);
5708 break;
5709 }
5710 case Intrinsic::aarch64_neon_ld4: {
5711 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5712 unsigned Opc = 0;
5713 if (Ty == LLT::fixed_vector(8, S8))
5714 Opc = AArch64::LD4Fourv8b;
5715 else if (Ty == LLT::fixed_vector(16, S8))
5716 Opc = AArch64::LD4Fourv16b;
5717 else if (Ty == LLT::fixed_vector(4, S16))
5718 Opc = AArch64::LD4Fourv4h;
5719 else if (Ty == LLT::fixed_vector(8, S16))
5720 Opc = AArch64::LD4Fourv8h;
5721 else if (Ty == LLT::fixed_vector(2, S32))
5722 Opc = AArch64::LD4Fourv2s;
5723 else if (Ty == LLT::fixed_vector(4, S32))
5724 Opc = AArch64::LD4Fourv4s;
5725 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
5726 Opc = AArch64::LD4Fourv2d;
5727 else if (Ty == S64 || Ty == P0)
5728 Opc = AArch64::LD1Fourv1d;
5729 else
5730 llvm_unreachable("Unexpected type for ld4!");
5731 selectVectorLoadIntrinsic(Opc, 4, I);
5732 break;
5733 }
5734 case Intrinsic::aarch64_neon_st2: {
5735 Register Src1 = I.getOperand(1).getReg();
5736 Register Src2 = I.getOperand(2).getReg();
5737 Register Ptr = I.getOperand(3).getReg();
5738 LLT Ty = MRI.getType(Src1);
5739 unsigned Opc;
5740 if (Ty == LLT::fixed_vector(8, S8))
5741 Opc = AArch64::ST2Twov8b;
5742 else if (Ty == LLT::fixed_vector(16, S8))
5743 Opc = AArch64::ST2Twov16b;
5744 else if (Ty == LLT::fixed_vector(4, S16))
5745 Opc = AArch64::ST2Twov4h;
5746 else if (Ty == LLT::fixed_vector(8, S16))
5747 Opc = AArch64::ST2Twov8h;
5748 else if (Ty == LLT::fixed_vector(2, S32))
5749 Opc = AArch64::ST2Twov2s;
5750 else if (Ty == LLT::fixed_vector(4, S32))
5751 Opc = AArch64::ST2Twov4s;
5752 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
5753 Opc = AArch64::ST2Twov2d;
5754 else if (Ty == S64 || Ty == P0)
5755 Opc = AArch64::ST1Twov1d;
5756 else
5757 llvm_unreachable("Unexpected type for st2!");
5758 SmallVector<Register, 2> Regs = {Src1, Src2};
5759 Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB)
5760 : createDTuple(Regs, MIB);
5761 auto Store = MIB.buildInstr(Opc, {}, {Tuple, Ptr});
5762 Store.cloneMemRefs(I);
5763 constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
5764 break;
5765 }
5766 case Intrinsic::aarch64_mops_memset_tag: {
5767 // Transform
5768 // %dst:gpr(p0) = \
5769 // G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.mops.memset.tag),
5770 // \ %dst:gpr(p0), %val:gpr(s64), %n:gpr(s64)
5771 // where %dst is updated, into
5772 // %Rd:GPR64common, %Rn:GPR64) = \
5773 // MOPSMemorySetTaggingPseudo \
5774 // %Rd:GPR64common, %Rn:GPR64, %Rm:GPR64
5775 // where Rd and Rn are tied.
5776 // It is expected that %val has been extended to s64 in legalization.
5777 // Note that the order of the size/value operands is swapped.
5778
5779 Register DstDef = I.getOperand(0).getReg();
5780 // I.getOperand(1) is the intrinsic function
5781 Register DstUse = I.getOperand(2).getReg();
5782 Register ValUse = I.getOperand(3).getReg();
5783 Register SizeUse = I.getOperand(4).getReg();
5784
5785 // MOPSMemorySetTaggingPseudo has two defs; the intrinsic call has only one.
5786 // Therefore an additional virtual register is required for the updated size
5787 // operand. This value is not accessible via the semantics of the intrinsic.
5788 Register SizeDef = MRI.createGenericVirtualRegister(LLT::scalar(64));
5789
5790 auto Memset = MIB.buildInstr(AArch64::MOPSMemorySetTaggingPseudo,
5791 {DstDef, SizeDef}, {DstUse, SizeUse, ValUse});
5792 Memset.cloneMemRefs(I);
5793 constrainSelectedInstRegOperands(*Memset, TII, TRI, RBI);
5794 break;
5795 }
5796 }
5797
5798 I.eraseFromParent();
5799 return true;
5800}
5801
5802bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
5803 MachineRegisterInfo &MRI) {
5804 unsigned IntrinID = I.getIntrinsicID();
5805
5806 switch (IntrinID) {
5807 default:
5808 break;
5809 case Intrinsic::aarch64_crypto_sha1h: {
5810 Register DstReg = I.getOperand(0).getReg();
5811 Register SrcReg = I.getOperand(2).getReg();
5812
5813 // FIXME: Should this be an assert?
5814 if (MRI.getType(DstReg).getSizeInBits() != 32 ||
5815 MRI.getType(SrcReg).getSizeInBits() != 32)
5816 return false;
5817
5818 // The operation has to happen on FPRs. Set up some new FPR registers for
5819 // the source and destination if they are on GPRs.
5820 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
5821 SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
5822 MIB.buildCopy({SrcReg}, {I.getOperand(2)});
5823
5824 // Make sure the copy ends up getting constrained properly.
5825 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
5826 AArch64::GPR32RegClass, MRI);
5827 }
5828
5829 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
5830 DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
5831
5832 // Actually insert the instruction.
5833 auto SHA1Inst = MIB.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
5834 constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
5835
5836 // Did we create a new register for the destination?
5837 if (DstReg != I.getOperand(0).getReg()) {
5838 // Yep. Copy the result of the instruction back into the original
5839 // destination.
5840 MIB.buildCopy({I.getOperand(0)}, {DstReg});
5841 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
5842 AArch64::GPR32RegClass, MRI);
5843 }
5844
5845 I.eraseFromParent();
5846 return true;
5847 }
5848 case Intrinsic::ptrauth_sign: {
5849 Register DstReg = I.getOperand(0).getReg();
5850 Register ValReg = I.getOperand(2).getReg();
5851 uint64_t Key = I.getOperand(3).getImm();
5852 Register DiscReg = I.getOperand(4).getReg();
5853 auto DiscVal = getIConstantVRegVal(DiscReg, MRI);
5854 bool IsDiscZero = DiscVal && DiscVal->isZero();
5855
5856 if (Key > AArch64PACKey::LAST)
5857 return false;
5858
5859 unsigned Opcodes[][4] = {
5860 {AArch64::PACIA, AArch64::PACIB, AArch64::PACDA, AArch64::PACDB},
5861 {AArch64::PACIZA, AArch64::PACIZB, AArch64::PACDZA, AArch64::PACDZB}};
5862 unsigned Opcode = Opcodes[IsDiscZero][Key];
5863
5864 auto PAC = MIB.buildInstr(Opcode, {DstReg}, {ValReg});
5865
5866 if (!IsDiscZero) {
5867 PAC.addUse(DiscReg);
5868 RBI.constrainGenericRegister(DiscReg, AArch64::GPR64spRegClass, MRI);
5869 }
5870
5871 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
5872 I.eraseFromParent();
5873 return true;
5874 }
5875 case Intrinsic::ptrauth_strip: {
5876 Register DstReg = I.getOperand(0).getReg();
5877 Register ValReg = I.getOperand(2).getReg();
5878 uint64_t Key = I.getOperand(3).getImm();
5879
5880 if (Key > AArch64PACKey::LAST)
5881 return false;
5882 unsigned Opcode = getXPACOpcodeForKey((AArch64PACKey::ID)Key);
5883
5884 MIB.buildInstr(Opcode, {DstReg}, {ValReg});
5885
5886 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
5887 RBI.constrainGenericRegister(ValReg, AArch64::GPR64RegClass, MRI);
5888 I.eraseFromParent();
5889 return true;
5890 }
5891 case Intrinsic::frameaddress:
5892 case Intrinsic::returnaddress: {
5893 MachineFunction &MF = *I.getParent()->getParent();
5894 MachineFrameInfo &MFI = MF.getFrameInfo();
5895
5896 unsigned Depth = I.getOperand(2).getImm();
5897 Register DstReg = I.getOperand(0).getReg();
5898 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
5899
5900 if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
5901 if (!MFReturnAddr) {
5902 // Insert the copy from LR/X30 into the entry block, before it can be
5903 // clobbered by anything.
5904 MFI.setReturnAddressIsTaken(true);
5905 MFReturnAddr = getFunctionLiveInPhysReg(
5906 MF, TII, AArch64::LR, AArch64::GPR64RegClass, I.getDebugLoc());
5907 }
5908
5909 if (STI.hasPAuth()) {
5910 MIB.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
5911 } else {
5912 MIB.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
5913 MIB.buildInstr(AArch64::XPACLRI);
5914 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
5915 }
5916
5917 I.eraseFromParent();
5918 return true;
5919 }
5920
5921 MFI.setFrameAddressIsTaken(true);
5922 Register FrameAddr(AArch64::FP);
5923 while (Depth--) {
5924 Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
5925 auto Ldr =
5926 MIB.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}).addImm(0);
5927 constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI);
5928 FrameAddr = NextFrame;
5929 }
5930
5931 if (IntrinID == Intrinsic::frameaddress)
5932 MIB.buildCopy({DstReg}, {FrameAddr});
5933 else {
5934 MFI.setReturnAddressIsTaken(true);
5935
5936 if (STI.hasPAuth()) {
5937 Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
5938 MIB.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
5939 MIB.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
5940 } else {
5941 MIB.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr})
5942 .addImm(1);
5943 MIB.buildInstr(AArch64::XPACLRI);
5944 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
5945 }
5946 }
5947
5948 I.eraseFromParent();
5949 return true;
5950 }
5951 case Intrinsic::swift_async_context_addr:
5952 auto Sub = MIB.buildInstr(AArch64::SUBXri, {I.getOperand(0).getReg()},
5953 {Register(AArch64::FP)})
5954 .addImm(8)
5955 .addImm(0);
5956 constrainSelectedInstRegOperands(*Sub, TII, TRI, RBI);
5957
5958 MF->getFrameInfo().setFrameAddressIsTaken(true);
5959 MF->getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
5960 I.eraseFromParent();
5961 return true;
5962 }
5963 return false;
5964}
5965
5966InstructionSelector::ComplexRendererFns
5967AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
5968 auto MaybeImmed = getImmedFromMO(Root);
5969 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
5970 return std::nullopt;
5971 uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
5972 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
5973}
5974
5975InstructionSelector::ComplexRendererFns
5976AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
5977 auto MaybeImmed = getImmedFromMO(Root);
5978 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
5979 return std::nullopt;
5980 uint64_t Enc = 31 - *MaybeImmed;
5981 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
5982}
5983
5984InstructionSelector::ComplexRendererFns
5985AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
5986 auto MaybeImmed = getImmedFromMO(Root);
5987 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
5988 return std::nullopt;
5989 uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
5990 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
5991}
5992
5993InstructionSelector::ComplexRendererFns
5994AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
5995 auto MaybeImmed = getImmedFromMO(Root);
5996 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
5997 return std::nullopt;
5998 uint64_t Enc = 63 - *MaybeImmed;
5999 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6000}
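
The four shift renderers above only re-encode an immediate shift amount into the pair of fields that the patterns consuming them expect (in SelectionDAG the analogous formulas feed the immr/imms operands of UBFM/SBFM; take that pairing as background context rather than something this listing establishes). The arithmetic itself is easy to spot-check:

  #include <cassert>
  #include <cstdint>

  int main() {
    uint64_t Shift = 5;                 // an example shift amount
    uint64_t A32 = (32 - Shift) & 0x1f; // selectShiftA_32 -> 27
    uint64_t B32 = 31 - Shift;          // selectShiftB_32 -> 26
    uint64_t A64 = (64 - Shift) & 0x3f; // selectShiftA_64 -> 59
    uint64_t B64 = 63 - Shift;          // selectShiftB_64 -> 58
    assert(A32 == 27 && B32 == 26 && A64 == 59 && B64 == 58);
    return 0;
  }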
6001
6002/// Helper to select an immediate value that can be represented as a 12-bit
6003/// value shifted left by either 0 or 12. If it is possible to do so, return
6004/// the immediate and shift value. If not, return std::nullopt.
6005///
6006/// Used by selectArithImmed and selectNegArithImmed.
6007InstructionSelector::ComplexRendererFns
6008AArch64InstructionSelector::select12BitValueWithLeftShift(
6009 uint64_t Immed) const {
6010 unsigned ShiftAmt;
6011 if (Immed >> 12 == 0) {
6012 ShiftAmt = 0;
6013 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
6014 ShiftAmt = 12;
6015 Immed = Immed >> 12;
6016 } else
6017 return std::nullopt;
6018
6019 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
6020 return {{
6021 [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
6022 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
6023 }};
6024}
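
select12BitValueWithLeftShift accepts exactly two shapes of immediate: a value that already fits in 12 bits, or one whose low 12 bits are zero and that fits in 24 bits, which is re-encoded as (value >> 12) with an LSL #12 shifter. A hedged standalone sketch of that split (the helper name is made up; the real function returns renderer lambdas rather than a pair):

  #include <cstdint>
  #include <optional>
  #include <utility>

  std::optional<std::pair<uint64_t, unsigned>> split12Bit(uint64_t Immed) {
    if (Immed >> 12 == 0)
      return std::make_pair(Immed, 0u);        // e.g. 0x123    -> (0x123, LSL #0)
    if ((Immed & 0xfff) == 0 && Immed >> 24 == 0)
      return std::make_pair(Immed >> 12, 12u); // e.g. 0x123000 -> (0x123, LSL #12)
    return std::nullopt;                       // e.g. 0x123456 -> no match
  }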
6025
6026/// SelectArithImmed - Select an immediate value that can be represented as
6027/// a 12-bit value shifted left by either 0 or 12. If so, return true with
6028/// Val set to the 12-bit value and Shift set to the shifter operand.
6029InstructionSelector::ComplexRendererFns
6030AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
6031 // This function is called from the addsub_shifted_imm ComplexPattern,
6032 // which lists [imm] as the list of opcodes it's interested in; however,
6033 // we still need to check whether the operand is actually an immediate
6034 // here because the ComplexPattern opcode list is only used in
6035 // root-level opcode matching.
6036 auto MaybeImmed = getImmedFromMO(Root);
6037 if (MaybeImmed == std::nullopt)
6038 return std::nullopt;
6039 return select12BitValueWithLeftShift(*MaybeImmed);
6040}
6041
6042/// SelectNegArithImmed - As above, but negates the value before trying to
6043/// select it.
6044InstructionSelector::ComplexRendererFns
6045AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
6046 // We need a register here, because we need to know if we have a 64 or 32
6047 // bit immediate.
6048 if (!Root.isReg())
6049 return std::nullopt;
6050 auto MaybeImmed = getImmedFromMO(Root);
6051 if (MaybeImmed == std::nullopt)
6052 return std::nullopt;
6053 uint64_t Immed = *MaybeImmed;
6054
6055 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
6056 // have the opposite effect on the C flag, so this pattern mustn't match under
6057 // those circumstances.
6058 if (Immed == 0)
6059 return std::nullopt;
6060
6061 // Check whether the root is a 32-bit or a 64-bit type, as that determines
6062 // the width in which the negation wraps.
6063 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
6064 if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
6065 Immed = ~((uint32_t)Immed) + 1;
6066 else
6067 Immed = ~Immed + 1ULL;
6068
6069 if (Immed & 0xFFFFFFFFFF000000ULL)
6070 return std::nullopt;
6071
6072 Immed &= 0xFFFFFFULL;
6073 return select12BitValueWithLeftShift(Immed);
6074}
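
// --- Editorial aside (not part of the LLVM source) -------------------------
// The negation step above, written out on its own: negate in the operand's
// own width so the wrap-around matches the register size; the result is then
// fed to the same 12-bit encoder (encodeAddSubImm from the earlier aside is
// assumed). Immed == 0 must already have been rejected because of the C-flag
// difference between cmp/cmn with #0.
#include <cstdint>

uint64_t negateInWidth(uint64_t Immed, unsigned SizeInBits) {
  if (SizeInBits == 32)
    return static_cast<uint64_t>(~static_cast<uint32_t>(Immed) + 1u);
  return ~Immed + 1ULL;
}
// ---------------------------------------------------------------------------
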
6075
6076/// Return true if it is worth folding MI into an extended register. That is,
6077/// if it's safe to pull it into the addressing mode of a load or store as a
6078/// shift.
6079bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
6080 MachineInstr &MI, const MachineRegisterInfo &MRI) const {
6081 // Always fold if there is one use, or if we're optimizing for size.
6082 Register DefReg = MI.getOperand(0).getReg();
6083 if (MRI.hasOneNonDBGUse(DefReg) ||
6084 MI.getParent()->getParent()->getFunction().hasOptSize())
6085 return true;
6086
6087 // It's better to avoid folding and recomputing shifts when we don't have a
6088 // fastpath.
6089 if (!STI.hasLSLFast())
6090 return false;
6091
6092 // We have a fastpath, so folding a shift in and potentially computing it
6093 // many times may be beneficial. Check if this is only used in memory ops.
6094 // If it is, then we should fold.
6095 return all_of(MRI.use_nodbg_instructions(DefReg),
6096 [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
6097}
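
// --- Editorial aside (not part of the LLVM source) -------------------------
// The decision above condensed into plain booleans (the MRI and subtarget
// queries are stand-ins): fold when the computation has a single use or we
// optimise for size; otherwise only fold on subtargets with a fast
// shifted-register path, and only when every user is a memory access.
bool worthFoldingSketch(bool HasOneNonDbgUse, bool HasOptSize, bool HasLSLFast,
                        bool AllUsesAreLoadsOrStores) {
  if (HasOneNonDbgUse || HasOptSize)
    return true;
  if (!HasLSLFast)
    return false;
  return AllUsesAreLoadsOrStores;
}
// ---------------------------------------------------------------------------
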
6098
6099static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) {
6100 switch (Type) {
6101 case AArch64_AM::SXTB:
6102 case AArch64_AM::SXTH:
6103 case AArch64_AM::SXTW:
6104 return true;
6105 default:
6106 return false;
6107 }
6108}
6109
6110InstructionSelector::ComplexRendererFns
6111AArch64InstructionSelector::selectExtendedSHL(
6112 MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
6113 unsigned SizeInBytes, bool WantsExt) const {
6114 assert(Base.isReg() && "Expected base to be a register operand");
6115 assert(Offset.isReg() && "Expected offset to be a register operand");
6116
6117 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
6118 MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
6119
6120 unsigned OffsetOpc = OffsetInst->getOpcode();
6121 bool LookedThroughZExt = false;
6122 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
6123 // Try to look through a ZEXT.
6124 if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
6125 return std::nullopt;
6126
6127 OffsetInst = MRI.getVRegDef(OffsetInst->getOperand(1).getReg());
6128 OffsetOpc = OffsetInst->getOpcode();
6129 LookedThroughZExt = true;
6130
6131 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
6132 return std::nullopt;
6133 }
6134 // Make sure that the memory op is a valid size.
6135 int64_t LegalShiftVal = Log2_32(SizeInBytes);
6136 if (LegalShiftVal == 0)
6137 return std::nullopt;
6138 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
6139 return std::nullopt;
6140
6141 // Now, try to find the specific G_CONSTANT. Start by assuming that the
6142 // register we will offset is the LHS, and the register containing the
6143 // constant is the RHS.
6144 Register OffsetReg = OffsetInst->getOperand(1).getReg();
6145 Register ConstantReg = OffsetInst->getOperand(2).getReg();
6146 auto ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
6147 if (!ValAndVReg) {
6148 // We didn't get a constant on the RHS. If the opcode is a shift, then
6149 // we're done.
6150 if (OffsetOpc == TargetOpcode::G_SHL)
6151 return std::nullopt;
6152
6153 // If we have a G_MUL, we can use either register. Try looking at the RHS.
6154 std::swap(OffsetReg, ConstantReg);
6155 ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
6156 if (!ValAndVReg)
6157 return std::nullopt;
6158 }
6159
6160 // The value must fit into 3 bits, and must be positive. Make sure that is
6161 // true.
6162 int64_t ImmVal = ValAndVReg->Value.getSExtValue();
6163
6164 // Since we're going to pull this into a shift, the constant value must be
6165 // a power of 2. If we got a multiply, then we need to check this.
6166 if (OffsetOpc == TargetOpcode::G_MUL) {
6167 if (!llvm::has_single_bit<uint32_t>(ImmVal))
6168 return std::nullopt;
6169
6170 // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
6171 ImmVal = Log2_32(ImmVal);
6172 }
6173
6174 if ((ImmVal & 0x7) != ImmVal)
6175 return std::nullopt;
6176
6177 // We are only allowed to shift by LegalShiftVal. This shift value is built
6178 // into the instruction, so we can't just use whatever we want.
6179 if (ImmVal != LegalShiftVal)
6180 return std::nullopt;
6181
6182 unsigned SignExtend = 0;
6183 if (WantsExt) {
6184 // Check if the offset is defined by an extend, unless we looked through a
6185 // G_ZEXT earlier.
6186 if (!LookedThroughZExt) {
6187 MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
6188 auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
6189 if (Ext == AArch64_AM::InvalidShiftExtend)
6190 return std::nullopt;
6191
6192 SignExtend = isSignExtendShiftType(Ext) ? 1 : 0;
6193 // We only support SXTW for signed extension here.
6194 if (SignExtend && Ext != AArch64_AM::SXTW)
6195 return std::nullopt;
6196 OffsetReg = ExtInst->getOperand(1).getReg();
6197 }
6198
6199 // Need a 32-bit wide register here.
6200 MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
6201 OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
6202 }
6203
6204 // We can use the LHS of the GEP as the base, and the LHS of the shift as an
6205 // offset. Signify that we are shifting by setting the shift flag to 1.
6206 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
6207 [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
6208 [=](MachineInstrBuilder &MIB) {
6209 // Need to add both immediates here to make sure that they are both
6210 // added to the instruction.
6211 MIB.addImm(SignExtend);
6212 MIB.addImm(1);
6213 }}};
6214}
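
// --- Editorial aside (not part of the LLVM source) -------------------------
// The constant handling in selectExtendedSHL, restated standalone: a G_MUL
// by a power of two is folded as if it were a shift, and whichever shift
// amount we end up with must fit in 3 bits and equal log2 of the access size
// (the only shift the load/store encoding supports). Names are illustrative.
#include <cstdint>
#include <optional>

std::optional<int64_t> foldableShiftAmount(bool IsMul, int64_t ImmVal,
                                           int64_t LegalShiftVal) {
  if (IsMul) {
    if (ImmVal <= 0 || (ImmVal & (ImmVal - 1)) != 0)
      return std::nullopt;                 // multiplier is not a power of two
    int64_t Log = 0;
    while ((int64_t(1) << Log) < ImmVal)
      ++Log;
    ImmVal = Log;                          // treat x * 2^k as x << k
  }
  if ((ImmVal & 0x7) != ImmVal)
    return std::nullopt;                   // must fit in the 3-bit field
  if (ImmVal != LegalShiftVal)
    return std::nullopt;                   // must match the element size
  return ImmVal;
}
// ---------------------------------------------------------------------------
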
6215
6216/// This is used for computing addresses like this:
6217///
6218/// ldr x1, [x2, x3, lsl #3]
6219///
6220/// Where x2 is the base register, and x3 is an offset register. The shift-left
6221/// is a constant value specific to this load instruction. That is, we'll never
6222/// see anything other than a 3 here (which corresponds to the size of the
6223/// element being loaded.)
6224InstructionSelector::ComplexRendererFns
6225AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
6226 MachineOperand &Root, unsigned SizeInBytes) const {
6227 if (!Root.isReg())
6228 return std::nullopt;
6229 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
6230
6231 // We want to find something like this:
6232 //
6233 // val = G_CONSTANT LegalShiftVal
6234 // shift = G_SHL off_reg val
6235 // ptr = G_PTR_ADD base_reg shift
6236 // x = G_LOAD ptr
6237 //
6238 // And fold it into this addressing mode:
6239 //
6240 // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
6241
6242 // Check if we can find the G_PTR_ADD.
6243 MachineInstr *PtrAdd =
6244 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
6245 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
6246 return std::nullopt;
6247
6248 // Now, try to match an opcode which will match our specific offset.
6249 // We want a G_SHL or a G_MUL.
6250 MachineInstr *OffsetInst =
6251 getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI);
6252 return selectExtendedSHL(Root, PtrAdd->getOperand(1),
6253 OffsetInst->getOperand(0), SizeInBytes,
6254 /*WantsExt=*/false);
6255}
6256
6257/// This is used for computing addresses like this:
6258///
6259/// ldr x1, [x2, x3]
6260///
6261/// Where x2 is the base register, and x3 is an offset register.
6262///
6263/// When possible (or profitable) to fold a G_PTR_ADD into the address
6264/// calculation, this will do so. Otherwise, it will return std::nullopt.
6265InstructionSelector::ComplexRendererFns
6266AArch64InstructionSelector::selectAddrModeRegisterOffset(
6267 MachineOperand &Root) const {
6268 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
6269
6270 // We need a GEP.
6271 MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
6272 if (Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
6273 return std::nullopt;
6274
6275 // If this is used more than once, let's not bother folding.
6276 // TODO: Check if they are memory ops. If they are, then we can still fold
6277 // without having to recompute anything.
6278 if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg()))
6279 return std::nullopt;
6280
6281 // Base is the GEP's LHS, offset is its RHS.
6282 return {{[=](MachineInstrBuilder &MIB) {
6283 MIB.addUse(Gep->getOperand(1).getReg());
6284 },
6285 [=](MachineInstrBuilder &MIB) {
6286 MIB.addUse(Gep->getOperand(2).getReg());
6287 },
6288 [=](MachineInstrBuilder &MIB) {
6289 // Need to add both immediates here to make sure that they are both
6290 // added to the instruction.
6291 MIB.addImm(0);
6292 MIB.addImm(0);
6293 }}};
6294}
6295
6296/// This is intended to be equivalent to selectAddrModeXRO in
6297/// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
6298InstructionSelector::ComplexRendererFns
6299AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
6300 unsigned SizeInBytes) const {
6301 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
6302 if (!Root.isReg())
6303 return std::nullopt;
6304 MachineInstr *PtrAdd =
6305 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
6306 if (!PtrAdd)
6307 return std::nullopt;
6308
6309 // Check for an immediate which cannot be encoded in the [base + imm]
6310 // addressing mode, and can't be encoded in an add/sub. If this happens, we'll
6311 // end up with code like:
6312 //
6313 // mov x0, wide
6314 // add x1, base, x0
6315 // ldr x2, [x1, x0]
6316 //
6317 // In this situation, we can use the [base, xreg] addressing mode to save an
6318 // add/sub:
6319 //
6320 // mov x0, wide
6321 // ldr x2, [base, x0]
6322 auto ValAndVReg =
6323 getIConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI);
6324 if (ValAndVReg) {
6325 unsigned Scale = Log2_32(SizeInBytes);
6326 int64_t ImmOff = ValAndVReg->Value.getSExtValue();
6327
6328 // Skip immediates that can be selected in the load/store addressing
6329 // mode.
6330 if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
6331 ImmOff < (0x1000 << Scale))
6332 return std::nullopt;
6333
6334 // Helper lambda to decide whether or not it is preferable to emit an add.
6335 auto isPreferredADD = [](int64_t ImmOff) {
6336 // Constants in [0x0, 0xfff] can be encoded in an add.
6337 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
6338 return true;
6339
6340 // Can it be encoded in an add lsl #12?
6341 if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
6342 return false;
6343
6344 // It can be encoded in an add lsl #12, but we may not want to. If it is
6345 // possible to select this as a single movz, then prefer that. A single
6346 // movz is faster than an add with a shift.
6347 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
6348 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
6349 };
6350
6351 // If the immediate can be encoded in a single add/sub, then bail out.
6352 if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
6353 return std::nullopt;
6354 }
6355
6356 // Try to fold shifts into the addressing mode.
6357 auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
6358 if (AddrModeFns)
6359 return AddrModeFns;
6360
6361 // If that doesn't work, see if it's possible to fold in registers from
6362 // a GEP.
6363 return selectAddrModeRegisterOffset(Root);
6364}
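
// --- Editorial aside (not part of the LLVM source) -------------------------
// The isPreferredADD lambda above with its masks rewritten as complements,
// which may be easier to read: an offset is "preferred as an ADD" if it fits
// the plain #imm12 form, or fits the #imm12, LSL #12 form and cannot instead
// be produced by a single MOVZ (all of its set bits inside one 16-bit chunk).
#include <cstdint>

bool isPreferredADDSketch(int64_t ImmOff) {
  if ((ImmOff & ~0xfffLL) == 0)      // fits "add xD, xN, #imm12"
    return true;
  if ((ImmOff & ~0xfff000LL) != 0)   // does not fit "add xD, xN, #imm12, lsl #12"
    return false;
  // Fits with the shift; prefer the ADD only when a single MOVZ cannot build
  // the constant (i.e. the set bits are not confined to bits [16,23] or
  // [12,15]).
  return (ImmOff & ~0xff0000LL) != 0 && (ImmOff & ~0xf000LL) != 0;
}
// ---------------------------------------------------------------------------
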
6365
6366/// This is used for computing addresses like this:
6367///
6368/// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
6369///
6370/// Where we have a 64-bit base register, a 32-bit offset register, and an
6371/// extend (which may or may not be signed).
6372InstructionSelector::ComplexRendererFns
6373AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
6374 unsigned SizeInBytes) const {
6375 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
6376
6377 MachineInstr *PtrAdd =
6378 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
6379 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
6380 return std::nullopt;
6381
6382 MachineOperand &LHS = PtrAdd->getOperand(1);
6383 MachineOperand &RHS = PtrAdd->getOperand(2);
6384 MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);
6385
6386 // The first case is the same as selectAddrModeXRO, except we need an extend.
6387 // In this case, we try to find a shift and extend, and fold them into the
6388 // addressing mode.
6389 //
6390 // E.g.
6391 //
6392 // off_reg = G_Z/S/ANYEXT ext_reg
6393 // val = G_CONSTANT LegalShiftVal
6394 // shift = G_SHL off_reg val
6395 // ptr = G_PTR_ADD base_reg shift
6396 // x = G_LOAD ptr
6397 //
6398 // In this case we can get a load like this:
6399 //
6400 // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
6401 auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
6402 SizeInBytes, /*WantsExt=*/true);
6403 if (ExtendedShl)
6404 return ExtendedShl;
6405
6406 // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
6407 //
6408 // e.g.
6409 // ldr something, [base_reg, ext_reg, sxtw]
6410 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
6411 return std::nullopt;
6412
6413 // Check if this is an extend. We'll get an extend type if it is.
6414 AArch64_AM::ShiftExtendType Ext =
6415 getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
6416 if (Ext == AArch64_AM::InvalidShiftExtend)
6417 return std::nullopt;
6418
6419 // Need a 32-bit wide register.
6420 MachineIRBuilder MIB(*PtrAdd);
6421 Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(),
6422 AArch64::GPR32RegClass, MIB);
6423 unsigned SignExtend = Ext == AArch64_AM::SXTW;
6424
6425 // Base is LHS, offset is ExtReg.
6426 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
6427 [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
6428 [=](MachineInstrBuilder &MIB) {
6429 MIB.addImm(SignExtend);
6430 MIB.addImm(0);
6431 }}};
6432}
6433
6434/// Select a "register plus unscaled signed 9-bit immediate" address. This
6435/// should only match when there is an offset that is not valid for a scaled
6436/// immediate addressing mode. The "Size" argument is the size in bytes of the
6437/// memory reference, which is needed here to know what is valid for a scaled
6438/// immediate.
6439InstructionSelector::ComplexRendererFns
6440AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
6441 unsigned Size) const {
6442 MachineRegisterInfo &MRI =
6443 Root.getParent()->getParent()->getParent()->getRegInfo();
6444
6445 if (!Root.isReg())
6446 return std::nullopt;
6447
6448 if (!isBaseWithConstantOffset(Root, MRI))
6449 return std::nullopt;
6450
6451 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
6452
6453 MachineOperand &OffImm = RootDef->getOperand(2);
6454 if (!OffImm.isReg())
6455 return std::nullopt;
6456 MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
6457 if (RHS->getOpcode() != TargetOpcode::G_CONSTANT)
6458 return std::nullopt;
6459 int64_t RHSC;
6460 MachineOperand &RHSOp1 = RHS->getOperand(1);
6461 if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
6462 return std::nullopt;
6463 RHSC = RHSOp1.getCImm()->getSExtValue();
6464
6465 // If the offset is valid as a scaled immediate, don't match here.
6466 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size)))
6467 return std::nullopt;
6468 if (RHSC >= -256 && RHSC < 256) {
6469 MachineOperand &Base = RootDef->getOperand(1);
6470 return {{
6471 [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
6472 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
6473 }};
6474 }
6475 return std::nullopt;
6476}
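
// --- Editorial aside (not part of the LLVM source) -------------------------
// The two checks in selectAddrModeUnscaled, standalone: first reject offsets
// that the scaled [base, #uimm12] form can already encode (that pattern is
// preferred), then accept anything in the signed 9-bit unscaled range used by
// LDUR/STUR. "Size" is the access size in bytes; the helper name is made up.
#include <cstdint>

bool matchesUnscaledOffset(int64_t RHSC, unsigned Size) {
  unsigned Scale = 0;
  while ((1u << (Scale + 1)) <= Size)          // Log2_32(Size)
    ++Scale;
  bool FitsScaled = (RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
                    RHSC < (int64_t(0x1000) << Scale);
  if (FitsScaled)
    return false;
  return RHSC >= -256 && RHSC < 256;
}
// ---------------------------------------------------------------------------
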
6477
6478InstructionSelector::ComplexRendererFns
6479AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
6480 unsigned Size,
6481 MachineRegisterInfo &MRI) const {
6482 if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
6483 return std::nullopt;
6484 MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg());
6485 if (Adrp.getOpcode() != AArch64::ADRP)
6486 return std::nullopt;
6487
6488 // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
6489 auto Offset = Adrp.getOperand(1).getOffset();
6490 if (Offset % Size != 0)
6491 return std::nullopt;
6492
6493 auto GV = Adrp.getOperand(1).getGlobal();
6494 if (GV->isThreadLocal())
6495 return std::nullopt;
6496
6497 auto &MF = *RootDef.getParent()->getParent();
6498 if (GV->getPointerAlignment(MF.getDataLayout()) < Size)
6499 return std::nullopt;
6500
6501 unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget());
6502 MachineIRBuilder MIRBuilder(RootDef);
6503 Register AdrpReg = Adrp.getOperand(0).getReg();
6504 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },
6505 [=](MachineInstrBuilder &MIB) {
6506 MIB.addGlobalAddress(GV, Offset,
6507 OpFlags | AArch64II::MO_PAGEOFF |
6508 AArch64II::MO_NC);
6509 }}};
6510}
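
// --- Editorial aside (not part of the LLVM source) -------------------------
// The guards in tryFoldAddLowIntoImm reduced to their arithmetic: the fold is
// only done when the global's offset is a multiple of the access size and the
// global is aligned at least to that size, presumably because the folded form
// uses the scaled #uimm12 offset field; thread-local globals use a different
// sequence and are never folded. Parameter names here are illustrative.
#include <cstdint>

bool canFoldAddLowSketch(bool IsThreadLocal, int64_t Offset,
                         uint64_t PointerAlignment, unsigned Size) {
  if (IsThreadLocal)
    return false;
  if (Offset % Size != 0)
    return false;
  return PointerAlignment >= Size;
}
// ---------------------------------------------------------------------------
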
6511
6512/// Select a "register plus scaled unsigned 12-bit immediate" address. The
6513/// "Size" argument is the size in bytes of the memory reference, which
6514/// determines the scale.
6515InstructionSelector::ComplexRendererFns
6516AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
6517 unsigned Size) const {
6518 MachineFunction &MF = *Root.getParent()->getParent()->getParent();
6519 MachineRegisterInfo &MRI = MF.getRegInfo();
6520
6521 if (!Root.isReg())
6522 return std::nullopt;
6523
6524 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
6525 if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
6526 return {{
6527 [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
6528 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
6529 }};
6530 }
6531
6532 CodeModel::Model CM = MF.getTarget().getCodeModel();
6533 // Check if we can fold the ADD of a small-code-model ADRP + ADD address.
6534 if (CM == CodeModel::Small) {
6535 auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI);
6536 if (OpFns)
6537 return OpFns;
6538 }
6539
6540 if (isBaseWithConstantOffset(Root, MRI)) {
6541 MachineOperand &LHS = RootDef->getOperand(1);
6542 MachineOperand &RHS = RootDef->getOperand(2);
6543 MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
6544 MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
6545
6546 int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
6547 unsigned Scale = Log2_32(Size);
6548 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
6549 if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
6550 return {{
6551 [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
6552 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
6553 }};
6554
6555 return {{
6556 [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
6557 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
6558 }};
6559 }
6560 }
6561
6562 // Before falling back to our general case, check if the unscaled
6563 // instructions can handle this. If so, that's preferable.
6564 if (selectAddrModeUnscaled(Root, Size))
6565 return std::nullopt;
6566
6567 return {{
6568 [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
6569 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
6570 }};
6571}
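
// --- Editorial aside (not part of the LLVM source) -------------------------
// The scaled-immediate test used by selectAddrModeIndexed, standalone: the
// constant must be non-negative, a multiple of the access size, and its
// scaled value must fit the unsigned 12-bit field; the value that is actually
// rendered into the instruction is RHSC >> Scale. The helper name is made up.
#include <cstdint>
#include <optional>

std::optional<int64_t> scaledImm12(int64_t RHSC, unsigned Size) {
  unsigned Scale = 0;
  while ((1u << (Scale + 1)) <= Size)          // Log2_32(Size)
    ++Scale;
  if ((RHSC & (Size - 1)) != 0 || RHSC < 0 ||
      RHSC >= (int64_t(0x1000) << Scale))
    return std::nullopt;
  return RHSC >> Scale;
}
// ---------------------------------------------------------------------------
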
6572
6573/// Given a shift instruction, return the correct shift type for that
6574/// instruction.
6575static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
6576 switch (MI.getOpcode()) {
6577 default:
6578 return AArch64_AM::InvalidShiftExtend;
6579 case TargetOpcode::G_SHL:
6580 return AArch64_AM::LSL;
6581 case TargetOpcode::G_LSHR:
6582 return AArch64_AM::LSR;
6583 case TargetOpcode::G_ASHR:
6584 return AArch64_AM::ASR;
6585 case TargetOpcode::G_ROTR:
6586 return AArch64_AM::ROR;
6587 }
6588}
6589
6590/// Select a "shifted register" operand. If the value is not shifted, set the
6591/// shift operand to a default value of "lsl 0".
6592InstructionSelector::ComplexRendererFns
6593AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,
6594 bool AllowROR) const {
6595 if (!Root.isReg())
6596 return std::nullopt;
6597 MachineRegisterInfo &MRI =
6598 Root.getParent()->getParent()->getParent()->getRegInfo();
6599
6600 // Check if the operand is defined by an instruction which corresponds to
6601 // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
6602 MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
6603 AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);
6604 if (ShType == AArch64_AM::InvalidShiftExtend)
6605 return std::nullopt;
6606 if (ShType == AArch64_AM::ROR && !AllowROR)
6607 return std::nullopt;
6608 if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI))
6609 return std::nullopt;
6610
6611 // Need an immediate on the RHS.
6612 MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
6613 auto Immed = getImmedFromMO(ShiftRHS);
6614 if (!Immed)
6615 return std::nullopt;
6616
6617 // We have something that we can fold. Fold in the shift's LHS and RHS into
6618 // the instruction.
6619 MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
6620 Register ShiftReg = ShiftLHS.getReg();
6621
6622 unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
6623 unsigned Val = *Immed & (NumBits - 1);
6624 unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);
6625
6626 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
6627 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
6628}
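
// --- Editorial aside (not part of the LLVM source) -------------------------
// What the renderers above hand to the instruction: the shift amount is
// masked by the register width (so shifting a 32-bit value by 33 folds as a
// shift by 1) and then packed together with the shift kind, mirroring
// AArch64_AM::getShifterImm. The exact field layout below (kind in the bits
// above a 6-bit amount) is an assumption for illustration.
#include <cstdint>

unsigned packShifterImmSketch(unsigned ShiftKindEnc /*LSL=0,LSR=1,ASR=2,ROR=3*/,
                              uint64_t Imm, unsigned NumBits) {
  unsigned Val = static_cast<unsigned>(Imm) & (NumBits - 1);
  return (ShiftKindEnc << 6) | Val;
}
// ---------------------------------------------------------------------------
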
6629
6630AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
6631 MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
6632 unsigned Opc = MI.getOpcode();
6633
6634 // Handle explicit extend instructions first.
6635 if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
6636 unsigned Size;
6637 if (Opc == TargetOpcode::G_SEXT)
6638 Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
6639 else
6640 Size = MI.getOperand(2).getImm();
6641 assert(Size != 64 && "Extend from 64 bits?");
6642 switch (Size) {
6643 case 8:
6644 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTB;
6645 case 16:
6646 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTH;
6647 case 32:
6648 return AArch64_AM::SXTW;
6649 default:
6650 return AArch64_AM::InvalidShiftExtend;
6651 }
6652 }
6653
6654 if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
6655 unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
6656 assert(Size != 64 && "Extend from 64 bits?");
6657 switch (Size) {
6658 case 8:
6659 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTB;
6660 case 16:
6661 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTH;
6662 case 32:
6663 return AArch64_AM::UXTW;
6664 default:
6665 return AArch64_AM::InvalidShiftExtend;
6666 }
6667 }
6668
6669 // Don't have an explicit extend. Try to handle a G_AND with a constant mask
6670 // on the RHS.
6671 if (Opc != TargetOpcode::G_AND)
6672 return AArch64_AM::InvalidShiftExtend;
6673
6674 std::optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));
6675 if (!MaybeAndMask)
6676 return AArch64_AM::InvalidShiftExtend;
6677 uint64_t AndMask = *MaybeAndMask;
6678 switch (AndMask) {
6679 default:
6680 return AArch64_AM::InvalidShiftExtend;
6681 case 0xFF:
6682 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
6683 case 0xFFFF:
6684 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
6685 case 0xFFFFFFFF:
6686 return AArch64_AM::UXTW;
6687 }
6688}
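
// --- Editorial aside (not part of the LLVM source) -------------------------
// The tail of getExtendTypeForInst as a standalone table: only these three
// masks let a G_AND be folded as an unsigned extend, and the byte/halfword
// forms are not usable as load/store extends. The enum here is a local
// stand-in for AArch64_AM::ShiftExtendType.
#include <cstdint>

enum class ExtKind { Invalid, UXTB, UXTH, UXTW };

ExtKind extendForAndMask(uint64_t AndMask, bool IsLoadStore) {
  switch (AndMask) {
  case 0xFF:
    return IsLoadStore ? ExtKind::Invalid : ExtKind::UXTB;
  case 0xFFFF:
    return IsLoadStore ? ExtKind::Invalid : ExtKind::UXTH;
  case 0xFFFFFFFF:
    return ExtKind::UXTW;
  default:
    return ExtKind::Invalid;
  }
}
// ---------------------------------------------------------------------------
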
6689
6690Register AArch64InstructionSelector::moveScalarRegClass(
6691 Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const {
6692 MachineRegisterInfo &MRI = *MIB.getMRI();
6693 auto Ty = MRI.getType(Reg);
6694 assert(!Ty.isVector() && "Expected scalars only!");
6695 if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC))
6696 return Reg;
6697
6698 // Create a copy and immediately select it.
6699 // FIXME: We should have an emitCopy function?
6700 auto Copy = MIB.buildCopy({&RC}, {Reg});
6701 selectCopy(*Copy, TII, MRI, TRI, RBI);
6702 return Copy.getReg(0);
6703}
6704
6705/// Select an "extended register" operand. This operand folds in an extend
6706/// followed by an optional left shift.
6707InstructionSelector::ComplexRendererFns
6708AArch64InstructionSelector::selectArithExtendedRegister(
6709 MachineOperand &Root) const {
6710 if (!Root.isReg())
6711 return std::nullopt;
6712 MachineRegisterInfo &MRI =
6713 Root.getParent()->getParent()->getParent()->getRegInfo();
6714
6715 uint64_t ShiftVal = 0;
6716 Register ExtReg;
6717 AArch64_AM::ShiftExtendType Ext;
6718 MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
6719 if (!RootDef)
6720 return std::nullopt;
6721
6722 if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI))
6723 return std::nullopt;
6724
6725 // Check if we can fold a shift and an extend.
6726 if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
6727 // Look for a constant on the RHS of the shift.
6728 MachineOperand &RHS = RootDef->getOperand(2);
6729 std::optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS);
6730 if (!MaybeShiftVal)
6731 return std::nullopt;
6732 ShiftVal = *MaybeShiftVal;
6733 if (ShiftVal > 4)
6734 return std::nullopt;
6735 // Look for a valid extend instruction on the LHS of the shift.
6736 MachineOperand &LHS = RootDef->getOperand(1);
6737 MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI);
6738 if (!ExtDef)
6739 return std::nullopt;
6740 Ext = getExtendTypeForInst(*ExtDef, MRI);
6741 if (Ext == AArch64_AM::InvalidShiftExtend)
6742 return std::nullopt;
6743 ExtReg = ExtDef->getOperand(1).getReg();
6744 } else {
6745 // Didn't get a shift. Try just folding an extend.
6746 Ext = getExtendTypeForInst(*RootDef, MRI);
6747 if (Ext == AArch64_AM::InvalidShiftExtend)
6748 return std::nullopt;
6749 ExtReg = RootDef->getOperand(1).getReg();
6750
6751 // If we have a 32 bit instruction which zeroes out the high half of a
6752 // register, we get an implicit zero extend for free. Check if we have one.
6753 // FIXME: We actually emit the extend right now even though we don't have
6754 // to.
6755 if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
6756 MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
6757 if (isDef32(*ExtInst))
6758 return std::nullopt;
6759 }
6760 }
6761
6762 // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
6763 // copy.
6764 MachineIRBuilder MIB(*RootDef);
6765 ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB);
6766
6767 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
6768 [=](MachineInstrBuilder &MIB) {
6769 MIB.addImm(getArithExtendImm(Ext, ShiftVal));
6770 }}};
6771}
6772
6773void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
6774 const MachineInstr &MI,
6775 int OpIdx) const {
6776 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
6777 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
6778        "Expected G_CONSTANT");
6779 std::optional<int64_t> CstVal =
6780 getIConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI);
6781 assert(CstVal && "Expected constant value");
6782 MIB.addImm(*CstVal);
6783}
6784
6785void AArch64InstructionSelector::renderLogicalImm32(
6786 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
6787 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
6788        "Expected G_CONSTANT");
6789 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
6790 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32);
6791 MIB.addImm(Enc);
6792}
6793
6794void AArch64InstructionSelector::renderLogicalImm64(
6795 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
6796 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
6797        "Expected G_CONSTANT");
6798 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
6799 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64);
6800 MIB.addImm(Enc);
6801}
6802
6803void AArch64InstructionSelector::renderFPImm16(MachineInstrBuilder &MIB,
6804 const MachineInstr &MI,
6805 int OpIdx) const {
6806 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
6807        "Expected G_FCONSTANT");
6808 MIB.addImm(
6809 AArch64_AM::getFP16Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
6810}
6811
6812void AArch64InstructionSelector::renderFPImm32(MachineInstrBuilder &MIB,
6813 const MachineInstr &MI,
6814 int OpIdx) const {
6815 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
6816        "Expected G_FCONSTANT");
6817 MIB.addImm(
6818 AArch64_AM::getFP32Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
6819}
6820
6821void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB,
6822 const MachineInstr &MI,
6823 int OpIdx) const {
6824 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
6825        "Expected G_FCONSTANT");
6826 MIB.addImm(
6827 AArch64_AM::getFP64Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
6828}
6829
6830void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
6831 MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
6832 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
6833        "Expected G_FCONSTANT");
6834 MIB.addImm(AArch64_AM::encodeAdvSIMDModImmType4(MI.getOperand(1)
6835 .getFPImm()
6836 ->getValueAPF()
6837 .bitcastToAPInt()
6838 .getZExtValue()));
6839}
6840
6841bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
6842 const MachineInstr &MI, unsigned NumBytes) const {
6843 if (!MI.mayLoadOrStore())
6844 return false;
6845 assert(MI.hasOneMemOperand() &&
6846        "Expected load/store to have only one mem op!");
6847 return (*MI.memoperands_begin())->getSize() == NumBytes;
6848}
6849
6850bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
6851 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
6852 if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
6853 return false;
6854
6855 // Only return true if we know the operation will zero-out the high half of
6856 // the 64-bit register. Truncates can be subregister copies, which don't
6857 // zero out the high bits. Copies and other copy-like instructions can be
6858 // fed by truncates, or could be lowered as subregister copies.
6859 switch (MI.getOpcode()) {
6860 default:
6861 return true;
6862 case TargetOpcode::COPY:
6863 case TargetOpcode::G_BITCAST:
6864 case TargetOpcode::G_TRUNC:
6865 case TargetOpcode::G_PHI:
6866 return false;
6867 }
6868}
6869
6870
6871// Perform fixups on the given PHI instruction's operands to force them all
6872// to be the same as the destination regbank.
6873static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
6874 const AArch64RegisterBankInfo &RBI) {
6875 assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
6876 Register DstReg = MI.getOperand(0).getReg();
6877 const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg);
6878 assert(DstRB && "Expected PHI dst to have regbank assigned");
6879 MachineIRBuilder MIB(MI);
6880
6881 // Go through each operand and ensure it has the same regbank.
6882 for (MachineOperand &MO : llvm::drop_begin(MI.operands())) {
6883 if (!MO.isReg())
6884 continue;
6885 Register OpReg = MO.getReg();
6886 const RegisterBank *RB = MRI.getRegBankOrNull(OpReg);
6887 if (RB != DstRB) {
6888 // Insert a cross-bank copy.
6889 auto *OpDef = MRI.getVRegDef(OpReg);
6890 const LLT &Ty = MRI.getType(OpReg);
6891 MachineBasicBlock &OpDefBB = *OpDef->getParent();
6892
6893 // Any instruction we insert must appear after all PHIs in the block
6894 // for the block to be valid MIR.
6895 MachineBasicBlock::iterator InsertPt = std::next(OpDef->getIterator());
6896 if (InsertPt != OpDefBB.end() && InsertPt->isPHI())
6897 InsertPt = OpDefBB.getFirstNonPHI();
6898 MIB.setInsertPt(*OpDef->getParent(), InsertPt);
6899 auto Copy = MIB.buildCopy(Ty, OpReg);
6900 MRI.setRegBank(Copy.getReg(0), *DstRB);
6901 MO.setReg(Copy.getReg(0));
6902 }
6903 }
6904}
6905
6906void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
6907 // We're looking for PHIs; build a list so we don't invalidate iterators.
6908 MachineRegisterInfo &MRI = MF.getRegInfo();
6909 SmallVector<MachineInstr *, 32> Phis;
6910 for (auto &BB : MF) {
6911 for (auto &MI : BB) {
6912 if (MI.getOpcode() == TargetOpcode::G_PHI)
6913 Phis.emplace_back(&MI);
6914 }
6915 }
6916
6917 for (auto *MI : Phis) {
6918 // We need to do some work here if the operand types are < 16 bit and they
6919 // are split across fpr/gpr banks. Since all types <32b on gpr
6920 // end up being assigned gpr32 regclasses, we can end up with PHIs here
6921 // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't
6922 // be selecting heterogeneous regbanks for operands if possible, but we
6923 // still need to be able to deal with it here.
6924 //
6925 // To fix this, if we have a gpr-bank operand < 32b in size and at least
6926 // one other operand is on the fpr bank, then we add cross-bank copies
6927 // to homogenize the operand banks. For simplicity the bank that we choose
6928 // to settle on is whatever bank the def operand has. For example:
6929 //
6930 // %endbb:
6931 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
6932 // =>
6933 // %bb2:
6934 // ...
6935 // %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
6936 // ...
6937 // %endbb:
6938 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
6939 bool HasGPROp = false, HasFPROp = false;
6940 for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) {
6941 if (!MO.isReg())
6942 continue;
6943 const LLT &Ty = MRI.getType(MO.getReg());
6944 if (!Ty.isValid() || !Ty.isScalar())
6945 break;
6946 if (Ty.getSizeInBits() >= 32)
6947 break;
6948 const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
6949 // If for some reason we don't have a regbank yet, don't try anything.
6950 if (!RB)
6951 break;
6952
6953 if (RB->getID() == AArch64::GPRRegBankID)
6954 HasGPROp = true;
6955 else
6956 HasFPROp = true;
6957 }
6958 // We have heterogeneous regbanks, so fix them up.
6959 if (HasGPROp && HasFPROp)
6960 fixupPHIOpBanks(*MI, MRI, RBI);
6961 }
6962}
6963
6964namespace llvm {
6965InstructionSelector *
6966createAArch64InstructionSelector(const AArch64TargetMachine &TM,
6967 AArch64Subtarget &Subtarget,
6968 AArch64RegisterBankInfo &RBI) {
6969 return new AArch64InstructionSelector(TM, Subtarget, RBI);
6970}
6971}