Bug Summary

File: llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
Warning: line 995, column 7
6th function call argument is an uninitialized value

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name AArch64InstructionSelector.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/build-llvm -resource-dir /usr/lib/llvm-14/lib/clang/14.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64 -I include -I /build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/include -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-14/lib/clang/14.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include 
-internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-command-line-argument -Wno-unknown-warning-option -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/build-llvm -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-10-17-004846-21170-1 -x c++ /build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

1//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the InstructionSelector class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
14#include "AArch64GlobalISelUtils.h"
15#include "AArch64InstrInfo.h"
16#include "AArch64MachineFunctionInfo.h"
17#include "AArch64RegisterBankInfo.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
20#include "AArch64TargetMachine.h"
21#include "AArch64GlobalISelUtils.h"
22#include "MCTargetDesc/AArch64AddressingModes.h"
23#include "MCTargetDesc/AArch64MCTargetDesc.h"
24#include "llvm/ADT/Optional.h"
25#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
26#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
27#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
28#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
29#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
30#include "llvm/CodeGen/MachineBasicBlock.h"
31#include "llvm/CodeGen/MachineConstantPool.h"
32#include "llvm/CodeGen/MachineFunction.h"
33#include "llvm/CodeGen/MachineInstr.h"
34#include "llvm/CodeGen/MachineInstrBuilder.h"
35#include "llvm/CodeGen/MachineMemOperand.h"
36#include "llvm/CodeGen/MachineOperand.h"
37#include "llvm/CodeGen/MachineRegisterInfo.h"
38#include "llvm/CodeGen/TargetOpcodes.h"
39#include "llvm/IR/Constants.h"
40#include "llvm/IR/DerivedTypes.h"
41#include "llvm/IR/Instructions.h"
42#include "llvm/IR/PatternMatch.h"
43#include "llvm/IR/Type.h"
44#include "llvm/IR/IntrinsicsAArch64.h"
45#include "llvm/Pass.h"
46#include "llvm/Support/Debug.h"
47#include "llvm/Support/raw_ostream.h"
48
49#define DEBUG_TYPE "aarch64-isel"
50
51using namespace llvm;
52using namespace MIPatternMatch;
53using namespace AArch64GISelUtils;
54
55namespace llvm {
56class BlockFrequencyInfo;
57class ProfileSummaryInfo;
58}
59
60namespace {
61
62#define GET_GLOBALISEL_PREDICATE_BITSET
63#include "AArch64GenGlobalISel.inc"
64#undef GET_GLOBALISEL_PREDICATE_BITSET
65
/// AArch64 implementation of GlobalISel's InstructionSelector.
///
/// Combines the tblgen-erated selector (selectImpl, pulled in via the
/// AArch64GenGlobalISel.inc includes below) with hand-written C++ lowering
/// and selection helpers for the cases the importer cannot handle yet
/// (see the \todo in the file header).
class AArch64InstructionSelector : public InstructionSelector {
public:
  AArch64InstructionSelector(const AArch64TargetMachine &TM,
                             const AArch64Subtarget &STI,
                             const AArch64RegisterBankInfo &RBI);

  bool select(MachineInstr &I) override;
  static const char *getName() { return DEBUG_TYPE; }

  /// Per-MachineFunction setup: resets the builder and the cached values
  /// below before each run of the selector.
  void setupMF(MachineFunction &MF, GISelKnownBits *KB,
               CodeGenCoverage &CoverageInfo, ProfileSummaryInfo *PSI,
               BlockFrequencyInfo *BFI) override {
    InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
    MIB.setMF(MF);

    // hasFnAttribute() is expensive to call on every BRCOND selection, so
    // cache it here for each run of the selector.
    ProduceNonFlagSettingCondBr =
        !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
    // Reset the cached return-address vreg; it is per-function state.
    MFReturnAddr = Register();

    processPHIs(MF);
  }

private:
  /// tblgen-erated 'select' implementation, used as the initial selector for
  /// the patterns that don't require complex C++.
  bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;

  // A lowering phase that runs before any selection attempts.
  // Returns true if the instruction was modified.
  bool preISelLower(MachineInstr &I);

  // An early selection function that runs before the selectImpl() call.
  bool earlySelect(MachineInstr &I);

  // Do some preprocessing of G_PHIs before we begin selection.
  void processPHIs(MachineFunction &MF);

  bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);

  /// Eliminate same-sized cross-bank copies into stores before selectImpl().
  bool contractCrossBankCopyIntoStore(MachineInstr &I,
                                      MachineRegisterInfo &MRI);

  bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);

  bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
                          MachineRegisterInfo &MRI) const;
  bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;

  ///@{
  /// Helper functions for selectCompareBranch.
  bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
                                    MachineIRBuilder &MIB) const;
  bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
                                    MachineIRBuilder &MIB) const;
  bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
                                    MachineIRBuilder &MIB) const;
  bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
                                  MachineBasicBlock *DstMBB,
                                  MachineIRBuilder &MIB) const;
  ///@}

  bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI);

  bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);

  // Helper to generate an equivalent of scalar_to_vector into a new register,
  // returned via 'Dst'.
  MachineInstr *emitScalarToVector(unsigned EltSize,
                                   const TargetRegisterClass *DstRC,
                                   Register Scalar,
                                   MachineIRBuilder &MIRBuilder) const;

  /// Emit a lane insert into \p DstReg, or a new vector register if None is
  /// provided.
  ///
  /// The lane inserted into is defined by \p LaneIdx. The vector source
  /// register is given by \p SrcReg. The register containing the element is
  /// given by \p EltReg.
  MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
                               Register EltReg, unsigned LaneIdx,
                               const RegisterBank &RB,
                               MachineIRBuilder &MIRBuilder) const;

  /// Emit a sequence of instructions representing a constant \p CV for a
  /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
  ///
  /// \returns the last instruction in the sequence on success, and nullptr
  /// otherwise.
  MachineInstr *emitConstantVector(Register Dst, Constant *CV,
                                   MachineIRBuilder &MIRBuilder,
                                   MachineRegisterInfo &MRI);

  bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI);
  bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
                              MachineRegisterInfo &MRI);
  /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
  /// SUBREG_TO_REG.
  bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
  bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI);

  bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);

  /// Helper function to select vector load intrinsics like
  /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
  /// \p Opc is the opcode that the selected instruction should use.
  /// \p NumVecs is the number of vector destinations for the instruction.
  /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
  bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
                                 MachineInstr &I);
  bool selectIntrinsicWithSideEffects(MachineInstr &I,
                                      MachineRegisterInfo &MRI);
  bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);

  unsigned emitConstantPoolEntry(const Constant *CPVal,
                                 MachineFunction &MF) const;
  MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
                                         MachineIRBuilder &MIRBuilder) const;

  // Emit a vector concat operation.
  MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
                                 Register Op2,
                                 MachineIRBuilder &MIRBuilder) const;

  // Emit an integer compare between LHS and RHS, which checks for Predicate.
  MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
                                   MachineOperand &Predicate,
                                   MachineIRBuilder &MIRBuilder) const;

  /// Emit a floating point comparison between \p LHS and \p RHS.
  /// \p Pred if given is the intended predicate to use.
  MachineInstr *emitFPCompare(Register LHS, Register RHS,
                              MachineIRBuilder &MIRBuilder,
                              Optional<CmpInst::Predicate> = None) const;

  MachineInstr *emitInstr(unsigned Opcode,
                          std::initializer_list<llvm::DstOp> DstOps,
                          std::initializer_list<llvm::SrcOp> SrcOps,
                          MachineIRBuilder &MIRBuilder,
                          const ComplexRendererFns &RenderFns = None) const;
  /// Helper function to emit an add or sub instruction.
  ///
  /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
  /// in a specific order.
  ///
  /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
  ///
  /// \code
  ///   const std::array<std::array<unsigned, 2>, 4> Table {
  ///    {{AArch64::ADDXri, AArch64::ADDWri},
  ///     {AArch64::ADDXrs, AArch64::ADDWrs},
  ///     {AArch64::ADDXrr, AArch64::ADDWrr},
  ///     {AArch64::SUBXri, AArch64::SUBWri},
  ///     {AArch64::ADDXrx, AArch64::ADDWrx}}};
  /// \endcode
  ///
  /// Each row in the table corresponds to a different addressing mode. Each
  /// column corresponds to a different register size.
  ///
  /// \attention Rows must be structured as follows:
  ///   - Row 0: The ri opcode variants
  ///   - Row 1: The rs opcode variants
  ///   - Row 2: The rr opcode variants
  ///   - Row 3: The ri opcode variants for negative immediates
  ///   - Row 4: The rx opcode variants
  ///
  /// \attention Columns must be structured as follows:
  ///   - Column 0: The 64-bit opcode variants
  ///   - Column 1: The 32-bit opcode variants
  ///
  /// \p Dst is the destination register of the binop to emit.
  /// \p LHS is the left-hand operand of the binop to emit.
  /// \p RHS is the right-hand operand of the binop to emit.
  MachineInstr *emitAddSub(
      const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
      Register Dst, MachineOperand &LHS, MachineOperand &RHS,
      MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
                        MachineOperand &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
                         MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
                         MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
                           AArch64CC::CondCode CC,
                           MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
                                     const RegisterBank &DstRB, LLT ScalarTy,
                                     Register VecReg, unsigned LaneIdx,
                                     MachineIRBuilder &MIRBuilder) const;

  /// Emit a CSet for an integer compare.
  ///
  /// \p DefReg and \p SrcReg are expected to be 32-bit scalar registers.
  MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
                                MachineIRBuilder &MIRBuilder,
                                Register SrcReg = AArch64::WZR) const;
  /// Emit a CSet for a FP compare.
  ///
  /// \p Dst is expected to be a 32-bit scalar register.
  MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
                                MachineIRBuilder &MIRBuilder) const;

  /// Emit the overflow op for \p Opcode.
  ///
  /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
  /// G_USUBO, etc.
  std::pair<MachineInstr *, AArch64CC::CondCode>
  emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
                 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;

  /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
  /// \p IsNegative is true if the test should be "not zero".
  /// This will also optimize the test bit instruction when possible.
  MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
                            MachineBasicBlock *DstMBB,
                            MachineIRBuilder &MIB) const;

  /// Emit a CB(N)Z instruction which branches to \p DestMBB.
  MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
                        MachineBasicBlock *DestMBB,
                        MachineIRBuilder &MIB) const;

  // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
  // We use these manually instead of using the importer since it doesn't
  // support SDNodeXForm.
  ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;

  ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
  ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
  ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;

  ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
                                            unsigned Size) const;

  // Size-specific wrappers around selectAddrModeUnscaled; Size is in bytes.
  ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 1);
  }
  ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 2);
  }
  ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 4);
  }
  ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 8);
  }
  ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 16);
  }

  /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
  /// from complex pattern matchers like selectAddrModeIndexed().
  ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
                                          MachineRegisterInfo &MRI) const;

  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
                                           unsigned Size) const;
  template <int Width>
  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
    return selectAddrModeIndexed(Root, Width / 8);
  }

  bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
                                     const MachineRegisterInfo &MRI) const;
  ComplexRendererFns
  selectAddrModeShiftedExtendXReg(MachineOperand &Root,
                                  unsigned SizeInBytes) const;

  /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
  /// or not a shift + extend should be folded into an addressing mode. Returns
  /// None when this is not profitable or possible.
  ComplexRendererFns
  selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
                    MachineOperand &Offset, unsigned SizeInBytes,
                    bool WantsExt) const;
  ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
                                       unsigned SizeInBytes) const;
  template <int Width>
  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
    return selectAddrModeXRO(Root, Width / 8);
  }

  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
                                       unsigned SizeInBytes) const;
  template <int Width>
  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
    return selectAddrModeWRO(Root, Width / 8);
  }

  ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
                                           bool AllowROR = false) const;

  ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
    return selectShiftedRegister(Root);
  }

  // Logical (bitwise) ops additionally allow ROR shifts.
  ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
    return selectShiftedRegister(Root, true);
  }

  /// Given an extend instruction, determine the correct shift-extend type for
  /// that instruction.
  ///
  /// If the instruction is going to be used in a load or store, pass
  /// \p IsLoadStore = true.
  AArch64_AM::ShiftExtendType
  getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
                       bool IsLoadStore = false) const;

  /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
  ///
  /// \returns Either \p Reg if no change was necessary, or the new register
  /// created by moving \p Reg.
  ///
  /// Note: This uses emitCopy right now.
  Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
                              MachineIRBuilder &MIB) const;

  ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;

  // Custom operand renderers used by imported patterns; OpIdx = -1 means
  // "render from the instruction itself" per the tblgen renderer contract.
  void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
                      int OpIdx = -1) const;
  void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
                          int OpIdx = -1) const;
  void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
                          int OpIdx = -1) const;
  void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
                     int OpIdx = -1) const;
  void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
                     int OpIdx = -1) const;
  void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
                     int OpIdx = -1) const;

  // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
  void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);

  // Optimization methods.
  bool tryOptSelect(MachineInstr &MI);
  MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
                                      MachineOperand &Predicate,
                                      MachineIRBuilder &MIRBuilder) const;

  /// Return true if \p MI is a load or store of \p NumBytes bytes.
  bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;

  /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
  /// register zeroed out. In other words, the result of MI has been explicitly
  /// zero extended.
  bool isDef32(const MachineInstr &MI) const;

  const AArch64TargetMachine &TM;
  const AArch64Subtarget &STI;
  const AArch64InstrInfo &TII;
  const AArch64RegisterInfo &TRI;
  const AArch64RegisterBankInfo &RBI;

  bool ProduceNonFlagSettingCondBr = false;

  // Some cached values used during selection.
  // We use LR as a live-in register, and we keep track of it here as it can be
  // clobbered by calls.
  Register MFReturnAddr;

  MachineIRBuilder MIB;

#define GET_GLOBALISEL_PREDICATES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL

// We declare the temporaries used by selectImpl() in the class to minimize the
// cost of constructing placeholder values.
#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_DECL
};
469
470} // end anonymous namespace
471
472#define GET_GLOBALISEL_IMPL
473#include "AArch64GenGlobalISel.inc"
474#undef GET_GLOBALISEL_IMPL
475
// Constructor: caches the subtarget's instruction/register info, then runs
// the tblgen-erated initializers for the selector's predicate and temporary
// members (spliced into the member-init list via the .inc includes below).
AArch64InstructionSelector::AArch64InstructionSelector(
    const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
    const AArch64RegisterBankInfo &RBI)
    : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}
489
490// FIXME: This should be target-independent, inferred from the types declared
491// for each class in the bank.
492static const TargetRegisterClass *
493getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
494 const RegisterBankInfo &RBI,
495 bool GetAllRegSet = false) {
496 if (RB.getID() == AArch64::GPRRegBankID) {
497 if (Ty.getSizeInBits() <= 32)
498 return GetAllRegSet ? &AArch64::GPR32allRegClass
499 : &AArch64::GPR32RegClass;
500 if (Ty.getSizeInBits() == 64)
501 return GetAllRegSet ? &AArch64::GPR64allRegClass
502 : &AArch64::GPR64RegClass;
503 if (Ty.getSizeInBits() == 128)
504 return &AArch64::XSeqPairsClassRegClass;
505 return nullptr;
506 }
507
508 if (RB.getID() == AArch64::FPRRegBankID) {
509 switch (Ty.getSizeInBits()) {
510 case 8:
511 return &AArch64::FPR8RegClass;
512 case 16:
513 return &AArch64::FPR16RegClass;
514 case 32:
515 return &AArch64::FPR32RegClass;
516 case 64:
517 return &AArch64::FPR64RegClass;
518 case 128:
519 return &AArch64::FPR128RegClass;
520 }
521 return nullptr;
522 }
523
524 return nullptr;
525}
526
527/// Given a register bank, and size in bits, return the smallest register class
528/// that can represent that combination.
529static const TargetRegisterClass *
530getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
531 bool GetAllRegSet = false) {
532 unsigned RegBankID = RB.getID();
533
534 if (RegBankID == AArch64::GPRRegBankID) {
535 if (SizeInBits <= 32)
536 return GetAllRegSet ? &AArch64::GPR32allRegClass
537 : &AArch64::GPR32RegClass;
538 if (SizeInBits == 64)
539 return GetAllRegSet ? &AArch64::GPR64allRegClass
540 : &AArch64::GPR64RegClass;
541 if (SizeInBits == 128)
542 return &AArch64::XSeqPairsClassRegClass;
543 }
544
545 if (RegBankID == AArch64::FPRRegBankID) {
546 switch (SizeInBits) {
547 default:
548 return nullptr;
549 case 8:
550 return &AArch64::FPR8RegClass;
551 case 16:
552 return &AArch64::FPR16RegClass;
553 case 32:
554 return &AArch64::FPR32RegClass;
555 case 64:
556 return &AArch64::FPR64RegClass;
557 case 128:
558 return &AArch64::FPR128RegClass;
559 }
560 }
561
562 return nullptr;
563}
564
565/// Returns the correct subregister to use for a given register class.
566static bool getSubRegForClass(const TargetRegisterClass *RC,
567 const TargetRegisterInfo &TRI, unsigned &SubReg) {
568 switch (TRI.getRegSizeInBits(*RC)) {
47
Control jumps to the 'default' case at line 584
569 case 8:
570 SubReg = AArch64::bsub;
571 break;
572 case 16:
573 SubReg = AArch64::hsub;
574 break;
575 case 32:
576 if (RC != &AArch64::FPR32RegClass)
577 SubReg = AArch64::sub_32;
578 else
579 SubReg = AArch64::ssub;
580 break;
581 case 64:
582 SubReg = AArch64::dsub;
583 break;
584 default:
585 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Couldn't find appropriate subregister for register class."
; } } while (false)
48
Assuming 'DebugFlag' is false
49
Loop condition is false. Exiting loop
586 dbgs() << "Couldn't find appropriate subregister for register class.")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Couldn't find appropriate subregister for register class."
; } } while (false)
;
587 return false;
50
Returning without writing to 'SubReg'
588 }
589
590 return true;
591}
592
593/// Returns the minimum size the given register bank can hold.
594static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
595 switch (RB.getID()) {
596 case AArch64::GPRRegBankID:
597 return 32;
598 case AArch64::FPRRegBankID:
599 return 8;
600 default:
601 llvm_unreachable("Tried to get minimum size for unknown register bank.")::llvm::llvm_unreachable_internal("Tried to get minimum size for unknown register bank."
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 601)
;
602 }
603}
604
605/// Create a REG_SEQUENCE instruction using the registers in \p Regs.
606/// Helper function for functions like createDTuple and createQTuple.
607///
608/// \p RegClassIDs - The list of register class IDs available for some tuple of
609/// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
610/// expected to contain between 2 and 4 tuple classes.
611///
612/// \p SubRegs - The list of subregister classes associated with each register
613/// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
614/// subregister class. The index of each subregister class is expected to
615/// correspond with the index of each register class.
616///
617/// \returns Either the destination register of REG_SEQUENCE instruction that
618/// was created, or the 0th element of \p Regs if \p Regs contains a single
619/// element.
620static Register createTuple(ArrayRef<Register> Regs,
621 const unsigned RegClassIDs[],
622 const unsigned SubRegs[], MachineIRBuilder &MIB) {
623 unsigned NumRegs = Regs.size();
624 if (NumRegs == 1)
625 return Regs[0];
626 assert(NumRegs >= 2 && NumRegs <= 4 &&(static_cast <bool> (NumRegs >= 2 && NumRegs
<= 4 && "Only support between two and 4 registers in a tuple!"
) ? void (0) : __assert_fail ("NumRegs >= 2 && NumRegs <= 4 && \"Only support between two and 4 registers in a tuple!\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 627, __extension__ __PRETTY_FUNCTION__))
627 "Only support between two and 4 registers in a tuple!")(static_cast <bool> (NumRegs >= 2 && NumRegs
<= 4 && "Only support between two and 4 registers in a tuple!"
) ? void (0) : __assert_fail ("NumRegs >= 2 && NumRegs <= 4 && \"Only support between two and 4 registers in a tuple!\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 627, __extension__ __PRETTY_FUNCTION__))
;
628 const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();
629 auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
630 auto RegSequence =
631 MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
632 for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
633 RegSequence.addUse(Regs[I]);
634 RegSequence.addImm(SubRegs[I]);
635 }
636 return RegSequence.getReg(0);
637}
638
639/// Create a tuple of D-registers using the registers in \p Regs.
640static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
641 static const unsigned RegClassIDs[] = {
642 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
643 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
644 AArch64::dsub2, AArch64::dsub3};
645 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
646}
647
648/// Create a tuple of Q-registers using the registers in \p Regs.
649static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
650 static const unsigned RegClassIDs[] = {
651 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
652 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
653 AArch64::qsub2, AArch64::qsub3};
654 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
655}
656
657static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
658 auto &MI = *Root.getParent();
659 auto &MBB = *MI.getParent();
660 auto &MF = *MBB.getParent();
661 auto &MRI = MF.getRegInfo();
662 uint64_t Immed;
663 if (Root.isImm())
664 Immed = Root.getImm();
665 else if (Root.isCImm())
666 Immed = Root.getCImm()->getZExtValue();
667 else if (Root.isReg()) {
668 auto ValAndVReg =
669 getIConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
670 if (!ValAndVReg)
671 return None;
672 Immed = ValAndVReg->Value.getSExtValue();
673 } else
674 return None;
675 return Immed;
676}
677
678/// Check whether \p I is a currently unsupported binary operation:
679/// - it has an unsized type
680/// - an operand is not a vreg
681/// - all operands are not in the same bank
682/// These are checks that should someday live in the verifier, but right now,
683/// these are mostly limitations of the aarch64 selector.
684static bool unsupportedBinOp(const MachineInstr &I,
685 const AArch64RegisterBankInfo &RBI,
686 const MachineRegisterInfo &MRI,
687 const AArch64RegisterInfo &TRI) {
688 LLT Ty = MRI.getType(I.getOperand(0).getReg());
689 if (!Ty.isValid()) {
690 LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Generic binop register should be typed\n"
; } } while (false)
;
691 return true;
692 }
693
694 const RegisterBank *PrevOpBank = nullptr;
695 for (auto &MO : I.operands()) {
696 // FIXME: Support non-register operands.
697 if (!MO.isReg()) {
698 LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Generic inst non-reg operands are unsupported\n"
; } } while (false)
;
699 return true;
700 }
701
702 // FIXME: Can generic operations have physical registers operands? If
703 // so, this will need to be taught about that, and we'll need to get the
704 // bank out of the minimal class for the register.
705 // Either way, this needs to be documented (and possibly verified).
706 if (!Register::isVirtualRegister(MO.getReg())) {
707 LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Generic inst has physical register operand\n"
; } } while (false)
;
708 return true;
709 }
710
711 const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
712 if (!OpBank) {
713 LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Generic register has no bank or class\n"
; } } while (false)
;
714 return true;
715 }
716
717 if (PrevOpBank && OpBank != PrevOpBank) {
718 LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Generic inst operands have different banks\n"
; } } while (false)
;
719 return true;
720 }
721 PrevOpBank = OpBank;
722 }
723 return false;
724}
725
726/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
727/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
728/// and of size \p OpSize.
729/// \returns \p GenericOpc if the combination is unsupported.
730static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
731 unsigned OpSize) {
732 switch (RegBankID) {
733 case AArch64::GPRRegBankID:
734 if (OpSize == 32) {
735 switch (GenericOpc) {
736 case TargetOpcode::G_SHL:
737 return AArch64::LSLVWr;
738 case TargetOpcode::G_LSHR:
739 return AArch64::LSRVWr;
740 case TargetOpcode::G_ASHR:
741 return AArch64::ASRVWr;
742 default:
743 return GenericOpc;
744 }
745 } else if (OpSize == 64) {
746 switch (GenericOpc) {
747 case TargetOpcode::G_PTR_ADD:
748 return AArch64::ADDXrr;
749 case TargetOpcode::G_SHL:
750 return AArch64::LSLVXr;
751 case TargetOpcode::G_LSHR:
752 return AArch64::LSRVXr;
753 case TargetOpcode::G_ASHR:
754 return AArch64::ASRVXr;
755 default:
756 return GenericOpc;
757 }
758 }
759 break;
760 case AArch64::FPRRegBankID:
761 switch (OpSize) {
762 case 32:
763 switch (GenericOpc) {
764 case TargetOpcode::G_FADD:
765 return AArch64::FADDSrr;
766 case TargetOpcode::G_FSUB:
767 return AArch64::FSUBSrr;
768 case TargetOpcode::G_FMUL:
769 return AArch64::FMULSrr;
770 case TargetOpcode::G_FDIV:
771 return AArch64::FDIVSrr;
772 default:
773 return GenericOpc;
774 }
775 case 64:
776 switch (GenericOpc) {
777 case TargetOpcode::G_FADD:
778 return AArch64::FADDDrr;
779 case TargetOpcode::G_FSUB:
780 return AArch64::FSUBDrr;
781 case TargetOpcode::G_FMUL:
782 return AArch64::FMULDrr;
783 case TargetOpcode::G_FDIV:
784 return AArch64::FDIVDrr;
785 case TargetOpcode::G_OR:
786 return AArch64::ORRv8i8;
787 default:
788 return GenericOpc;
789 }
790 }
791 break;
792 }
793 return GenericOpc;
794}
795
796/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
797/// appropriate for the (value) register bank \p RegBankID and of memory access
798/// size \p OpSize. This returns the variant with the base+unsigned-immediate
799/// addressing mode (e.g., LDRXui).
800/// \returns \p GenericOpc if the combination is unsupported.
801static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
802 unsigned OpSize) {
803 const bool isStore = GenericOpc == TargetOpcode::G_STORE;
804 switch (RegBankID) {
805 case AArch64::GPRRegBankID:
806 switch (OpSize) {
807 case 8:
808 return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
809 case 16:
810 return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
811 case 32:
812 return isStore ? AArch64::STRWui : AArch64::LDRWui;
813 case 64:
814 return isStore ? AArch64::STRXui : AArch64::LDRXui;
815 }
816 break;
817 case AArch64::FPRRegBankID:
818 switch (OpSize) {
819 case 8:
820 return isStore ? AArch64::STRBui : AArch64::LDRBui;
821 case 16:
822 return isStore ? AArch64::STRHui : AArch64::LDRHui;
823 case 32:
824 return isStore ? AArch64::STRSui : AArch64::LDRSui;
825 case 64:
826 return isStore ? AArch64::STRDui : AArch64::LDRDui;
827 case 128:
828 return isStore ? AArch64::STRQui : AArch64::LDRQui;
829 }
830 break;
831 }
832 return GenericOpc;
833}
834
835#ifndef NDEBUG
836/// Helper function that verifies that we have a valid copy at the end of
837/// selectCopy. Verifies that the source and dest have the expected sizes and
838/// then returns true.
839static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
840 const MachineRegisterInfo &MRI,
841 const TargetRegisterInfo &TRI,
842 const RegisterBankInfo &RBI) {
843 const Register DstReg = I.getOperand(0).getReg();
844 const Register SrcReg = I.getOperand(1).getReg();
845 const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
846 const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
847
848 // Make sure the size of the source and dest line up.
849 assert((static_cast <bool> ((DstSize == SrcSize || (Register::
isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
(((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize
> SrcSize)) && "Copy with different width?!") ? void
(0) : __assert_fail ("(DstSize == SrcSize || (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && \"Copy with different width?!\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 858, __extension__ __PRETTY_FUNCTION__))
850 (DstSize == SrcSize ||(static_cast <bool> ((DstSize == SrcSize || (Register::
isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
(((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize
> SrcSize)) && "Copy with different width?!") ? void
(0) : __assert_fail ("(DstSize == SrcSize || (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && \"Copy with different width?!\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 858, __extension__ __PRETTY_FUNCTION__))
851 // Copies are a mean to setup initial types, the number of(static_cast <bool> ((DstSize == SrcSize || (Register::
isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
(((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize
> SrcSize)) && "Copy with different width?!") ? void
(0) : __assert_fail ("(DstSize == SrcSize || (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && \"Copy with different width?!\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 858, __extension__ __PRETTY_FUNCTION__))
852 // bits may not exactly match.(static_cast <bool> ((DstSize == SrcSize || (Register::
isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
(((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize
> SrcSize)) && "Copy with different width?!") ? void
(0) : __assert_fail ("(DstSize == SrcSize || (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && \"Copy with different width?!\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 858, __extension__ __PRETTY_FUNCTION__))
853 (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||(static_cast <bool> ((DstSize == SrcSize || (Register::
isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
(((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize
> SrcSize)) && "Copy with different width?!") ? void
(0) : __assert_fail ("(DstSize == SrcSize || (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && \"Copy with different width?!\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 858, __extension__ __PRETTY_FUNCTION__))
854 // Copies are a mean to copy bits around, as long as we are(static_cast <bool> ((DstSize == SrcSize || (Register::
isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
(((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize
> SrcSize)) && "Copy with different width?!") ? void
(0) : __assert_fail ("(DstSize == SrcSize || (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && \"Copy with different width?!\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 858, __extension__ __PRETTY_FUNCTION__))
855 // on the same register class, that's fine. Otherwise, that(static_cast <bool> ((DstSize == SrcSize || (Register::
isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
(((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize
> SrcSize)) && "Copy with different width?!") ? void
(0) : __assert_fail ("(DstSize == SrcSize || (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && \"Copy with different width?!\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 858, __extension__ __PRETTY_FUNCTION__))
856 // means we need some SUBREG_TO_REG or AND & co.(static_cast <bool> ((DstSize == SrcSize || (Register::
isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
(((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize
> SrcSize)) && "Copy with different width?!") ? void
(0) : __assert_fail ("(DstSize == SrcSize || (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && \"Copy with different width?!\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 858, __extension__ __PRETTY_FUNCTION__))
857 (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&(static_cast <bool> ((DstSize == SrcSize || (Register::
isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
(((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize
> SrcSize)) && "Copy with different width?!") ? void
(0) : __assert_fail ("(DstSize == SrcSize || (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && \"Copy with different width?!\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 858, __extension__ __PRETTY_FUNCTION__))
858 "Copy with different width?!")(static_cast <bool> ((DstSize == SrcSize || (Register::
isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
(((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize
> SrcSize)) && "Copy with different width?!") ? void
(0) : __assert_fail ("(DstSize == SrcSize || (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && \"Copy with different width?!\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 858, __extension__ __PRETTY_FUNCTION__))
;
859
860 // Check the size of the destination.
861 assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&(static_cast <bool> ((DstSize <= 64 || DstBank.getID
() == AArch64::FPRRegBankID) && "GPRs cannot get more than 64-bit width values"
) ? void (0) : __assert_fail ("(DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) && \"GPRs cannot get more than 64-bit width values\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 862, __extension__ __PRETTY_FUNCTION__))
862 "GPRs cannot get more than 64-bit width values")(static_cast <bool> ((DstSize <= 64 || DstBank.getID
() == AArch64::FPRRegBankID) && "GPRs cannot get more than 64-bit width values"
) ? void (0) : __assert_fail ("(DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) && \"GPRs cannot get more than 64-bit width values\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 862, __extension__ __PRETTY_FUNCTION__))
;
863
864 return true;
865}
866#endif
867
868/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
869/// to \p *To.
870///
871/// E.g "To = COPY SrcReg:SubReg"
872static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
873 const RegisterBankInfo &RBI, Register SrcReg,
874 const TargetRegisterClass *To, unsigned SubReg) {
875 assert(SrcReg.isValid() && "Expected a valid source register?")(static_cast <bool> (SrcReg.isValid() && "Expected a valid source register?"
) ? void (0) : __assert_fail ("SrcReg.isValid() && \"Expected a valid source register?\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 875, __extension__ __PRETTY_FUNCTION__))
;
876 assert(To && "Destination register class cannot be null")(static_cast <bool> (To && "Destination register class cannot be null"
) ? void (0) : __assert_fail ("To && \"Destination register class cannot be null\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 876, __extension__ __PRETTY_FUNCTION__))
;
877 assert(SubReg && "Expected a valid subregister")(static_cast <bool> (SubReg && "Expected a valid subregister"
) ? void (0) : __assert_fail ("SubReg && \"Expected a valid subregister\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 877, __extension__ __PRETTY_FUNCTION__))
;
878
879 MachineIRBuilder MIB(I);
880 auto SubRegCopy =
881 MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
882 MachineOperand &RegOp = I.getOperand(1);
883 RegOp.setReg(SubRegCopy.getReg(0));
884
885 // It's possible that the destination register won't be constrained. Make
886 // sure that happens.
887 if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))
888 RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
889
890 return true;
891}
892
893/// Helper function to get the source and destination register classes for a
894/// copy. Returns a std::pair containing the source register class for the
895/// copy, and the destination register class for the copy. If a register class
896/// cannot be determined, then it will be nullptr.
897static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
898getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
899 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
900 const RegisterBankInfo &RBI) {
901 Register DstReg = I.getOperand(0).getReg();
902 Register SrcReg = I.getOperand(1).getReg();
903 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
904 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
905 unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
906 unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
907
908 // Special casing for cross-bank copies of s1s. We can technically represent
909 // a 1-bit value with any size of register. The minimum size for a GPR is 32
910 // bits. So, we need to put the FPR on 32 bits as well.
911 //
912 // FIXME: I'm not sure if this case holds true outside of copies. If it does,
913 // then we can pull it into the helpers that get the appropriate class for a
914 // register bank. Or make a new helper that carries along some constraint
915 // information.
916 if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
917 SrcSize = DstSize = 32;
918
919 return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
920 getMinClassForRegBank(DstRegBank, DstSize, true)};
921}
922
923static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
924 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
925 const RegisterBankInfo &RBI) {
926 Register DstReg = I.getOperand(0).getReg();
927 Register SrcReg = I.getOperand(1).getReg();
928 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
929 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
930
931 // Find the correct register classes for the source and destination registers.
932 const TargetRegisterClass *SrcRC;
933 const TargetRegisterClass *DstRC;
934 std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
16
Calling 'tie<const llvm::TargetRegisterClass *, const llvm::TargetRegisterClass *>'
27
Returning from 'tie<const llvm::TargetRegisterClass *, const llvm::TargetRegisterClass *>'
28
Calling 'tuple::operator='
31
Returning from 'tuple::operator='
935
936 if (!DstRC) {
32
Assuming 'DstRC' is non-null
33
Taking false branch
937 LLVM_DEBUG(dbgs() << "Unexpected dest size "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unexpected dest size " <<
RBI.getSizeInBits(DstReg, MRI, TRI) << '\n'; } } while
(false)
938 << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unexpected dest size " <<
RBI.getSizeInBits(DstReg, MRI, TRI) << '\n'; } } while
(false)
;
939 return false;
940 }
941
942 // A couple helpers below, for making sure that the copy we produce is valid.
943
944 // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
945 // to verify that the src and dst are the same size, since that's handled by
946 // the SUBREG_TO_REG.
947 bool KnownValid = false;
948
949 // Returns true, or asserts if something we don't expect happens. Instead of
950 // returning true, we return isValidCopy() to ensure that we verify the
951 // result.
952 auto CheckCopy = [&]() {
953 // If we have a bitcast or something, we can't have physical registers.
954 assert((I.isCopy() ||(static_cast <bool> ((I.isCopy() || (!Register::isPhysicalRegister
(I.getOperand(0).getReg()) && !Register::isPhysicalRegister
(I.getOperand(1).getReg()))) && "No phys reg on generic operator!"
) ? void (0) : __assert_fail ("(I.isCopy() || (!Register::isPhysicalRegister(I.getOperand(0).getReg()) && !Register::isPhysicalRegister(I.getOperand(1).getReg()))) && \"No phys reg on generic operator!\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 957, __extension__ __PRETTY_FUNCTION__))
955 (!Register::isPhysicalRegister(I.getOperand(0).getReg()) &&(static_cast <bool> ((I.isCopy() || (!Register::isPhysicalRegister
(I.getOperand(0).getReg()) && !Register::isPhysicalRegister
(I.getOperand(1).getReg()))) && "No phys reg on generic operator!"
) ? void (0) : __assert_fail ("(I.isCopy() || (!Register::isPhysicalRegister(I.getOperand(0).getReg()) && !Register::isPhysicalRegister(I.getOperand(1).getReg()))) && \"No phys reg on generic operator!\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 957, __extension__ __PRETTY_FUNCTION__))
956 !Register::isPhysicalRegister(I.getOperand(1).getReg()))) &&(static_cast <bool> ((I.isCopy() || (!Register::isPhysicalRegister
(I.getOperand(0).getReg()) && !Register::isPhysicalRegister
(I.getOperand(1).getReg()))) && "No phys reg on generic operator!"
) ? void (0) : __assert_fail ("(I.isCopy() || (!Register::isPhysicalRegister(I.getOperand(0).getReg()) && !Register::isPhysicalRegister(I.getOperand(1).getReg()))) && \"No phys reg on generic operator!\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 957, __extension__ __PRETTY_FUNCTION__))
957 "No phys reg on generic operator!")(static_cast <bool> ((I.isCopy() || (!Register::isPhysicalRegister
(I.getOperand(0).getReg()) && !Register::isPhysicalRegister
(I.getOperand(1).getReg()))) && "No phys reg on generic operator!"
) ? void (0) : __assert_fail ("(I.isCopy() || (!Register::isPhysicalRegister(I.getOperand(0).getReg()) && !Register::isPhysicalRegister(I.getOperand(1).getReg()))) && \"No phys reg on generic operator!\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 957, __extension__ __PRETTY_FUNCTION__))
;
958 bool ValidCopy = true;
959#ifndef NDEBUG
960 ValidCopy = KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI);
961 assert(ValidCopy && "Invalid copy.")(static_cast <bool> (ValidCopy && "Invalid copy."
) ? void (0) : __assert_fail ("ValidCopy && \"Invalid copy.\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 961, __extension__ __PRETTY_FUNCTION__))
;
962#endif
963 (void)KnownValid;
964 return ValidCopy;
965 };
966
967 // Is this a copy? If so, then we may need to insert a subregister copy.
968 if (I.isCopy()) {
34
Calling 'MachineInstr::isCopy'
37
Returning from 'MachineInstr::isCopy'
38
Taking true branch
969 // Yes. Check if there's anything to fix up.
970 if (!SrcRC) {
39
Assuming 'SrcRC' is non-null
40
Taking false branch
971 LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Couldn't determine source register class\n"
; } } while (false)
;
972 return false;
973 }
974
975 unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
976 unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
977 unsigned SubReg;
41
'SubReg' declared without an initial value
978
979 // If the source bank doesn't support a subregister copy small enough,
980 // then we first need to copy to the destination bank.
981 if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
42
Assuming the condition is false
43
Taking false branch
982 const TargetRegisterClass *DstTempRC =
983 getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
984 getSubRegForClass(DstRC, TRI, SubReg);
985
986 MachineIRBuilder MIB(I);
987 auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
988 copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
989 } else if (SrcSize > DstSize) {
44
Assuming 'SrcSize' is > 'DstSize'
45
Taking true branch
990 // If the source register is bigger than the destination we need to
991 // perform a subregister copy.
992 const TargetRegisterClass *SubRegRC =
993 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
994 getSubRegForClass(SubRegRC, TRI, SubReg);
46
Calling 'getSubRegForClass'
51
Returning from 'getSubRegForClass'
995 copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
52
6th function call argument is an uninitialized value
996 } else if (DstSize > SrcSize) {
997 // If the destination register is bigger than the source we need to do
998 // a promotion using SUBREG_TO_REG.
999 const TargetRegisterClass *PromotionRC =
1000 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
1001 getSubRegForClass(SrcRC, TRI, SubReg);
1002
1003 Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
1004 BuildMI(*I.getParent(), I, I.getDebugLoc(),
1005 TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
1006 .addImm(0)
1007 .addUse(SrcReg)
1008 .addImm(SubReg);
1009 MachineOperand &RegOp = I.getOperand(1);
1010 RegOp.setReg(PromoteReg);
1011
1012 // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
1013 KnownValid = true;
1014 }
1015
1016 // If the destination is a physical register, then there's nothing to
1017 // change, so we're done.
1018 if (Register::isPhysicalRegister(DstReg))
1019 return CheckCopy();
1020 }
1021
1022 // No need to constrain SrcReg. It will get constrained when we hit another
1023 // of its use or its defs. Copies do not have constraints.
1024 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
1025 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Failed to constrain " <<
TII.getName(I.getOpcode()) << " operand\n"; } } while (
false)
1026 << " operand\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Failed to constrain " <<
TII.getName(I.getOpcode()) << " operand\n"; } } while (
false)
;
1027 return false;
1028 }
1029
1030 // If this a GPR ZEXT that we want to just reduce down into a copy.
1031 // The sizes will be mismatched with the source < 32b but that's ok.
1032 if (I.getOpcode() == TargetOpcode::G_ZEXT) {
1033 I.setDesc(TII.get(AArch64::COPY));
1034 assert(SrcRegBank.getID() == AArch64::GPRRegBankID)(static_cast <bool> (SrcRegBank.getID() == AArch64::GPRRegBankID
) ? void (0) : __assert_fail ("SrcRegBank.getID() == AArch64::GPRRegBankID"
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1034, __extension__ __PRETTY_FUNCTION__))
;
1035 return selectCopy(I, TII, MRI, TRI, RBI);
1036 }
1037
1038 I.setDesc(TII.get(AArch64::COPY));
1039 return CheckCopy();
1040}
1041
1042static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
1043 if (!DstTy.isScalar() || !SrcTy.isScalar())
1044 return GenericOpc;
1045
1046 const unsigned DstSize = DstTy.getSizeInBits();
1047 const unsigned SrcSize = SrcTy.getSizeInBits();
1048
1049 switch (DstSize) {
1050 case 32:
1051 switch (SrcSize) {
1052 case 32:
1053 switch (GenericOpc) {
1054 case TargetOpcode::G_SITOFP:
1055 return AArch64::SCVTFUWSri;
1056 case TargetOpcode::G_UITOFP:
1057 return AArch64::UCVTFUWSri;
1058 case TargetOpcode::G_FPTOSI:
1059 return AArch64::FCVTZSUWSr;
1060 case TargetOpcode::G_FPTOUI:
1061 return AArch64::FCVTZUUWSr;
1062 default:
1063 return GenericOpc;
1064 }
1065 case 64:
1066 switch (GenericOpc) {
1067 case TargetOpcode::G_SITOFP:
1068 return AArch64::SCVTFUXSri;
1069 case TargetOpcode::G_UITOFP:
1070 return AArch64::UCVTFUXSri;
1071 case TargetOpcode::G_FPTOSI:
1072 return AArch64::FCVTZSUWDr;
1073 case TargetOpcode::G_FPTOUI:
1074 return AArch64::FCVTZUUWDr;
1075 default:
1076 return GenericOpc;
1077 }
1078 default:
1079 return GenericOpc;
1080 }
1081 case 64:
1082 switch (SrcSize) {
1083 case 32:
1084 switch (GenericOpc) {
1085 case TargetOpcode::G_SITOFP:
1086 return AArch64::SCVTFUWDri;
1087 case TargetOpcode::G_UITOFP:
1088 return AArch64::UCVTFUWDri;
1089 case TargetOpcode::G_FPTOSI:
1090 return AArch64::FCVTZSUXSr;
1091 case TargetOpcode::G_FPTOUI:
1092 return AArch64::FCVTZUUXSr;
1093 default:
1094 return GenericOpc;
1095 }
1096 case 64:
1097 switch (GenericOpc) {
1098 case TargetOpcode::G_SITOFP:
1099 return AArch64::SCVTFUXDri;
1100 case TargetOpcode::G_UITOFP:
1101 return AArch64::UCVTFUXDri;
1102 case TargetOpcode::G_FPTOSI:
1103 return AArch64::FCVTZSUXDr;
1104 case TargetOpcode::G_FPTOUI:
1105 return AArch64::FCVTZUUXDr;
1106 default:
1107 return GenericOpc;
1108 }
1109 default:
1110 return GenericOpc;
1111 }
1112 default:
1113 return GenericOpc;
1114 };
1115 return GenericOpc;
1116}
1117
1118MachineInstr *
1119AArch64InstructionSelector::emitSelect(Register Dst, Register True,
1120 Register False, AArch64CC::CondCode CC,
1121 MachineIRBuilder &MIB) const {
1122 MachineRegisterInfo &MRI = *MIB.getMRI();
1123 assert(RBI.getRegBank(False, MRI, TRI)->getID() ==(static_cast <bool> (RBI.getRegBank(False, MRI, TRI)->
getID() == RBI.getRegBank(True, MRI, TRI)->getID() &&
"Expected both select operands to have the same regbank?") ?
void (0) : __assert_fail ("RBI.getRegBank(False, MRI, TRI)->getID() == RBI.getRegBank(True, MRI, TRI)->getID() && \"Expected both select operands to have the same regbank?\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1125, __extension__ __PRETTY_FUNCTION__))
1124 RBI.getRegBank(True, MRI, TRI)->getID() &&(static_cast <bool> (RBI.getRegBank(False, MRI, TRI)->
getID() == RBI.getRegBank(True, MRI, TRI)->getID() &&
"Expected both select operands to have the same regbank?") ?
void (0) : __assert_fail ("RBI.getRegBank(False, MRI, TRI)->getID() == RBI.getRegBank(True, MRI, TRI)->getID() && \"Expected both select operands to have the same regbank?\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1125, __extension__ __PRETTY_FUNCTION__))
1125 "Expected both select operands to have the same regbank?")(static_cast <bool> (RBI.getRegBank(False, MRI, TRI)->
getID() == RBI.getRegBank(True, MRI, TRI)->getID() &&
"Expected both select operands to have the same regbank?") ?
void (0) : __assert_fail ("RBI.getRegBank(False, MRI, TRI)->getID() == RBI.getRegBank(True, MRI, TRI)->getID() && \"Expected both select operands to have the same regbank?\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1125, __extension__ __PRETTY_FUNCTION__))
;
1126 LLT Ty = MRI.getType(True);
1127 if (Ty.isVector())
1128 return nullptr;
1129 const unsigned Size = Ty.getSizeInBits();
1130 assert((Size == 32 || Size == 64) &&(static_cast <bool> ((Size == 32 || Size == 64) &&
"Expected 32 bit or 64 bit select only?") ? void (0) : __assert_fail
("(Size == 32 || Size == 64) && \"Expected 32 bit or 64 bit select only?\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1131, __extension__ __PRETTY_FUNCTION__))
1131 "Expected 32 bit or 64 bit select only?")(static_cast <bool> ((Size == 32 || Size == 64) &&
"Expected 32 bit or 64 bit select only?") ? void (0) : __assert_fail
("(Size == 32 || Size == 64) && \"Expected 32 bit or 64 bit select only?\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1131, __extension__ __PRETTY_FUNCTION__))
;
1132 const bool Is32Bit = Size == 32;
1133 if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
1134 unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1135 auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1136 constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
1137 return &*FCSel;
1138 }
1139
1140 // By default, we'll try and emit a CSEL.
1141 unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1142 bool Optimized = false;
1143 auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
1144 &Optimized](Register &Reg, Register &OtherReg,
1145 bool Invert) {
1146 if (Optimized)
1147 return false;
1148
1149 // Attempt to fold:
1150 //
1151 // %sub = G_SUB 0, %x
1152 // %select = G_SELECT cc, %reg, %sub
1153 //
1154 // Into:
1155 // %select = CSNEG %reg, %x, cc
1156 Register MatchReg;
1157 if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
1158 Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1159 Reg = MatchReg;
1160 if (Invert) {
1161 CC = AArch64CC::getInvertedCondCode(CC);
1162 std::swap(Reg, OtherReg);
1163 }
1164 return true;
1165 }
1166
1167 // Attempt to fold:
1168 //
1169 // %xor = G_XOR %x, -1
1170 // %select = G_SELECT cc, %reg, %xor
1171 //
1172 // Into:
1173 // %select = CSINV %reg, %x, cc
1174 if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
1175 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1176 Reg = MatchReg;
1177 if (Invert) {
1178 CC = AArch64CC::getInvertedCondCode(CC);
1179 std::swap(Reg, OtherReg);
1180 }
1181 return true;
1182 }
1183
1184 // Attempt to fold:
1185 //
1186 // %add = G_ADD %x, 1
1187 // %select = G_SELECT cc, %reg, %add
1188 //
1189 // Into:
1190 // %select = CSINC %reg, %x, cc
1191 if (mi_match(Reg, MRI,
1192 m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),
1193 m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {
1194 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1195 Reg = MatchReg;
1196 if (Invert) {
1197 CC = AArch64CC::getInvertedCondCode(CC);
1198 std::swap(Reg, OtherReg);
1199 }
1200 return true;
1201 }
1202
1203 return false;
1204 };
1205
1206 // Helper lambda which tries to use CSINC/CSINV for the instruction when its
1207 // true/false values are constants.
1208 // FIXME: All of these patterns already exist in tablegen. We should be
1209 // able to import these.
1210 auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
1211 &Optimized]() {
1212 if (Optimized)
1213 return false;
1214 auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI);
1215 auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI);
1216 if (!TrueCst && !FalseCst)
1217 return false;
1218
1219 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1220 if (TrueCst && FalseCst) {
1221 int64_t T = TrueCst->Value.getSExtValue();
1222 int64_t F = FalseCst->Value.getSExtValue();
1223
1224 if (T == 0 && F == 1) {
1225 // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
1226 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1227 True = ZReg;
1228 False = ZReg;
1229 return true;
1230 }
1231
1232 if (T == 0 && F == -1) {
1233 // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
1234 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1235 True = ZReg;
1236 False = ZReg;
1237 return true;
1238 }
1239 }
1240
1241 if (TrueCst) {
1242 int64_t T = TrueCst->Value.getSExtValue();
1243 if (T == 1) {
1244 // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
1245 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1246 True = False;
1247 False = ZReg;
1248 CC = AArch64CC::getInvertedCondCode(CC);
1249 return true;
1250 }
1251
1252 if (T == -1) {
1253 // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
1254 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1255 True = False;
1256 False = ZReg;
1257 CC = AArch64CC::getInvertedCondCode(CC);
1258 return true;
1259 }
1260 }
1261
1262 if (FalseCst) {
1263 int64_t F = FalseCst->Value.getSExtValue();
1264 if (F == 1) {
1265 // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
1266 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1267 False = ZReg;
1268 return true;
1269 }
1270
1271 if (F == -1) {
1272 // G_SELECT cc, t, -1 -> CSINC t, zreg, cc
1273 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1274 False = ZReg;
1275 return true;
1276 }
1277 }
1278 return false;
1279 };
1280
1281 Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
1282 Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
1283 Optimized |= TryOptSelectCst();
1284 auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1285 constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
1286 return &*SelectInst;
1287}
1288
1289static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
1290 switch (P) {
1291 default:
1292 llvm_unreachable("Unknown condition code!")::llvm::llvm_unreachable_internal("Unknown condition code!", "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1292)
;
1293 case CmpInst::ICMP_NE:
1294 return AArch64CC::NE;
1295 case CmpInst::ICMP_EQ:
1296 return AArch64CC::EQ;
1297 case CmpInst::ICMP_SGT:
1298 return AArch64CC::GT;
1299 case CmpInst::ICMP_SGE:
1300 return AArch64CC::GE;
1301 case CmpInst::ICMP_SLT:
1302 return AArch64CC::LT;
1303 case CmpInst::ICMP_SLE:
1304 return AArch64CC::LE;
1305 case CmpInst::ICMP_UGT:
1306 return AArch64CC::HI;
1307 case CmpInst::ICMP_UGE:
1308 return AArch64CC::HS;
1309 case CmpInst::ICMP_ULT:
1310 return AArch64CC::LO;
1311 case CmpInst::ICMP_ULE:
1312 return AArch64CC::LS;
1313 }
1314}
1315
1316/// Return a register which can be used as a bit to test in a TB(N)Z.
1317static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1318 MachineRegisterInfo &MRI) {
1319 assert(Reg.isValid() && "Expected valid register!")(static_cast <bool> (Reg.isValid() && "Expected valid register!"
) ? void (0) : __assert_fail ("Reg.isValid() && \"Expected valid register!\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1319, __extension__ __PRETTY_FUNCTION__))
;
1320 bool HasZext = false;
1321 while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1322 unsigned Opc = MI->getOpcode();
1323
1324 if (!MI->getOperand(0).isReg() ||
1325 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1326 break;
1327
1328 // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
1329 //
1330 // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1331 // on the truncated x is the same as the bit number on x.
1332 if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1333 Opc == TargetOpcode::G_TRUNC) {
1334 if (Opc == TargetOpcode::G_ZEXT)
1335 HasZext = true;
1336
1337 Register NextReg = MI->getOperand(1).getReg();
1338 // Did we find something worth folding?
1339 if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
1340 break;
1341
1342 // NextReg is worth folding. Keep looking.
1343 Reg = NextReg;
1344 continue;
1345 }
1346
1347 // Attempt to find a suitable operation with a constant on one side.
1348 Optional<uint64_t> C;
1349 Register TestReg;
1350 switch (Opc) {
1351 default:
1352 break;
1353 case TargetOpcode::G_AND:
1354 case TargetOpcode::G_XOR: {
1355 TestReg = MI->getOperand(1).getReg();
1356 Register ConstantReg = MI->getOperand(2).getReg();
1357 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1358 if (!VRegAndVal) {
1359 // AND commutes, check the other side for a constant.
1360 // FIXME: Can we canonicalize the constant so that it's always on the
1361 // same side at some point earlier?
1362 std::swap(ConstantReg, TestReg);
1363 VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1364 }
1365 if (VRegAndVal) {
1366 if (HasZext)
1367 C = VRegAndVal->Value.getZExtValue();
1368 else
1369 C = VRegAndVal->Value.getSExtValue();
1370 }
1371 break;
1372 }
1373 case TargetOpcode::G_ASHR:
1374 case TargetOpcode::G_LSHR:
1375 case TargetOpcode::G_SHL: {
1376 TestReg = MI->getOperand(1).getReg();
1377 auto VRegAndVal =
1378 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1379 if (VRegAndVal)
1380 C = VRegAndVal->Value.getSExtValue();
1381 break;
1382 }
1383 }
1384
1385 // Didn't find a constant or viable register. Bail out of the loop.
1386 if (!C || !TestReg.isValid())
1387 break;
1388
1389 // We found a suitable instruction with a constant. Check to see if we can
1390 // walk through the instruction.
1391 Register NextReg;
1392 unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
1393 switch (Opc) {
1394 default:
1395 break;
1396 case TargetOpcode::G_AND:
1397 // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1398 if ((*C >> Bit) & 1)
1399 NextReg = TestReg;
1400 break;
1401 case TargetOpcode::G_SHL:
1402 // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
1403 // the type of the register.
1404 if (*C <= Bit && (Bit - *C) < TestRegSize) {
1405 NextReg = TestReg;
1406 Bit = Bit - *C;
1407 }
1408 break;
1409 case TargetOpcode::G_ASHR:
1410 // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
1411 // in x
1412 NextReg = TestReg;
1413 Bit = Bit + *C;
1414 if (Bit >= TestRegSize)
1415 Bit = TestRegSize - 1;
1416 break;
1417 case TargetOpcode::G_LSHR:
1418 // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1419 if ((Bit + *C) < TestRegSize) {
1420 NextReg = TestReg;
1421 Bit = Bit + *C;
1422 }
1423 break;
1424 case TargetOpcode::G_XOR:
1425 // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1426 // appropriate.
1427 //
1428 // e.g. If x' = xor x, c, and the b-th bit is set in c then
1429 //
1430 // tbz x', b -> tbnz x, b
1431 //
1432 // Because x' only has the b-th bit set if x does not.
1433 if ((*C >> Bit) & 1)
1434 Invert = !Invert;
1435 NextReg = TestReg;
1436 break;
1437 }
1438
1439 // Check if we found anything worth folding.
1440 if (!NextReg.isValid())
1441 return Reg;
1442 Reg = NextReg;
1443 }
1444
1445 return Reg;
1446}
1447
1448MachineInstr *AArch64InstructionSelector::emitTestBit(
1449 Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1450 MachineIRBuilder &MIB) const {
1451 assert(TestReg.isValid())(static_cast <bool> (TestReg.isValid()) ? void (0) : __assert_fail
("TestReg.isValid()", "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1451, __extension__ __PRETTY_FUNCTION__))
;
1452 assert(ProduceNonFlagSettingCondBr &&(static_cast <bool> (ProduceNonFlagSettingCondBr &&
"Cannot emit TB(N)Z with speculation tracking!") ? void (0) :
__assert_fail ("ProduceNonFlagSettingCondBr && \"Cannot emit TB(N)Z with speculation tracking!\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1453, __extension__ __PRETTY_FUNCTION__))
1453 "Cannot emit TB(N)Z with speculation tracking!")(static_cast <bool> (ProduceNonFlagSettingCondBr &&
"Cannot emit TB(N)Z with speculation tracking!") ? void (0) :
__assert_fail ("ProduceNonFlagSettingCondBr && \"Cannot emit TB(N)Z with speculation tracking!\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1453, __extension__ __PRETTY_FUNCTION__))
;
1454 MachineRegisterInfo &MRI = *MIB.getMRI();
1455
1456 // Attempt to optimize the test bit by walking over instructions.
1457 TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1458 LLT Ty = MRI.getType(TestReg);
1459 unsigned Size = Ty.getSizeInBits();
1460 assert(!Ty.isVector() && "Expected a scalar!")(static_cast <bool> (!Ty.isVector() && "Expected a scalar!"
) ? void (0) : __assert_fail ("!Ty.isVector() && \"Expected a scalar!\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1460, __extension__ __PRETTY_FUNCTION__))
;
1461 assert(Bit < 64 && "Bit is too large!")(static_cast <bool> (Bit < 64 && "Bit is too large!"
) ? void (0) : __assert_fail ("Bit < 64 && \"Bit is too large!\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1461, __extension__ __PRETTY_FUNCTION__))
;
1462
1463 // When the test register is a 64-bit register, we have to narrow to make
1464 // TBNZW work.
1465 bool UseWReg = Bit < 32;
1466 unsigned NecessarySize = UseWReg ? 32 : 64;
1467 if (Size != NecessarySize)
1468 TestReg = moveScalarRegClass(
1469 TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1470 MIB);
1471
1472 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1473 {AArch64::TBZW, AArch64::TBNZW}};
1474 unsigned Opc = OpcTable[UseWReg][IsNegative];
1475 auto TestBitMI =
1476 MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1477 constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1478 return &*TestBitMI;
1479}
1480
1481bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1482 MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1483 MachineIRBuilder &MIB) const {
1484 assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?")(static_cast <bool> (AndInst.getOpcode() == TargetOpcode
::G_AND && "Expected G_AND only?") ? void (0) : __assert_fail
("AndInst.getOpcode() == TargetOpcode::G_AND && \"Expected G_AND only?\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1484, __extension__ __PRETTY_FUNCTION__))
;
1485 // Given something like this:
1486 //
1487 // %x = ...Something...
1488 // %one = G_CONSTANT i64 1
1489 // %zero = G_CONSTANT i64 0
1490 // %and = G_AND %x, %one
1491 // %cmp = G_ICMP intpred(ne), %and, %zero
1492 // %cmp_trunc = G_TRUNC %cmp
1493 // G_BRCOND %cmp_trunc, %bb.3
1494 //
1495 // We want to try and fold the AND into the G_BRCOND and produce either a
1496 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1497 //
1498 // In this case, we'd get
1499 //
1500 // TBNZ %x %bb.3
1501 //
1502
1503 // Check if the AND has a constant on its RHS which we can use as a mask.
1504 // If it's a power of 2, then it's the same as checking a specific bit.
1505 // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1506 auto MaybeBit = getIConstantVRegValWithLookThrough(
1507 AndInst.getOperand(2).getReg(), *MIB.getMRI());
1508 if (!MaybeBit)
1509 return false;
1510
1511 int32_t Bit = MaybeBit->Value.exactLogBase2();
1512 if (Bit < 0)
1513 return false;
1514
1515 Register TestReg = AndInst.getOperand(1).getReg();
1516
1517 // Emit a TB(N)Z.
1518 emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1519 return true;
1520}
1521
1522MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1523 bool IsNegative,
1524 MachineBasicBlock *DestMBB,
1525 MachineIRBuilder &MIB) const {
1526 assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!")(static_cast <bool> (ProduceNonFlagSettingCondBr &&
"CBZ does not set flags!") ? void (0) : __assert_fail ("ProduceNonFlagSettingCondBr && \"CBZ does not set flags!\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1526, __extension__ __PRETTY_FUNCTION__))
;
1527 MachineRegisterInfo &MRI = *MIB.getMRI();
1528 assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==(static_cast <bool> (RBI.getRegBank(CompareReg, MRI, TRI
)->getID() == AArch64::GPRRegBankID && "Expected GPRs only?"
) ? void (0) : __assert_fail ("RBI.getRegBank(CompareReg, MRI, TRI)->getID() == AArch64::GPRRegBankID && \"Expected GPRs only?\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1530, __extension__ __PRETTY_FUNCTION__))
1529 AArch64::GPRRegBankID &&(static_cast <bool> (RBI.getRegBank(CompareReg, MRI, TRI
)->getID() == AArch64::GPRRegBankID && "Expected GPRs only?"
) ? void (0) : __assert_fail ("RBI.getRegBank(CompareReg, MRI, TRI)->getID() == AArch64::GPRRegBankID && \"Expected GPRs only?\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1530, __extension__ __PRETTY_FUNCTION__))
1530 "Expected GPRs only?")(static_cast <bool> (RBI.getRegBank(CompareReg, MRI, TRI
)->getID() == AArch64::GPRRegBankID && "Expected GPRs only?"
) ? void (0) : __assert_fail ("RBI.getRegBank(CompareReg, MRI, TRI)->getID() == AArch64::GPRRegBankID && \"Expected GPRs only?\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1530, __extension__ __PRETTY_FUNCTION__))
;
1531 auto Ty = MRI.getType(CompareReg);
1532 unsigned Width = Ty.getSizeInBits();
1533 assert(!Ty.isVector() && "Expected scalar only?")(static_cast <bool> (!Ty.isVector() && "Expected scalar only?"
) ? void (0) : __assert_fail ("!Ty.isVector() && \"Expected scalar only?\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1533, __extension__ __PRETTY_FUNCTION__))
;
1534 assert(Width <= 64 && "Expected width to be at most 64?")(static_cast <bool> (Width <= 64 && "Expected width to be at most 64?"
) ? void (0) : __assert_fail ("Width <= 64 && \"Expected width to be at most 64?\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1534, __extension__ __PRETTY_FUNCTION__))
;
1535 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1536 {AArch64::CBNZW, AArch64::CBNZX}};
1537 unsigned Opc = OpcTable[IsNegative][Width == 64];
1538 auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
1539 constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
1540 return &*BranchMI;
1541}
1542
1543bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1544 MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1545 assert(FCmp.getOpcode() == TargetOpcode::G_FCMP)(static_cast <bool> (FCmp.getOpcode() == TargetOpcode::
G_FCMP) ? void (0) : __assert_fail ("FCmp.getOpcode() == TargetOpcode::G_FCMP"
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1545, __extension__ __PRETTY_FUNCTION__))
;
1546 assert(I.getOpcode() == TargetOpcode::G_BRCOND)(static_cast <bool> (I.getOpcode() == TargetOpcode::G_BRCOND
) ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_BRCOND"
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1546, __extension__ __PRETTY_FUNCTION__))
;
1547 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1548 // totally clean. Some of them require two branches to implement.
1549 auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
1550 emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
1551 Pred);
1552 AArch64CC::CondCode CC1, CC2;
1553 changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2);
1554 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1555 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
1556 if (CC2 != AArch64CC::AL)
1557 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
1558 I.eraseFromParent();
1559 return true;
1560}
1561
1562bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1563 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1564 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP)(static_cast <bool> (ICmp.getOpcode() == TargetOpcode::
G_ICMP) ? void (0) : __assert_fail ("ICmp.getOpcode() == TargetOpcode::G_ICMP"
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1564, __extension__ __PRETTY_FUNCTION__))
;
1565 assert(I.getOpcode() == TargetOpcode::G_BRCOND)(static_cast <bool> (I.getOpcode() == TargetOpcode::G_BRCOND
) ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_BRCOND"
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1565, __extension__ __PRETTY_FUNCTION__))
;
1566 // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
1567 //
1568 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1569 // instructions will not be produced, as they are conditional branch
1570 // instructions that do not set flags.
1571 if (!ProduceNonFlagSettingCondBr)
1572 return false;
1573
1574 MachineRegisterInfo &MRI = *MIB.getMRI();
1575 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1576 auto Pred =
1577 static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
1578 Register LHS = ICmp.getOperand(2).getReg();
1579 Register RHS = ICmp.getOperand(3).getReg();
1580
1581 // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
1582 auto VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1583 MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1584
1585 // When we can emit a TB(N)Z, prefer that.
1586 //
1587 // Handle non-commutative condition codes first.
1588 // Note that we don't want to do this when we have a G_AND because it can
1589 // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1590 if (VRegAndVal && !AndInst) {
1591 int64_t C = VRegAndVal->Value.getSExtValue();
1592
1593 // When we have a greater-than comparison, we can just test if the msb is
1594 // zero.
1595 if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1596 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1597 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1598 I.eraseFromParent();
1599 return true;
1600 }
1601
1602 // When we have a less than comparison, we can just test if the msb is not
1603 // zero.
1604 if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1605 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1606 emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
1607 I.eraseFromParent();
1608 return true;
1609 }
1610 }
1611
1612 // Attempt to handle commutative condition codes. Right now, that's only
1613 // eq/ne.
1614 if (ICmpInst::isEquality(Pred)) {
1615 if (!VRegAndVal) {
1616 std::swap(RHS, LHS);
1617 VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1618 AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1619 }
1620
1621 if (VRegAndVal && VRegAndVal->Value == 0) {
1622 // If there's a G_AND feeding into this branch, try to fold it away by
1623 // emitting a TB(N)Z instead.
1624 //
1625 // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
1626 // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
1627 // would be redundant.
1628 if (AndInst &&
1629 tryOptAndIntoCompareBranch(
1630 *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
1631 I.eraseFromParent();
1632 return true;
1633 }
1634
1635 // Otherwise, try to emit a CB(N)Z instead.
1636 auto LHSTy = MRI.getType(LHS);
1637 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1638 emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
1639 I.eraseFromParent();
1640 return true;
1641 }
1642 }
1643 }
1644
1645 return false;
1646}
1647
1648bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1649 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1650 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP)(static_cast <bool> (ICmp.getOpcode() == TargetOpcode::
G_ICMP) ? void (0) : __assert_fail ("ICmp.getOpcode() == TargetOpcode::G_ICMP"
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1650, __extension__ __PRETTY_FUNCTION__))
;
1651 assert(I.getOpcode() == TargetOpcode::G_BRCOND)(static_cast <bool> (I.getOpcode() == TargetOpcode::G_BRCOND
) ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_BRCOND"
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1651, __extension__ __PRETTY_FUNCTION__))
;
1652 if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
1653 return true;
1654
1655 // Couldn't optimize. Emit a compare + a Bcc.
1656 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1657 auto PredOp = ICmp.getOperand(1);
1658 emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
1659 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
1660 static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
1661 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1662 I.eraseFromParent();
1663 return true;
1664}
1665
1666bool AArch64InstructionSelector::selectCompareBranch(
1667 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
1668 Register CondReg = I.getOperand(0).getReg();
1669 MachineInstr *CCMI = MRI.getVRegDef(CondReg);
1670 if (CCMI->getOpcode() == TargetOpcode::G_TRUNC) {
1671 CondReg = CCMI->getOperand(1).getReg();
1672 CCMI = MRI.getVRegDef(CondReg);
1673 }
1674
1675 // Try to select the G_BRCOND using whatever is feeding the condition if
1676 // possible.
1677 unsigned CCMIOpc = CCMI->getOpcode();
1678 if (CCMIOpc == TargetOpcode::G_FCMP)
1679 return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
1680 if (CCMIOpc == TargetOpcode::G_ICMP)
1681 return selectCompareBranchFedByICmp(I, *CCMI, MIB);
1682
1683 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1684 // instructions will not be produced, as they are conditional branch
1685 // instructions that do not set flags.
1686 if (ProduceNonFlagSettingCondBr) {
1687 emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
1688 I.getOperand(1).getMBB(), MIB);
1689 I.eraseFromParent();
1690 return true;
1691 }
1692
1693 // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
1694 auto TstMI =
1695 MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
1696 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
1697 auto Bcc = MIB.buildInstr(AArch64::Bcc)
1698 .addImm(AArch64CC::EQ)
1699 .addMBB(I.getOperand(1).getMBB());
1700 I.eraseFromParent();
1701 return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
1702}
1703
1704/// Returns the element immediate value of a vector shift operand if found.
1705/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1706static Optional<int64_t> getVectorShiftImm(Register Reg,
1707 MachineRegisterInfo &MRI) {
1708 assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand")(static_cast <bool> (MRI.getType(Reg).isVector() &&
"Expected a *vector* shift operand") ? void (0) : __assert_fail
("MRI.getType(Reg).isVector() && \"Expected a *vector* shift operand\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1708, __extension__ __PRETTY_FUNCTION__))
;
1709 MachineInstr *OpMI = MRI.getVRegDef(Reg);
1710 assert(OpMI && "Expected to find a vreg def for vector shift operand")(static_cast <bool> (OpMI && "Expected to find a vreg def for vector shift operand"
) ? void (0) : __assert_fail ("OpMI && \"Expected to find a vreg def for vector shift operand\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1710, __extension__ __PRETTY_FUNCTION__))
;
1711 return getAArch64VectorSplatScalar(*OpMI, MRI);
1712}
1713
1714/// Matches and returns the shift immediate value for a SHL instruction given
1715/// a shift operand.
1716static Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI) {
1717 Optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1718 if (!ShiftImm)
1719 return None;
1720 // Check the immediate is in range for a SHL.
1721 int64_t Imm = *ShiftImm;
1722 if (Imm < 0)
1723 return None;
1724 switch (SrcTy.getElementType().getSizeInBits()) {
1725 default:
1726 LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unhandled element type for vector shift"
; } } while (false)
;
1727 return None;
1728 case 8:
1729 if (Imm > 7)
1730 return None;
1731 break;
1732 case 16:
1733 if (Imm > 15)
1734 return None;
1735 break;
1736 case 32:
1737 if (Imm > 31)
1738 return None;
1739 break;
1740 case 64:
1741 if (Imm > 63)
1742 return None;
1743 break;
1744 }
1745 return Imm;
1746}
1747
// Select a vector G_SHL. If the RHS is a constant splat (detected by
// getVectorSHLImm) we pick the immediate-form SHL*_shift opcode; otherwise
// we fall back to the register-form USHL. Returns false for non-vector or
// unhandled vector types so normal selection can proceed.
1748bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
1749 MachineRegisterInfo &MRI) {
1750 assert(I.getOpcode() == TargetOpcode::G_SHL)(static_cast <bool> (I.getOpcode() == TargetOpcode::G_SHL
) ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_SHL"
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1750, __extension__ __PRETTY_FUNCTION__))
;
1751 Register DstReg = I.getOperand(0).getReg();
1752 const LLT Ty = MRI.getType(DstReg);
1753 Register Src1Reg = I.getOperand(1).getReg();
1754 Register Src2Reg = I.getOperand(2).getReg();
1755
1756 if (!Ty.isVector())
1757 return false;
1758
1759 // Check if we have a vector of constants on RHS that we can select as the
1760 // immediate form.
1761 Optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1762
// Map the destination vector type to the immediate- or register-form opcode.
1763 unsigned Opc = 0;
1764 if (Ty == LLT::fixed_vector(2, 64)) {
1765 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1766 } else if (Ty == LLT::fixed_vector(4, 32)) {
1767 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1768 } else if (Ty == LLT::fixed_vector(2, 32)) {
1769 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1770 } else if (Ty == LLT::fixed_vector(4, 16)) {
1771 Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1772 } else if (Ty == LLT::fixed_vector(8, 16)) {
1773 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1774 } else if (Ty == LLT::fixed_vector(16, 8)) {
1775 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1776 } else if (Ty == LLT::fixed_vector(8, 8)) {
1777 Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1778 } else {
1779 LLVM_DEBUG(dbgs() << "Unhandled G_SHL type")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unhandled G_SHL type"; }
} while (false)
;
1780 return false;
1781 }
1782
// Immediate form appends the splat amount; register form appends Src2Reg.
1783 auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1784 if (ImmVal)
1785 Shl.addImm(*ImmVal);
1786 else
1787 Shl.addUse(Src2Reg);
1788 constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
1789 I.eraseFromParent();
1790 return true;
1791}
1792
// Select a vector G_ASHR/G_LSHR. There is no right-shift-by-register
// instruction on AArch64, so we negate the shift amount (NEG*) and use the
// left-shift-by-register instruction (SSHL for arithmetic, USHL for logical),
// which interprets negative amounts as right shifts. Returns false for
// non-vector or unhandled vector types.
1793bool AArch64InstructionSelector::selectVectorAshrLshr(
1794 MachineInstr &I, MachineRegisterInfo &MRI) {
1795 assert(I.getOpcode() == TargetOpcode::G_ASHR ||(static_cast <bool> (I.getOpcode() == TargetOpcode::G_ASHR
|| I.getOpcode() == TargetOpcode::G_LSHR) ? void (0) : __assert_fail
("I.getOpcode() == TargetOpcode::G_ASHR || I.getOpcode() == TargetOpcode::G_LSHR"
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1796, __extension__ __PRETTY_FUNCTION__))
1796 I.getOpcode() == TargetOpcode::G_LSHR)(static_cast <bool> (I.getOpcode() == TargetOpcode::G_ASHR
|| I.getOpcode() == TargetOpcode::G_LSHR) ? void (0) : __assert_fail
("I.getOpcode() == TargetOpcode::G_ASHR || I.getOpcode() == TargetOpcode::G_LSHR"
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1796, __extension__ __PRETTY_FUNCTION__))
;
1797 Register DstReg = I.getOperand(0).getReg();
1798 const LLT Ty = MRI.getType(DstReg);
1799 Register Src1Reg = I.getOperand(1).getReg();
1800 Register Src2Reg = I.getOperand(2).getReg();
1801
1802 if (!Ty.isVector())
1803 return false;
1804
1805 bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1806
1807 // We expect the immediate case to be lowered in the PostLegalCombiner to
1808 // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1809
1810 // There is not a shift right register instruction, but the shift left
1811 // register instruction takes a signed value, where negative numbers specify a
1812 // right shift.
1813
1814 unsigned Opc = 0;
1815 unsigned NegOpc = 0;
1816 const TargetRegisterClass *RC =
1817 getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID), RBI);
1818 if (Ty == LLT::fixed_vector(2, 64)) {
1819 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1820 NegOpc = AArch64::NEGv2i64;
1821 } else if (Ty == LLT::fixed_vector(4, 32)) {
1822 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1823 NegOpc = AArch64::NEGv4i32;
1824 } else if (Ty == LLT::fixed_vector(2, 32)) {
1825 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1826 NegOpc = AArch64::NEGv2i32;
1827 } else if (Ty == LLT::fixed_vector(4, 16)) {
1828 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1829 NegOpc = AArch64::NEGv4i16;
1830 } else if (Ty == LLT::fixed_vector(8, 16)) {
1831 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1832 NegOpc = AArch64::NEGv8i16;
1833 } else if (Ty == LLT::fixed_vector(16, 8)) {
1834 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1835 NegOpc = AArch64::NEGv16i8;
1836 } else if (Ty == LLT::fixed_vector(8, 8)) {
1837 Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1838 NegOpc = AArch64::NEGv8i8;
1839 } else {
// NOTE(review): this debug message mentions G_ASHR even when the opcode is
// G_LSHR; debug-only output, behavior unaffected.
1840 LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unhandled G_ASHR type"; }
} while (false)
;
1841 return false;
1842 }
1843
// Negate the amount on the FPR bank, then shift left by the negated amount.
1844 auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1845 constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1846 auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1847 constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1848 I.eraseFromParent();
1849 return true;
1850}
1851
// va_start lowering for the AAPCS ABI is not implemented here; always
// returns false so selection falls back to other handling.
1852bool AArch64InstructionSelector::selectVaStartAAPCS(
1853 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1854 return false;
1855}
1856
// Lower va_start for the Darwin ABI: compute the address of the varargs
// stack area (frame index from AArch64FunctionInfo) with ADDXri, then store
// that address through the va_list pointer (operand 0) with STRXui.
1857bool AArch64InstructionSelector::selectVaStartDarwin(
1858 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1859 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1860 Register ListReg = I.getOperand(0).getReg();
1861
1862 Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1863
// ArgsAddrReg = frame-index of the varargs save area + 0.
1864 auto MIB =
1865 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
1866 .addDef(ArgsAddrReg)
1867 .addFrameIndex(FuncInfo->getVarArgsStackIndex())
1868 .addImm(0)
1869 .addImm(0);
1870
1871 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1872
// Store the computed address into *ListReg, reusing the G_VASTART's memop.
1873 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
1874 .addUse(ArgsAddrReg)
1875 .addUse(ListReg)
1876 .addImm(0)
1877 .addMemOperand(*I.memoperands_begin());
1878
1879 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1880 I.eraseFromParent();
1881 return true;
1882}
1883
// Materialize a large code-model address (GlobalValue or BlockAddress, taken
// from I's operand 1) into I's destination register as a MOVZ of the low
// 16 bits (MO_G0) followed by three MOVKs for the G1/G2/G3 16-bit chunks.
// The final MOVK writes directly into I's original destination register.
1884void AArch64InstructionSelector::materializeLargeCMVal(
1885 MachineInstr &I, const Value *V, unsigned OpFlags) {
1886 MachineBasicBlock &MBB = *I.getParent();
1887 MachineFunction &MF = *MBB.getParent();
1888 MachineRegisterInfo &MRI = MF.getRegInfo();
1889
// MOVZ loads chunk G0; the address operand is copied from I and retagged.
1890 auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
1891 MovZ->addOperand(MF, I.getOperand(1));
1892 MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
1893 AArch64II::MO_NC);
1894 MovZ->addOperand(MF, MachineOperand::CreateImm(0));
1895 constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
1896
// Helper: emit one MOVKXi inserting the 16-bit chunk selected by Flags at
// bit position Offset, into ForceDstReg if nonzero, else a fresh vreg.
1897 auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
1898 Register ForceDstReg) {
1899 Register DstReg = ForceDstReg
1900 ? ForceDstReg
1901 : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1902 auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
1903 if (auto *GV = dyn_cast<GlobalValue>(V)) {
1904 MovI->addOperand(MF, MachineOperand::CreateGA(
1905 GV, MovZ->getOperand(1).getOffset(), Flags));
1906 } else {
1907 MovI->addOperand(
1908 MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
1909 MovZ->getOperand(1).getOffset(), Flags));
1910 }
1911 MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
1912 constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
1913 return DstReg;
1914 };
// Chain G1, G2, then G3; the last MOVK targets I's destination register.
1915 Register DstReg = BuildMovK(MovZ.getReg(0),
1916 AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
1917 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
1918 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
1919}
1920
// Pre-selection lowering hook: rewrites certain generic instructions in
// place so the imported tablegen patterns can match them. Returns true iff
// the instruction was modified (selection must still continue afterwards).
1921bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
1922 MachineBasicBlock &MBB = *I.getParent();
1923 MachineFunction &MF = *MBB.getParent();
1924 MachineRegisterInfo &MRI = MF.getRegInfo();
1925
1926 switch (I.getOpcode()) {
1927 case TargetOpcode::G_SHL:
1928 case TargetOpcode::G_ASHR:
1929 case TargetOpcode::G_LSHR: {
1930 // These shifts are legalized to have 64 bit shift amounts because we want
1931 // to take advantage of the existing imported selection patterns that assume
1932 // the immediates are s64s. However, if the shifted type is 32 bits and for
1933 // some reason we receive input GMIR that has an s64 shift amount that's not
1934 // a G_CONSTANT, insert a truncate so that we can still select the s32
1935 // register-register variant.
1936 Register SrcReg = I.getOperand(1).getReg();
1937 Register ShiftReg = I.getOperand(2).getReg();
1938 const LLT ShiftTy = MRI.getType(ShiftReg);
1939 const LLT SrcTy = MRI.getType(SrcReg);
1940 if (SrcTy.isVector())
1941 return false;
1942 assert(!ShiftTy.isVector() && "unexpected vector shift ty")(static_cast <bool> (!ShiftTy.isVector() && "unexpected vector shift ty"
) ? void (0) : __assert_fail ("!ShiftTy.isVector() && \"unexpected vector shift ty\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1942, __extension__ __PRETTY_FUNCTION__))
;
1943 if (SrcTy.getSizeInBits() != 32 || ShiftTy.getSizeInBits() != 64)
1944 return false;
1945 auto *AmtMI = MRI.getVRegDef(ShiftReg);
1946 assert(AmtMI && "could not find a vreg definition for shift amount")(static_cast <bool> (AmtMI && "could not find a vreg definition for shift amount"
) ? void (0) : __assert_fail ("AmtMI && \"could not find a vreg definition for shift amount\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1946, __extension__ __PRETTY_FUNCTION__))
;
1947 if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) {
1948 // Insert a subregister copy to implement a 64->32 trunc
1949 auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
1950 .addReg(ShiftReg, 0, AArch64::sub_32);
1951 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
1952 I.getOperand(2).setReg(Trunc.getReg(0));
1953 }
1954 return true;
1955 }
1956 case TargetOpcode::G_STORE: {
// Fold cross-bank copies into the store, and rewrite pointer stores to s64.
1957 bool Changed = contractCrossBankCopyIntoStore(I, MRI);
1958 MachineOperand &SrcOp = I.getOperand(0);
1959 if (MRI.getType(SrcOp.getReg()).isPointer()) {
1960 // Allow matching with imported patterns for stores of pointers. Unlike
1961 // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
1962 // and constrain.
1963 auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp);
1964 Register NewSrc = Copy.getReg(0);
1965 SrcOp.setReg(NewSrc);
1966 RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
1967 Changed = true;
1968 }
1969 return Changed;
1970 }
1971 case TargetOpcode::G_PTR_ADD:
1972 return convertPtrAddToAdd(I, MRI);
1973 case TargetOpcode::G_LOAD: {
1974 // For scalar loads of pointers, we try to convert the dest type from p0
1975 // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
1976 // conversion, this should be ok because all users should have been
1977 // selected already, so the type doesn't matter for them.
1978 Register DstReg = I.getOperand(0).getReg();
1979 const LLT DstTy = MRI.getType(DstReg);
1980 if (!DstTy.isPointer())
1981 return false;
1982 MRI.setType(DstReg, LLT::scalar(64));
1983 return true;
1984 }
1985 case AArch64::G_DUP: {
1986 // Convert the type from p0 to s64 to help selection.
1987 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1988 if (!DstTy.getElementType().isPointer())
1989 return false;
1990 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
1991 MRI.setType(I.getOperand(0).getReg(),
1992 DstTy.changeElementType(LLT::scalar(64)));
1993 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
1994 I.getOperand(1).setReg(NewSrc.getReg(0));
1995 return true;
1996 }
1997 case TargetOpcode::G_UITOFP:
1998 case TargetOpcode::G_SITOFP: {
1999 // If both source and destination regbanks are FPR, then convert the opcode
2000 // to G_SITOF so that the importer can select it to an fpr variant.
2001 // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
2002 // copy.
2003 Register SrcReg = I.getOperand(1).getReg();
2004 LLT SrcTy = MRI.getType(SrcReg);
2005 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2006 if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
2007 return false;
2008
2009 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
2010 if (I.getOpcode() == TargetOpcode::G_SITOFP)
2011 I.setDesc(TII.get(AArch64::G_SITOF));
2012 else
2013 I.setDesc(TII.get(AArch64::G_UITOF));
2014 return true;
2015 }
2016 return false;
2017 }
2018 default:
2019 return false;
2020 }
2021}
2022
2023/// This lowering tries to look for G_PTR_ADD instructions and then converts
2024/// them to a standard G_ADD with a COPY on the source.
2025///
2026/// The motivation behind this is to expose the add semantics to the imported
2027/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
2028/// because the selector works bottom up, uses before defs. By the time we
2029/// end up trying to select a G_PTR_ADD, we should have already attempted to
2030/// fold this into addressing modes and were therefore unsuccessful.
2031bool AArch64InstructionSelector::convertPtrAddToAdd(
2032 MachineInstr &I, MachineRegisterInfo &MRI) {
2033 assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD")(static_cast <bool> (I.getOpcode() == TargetOpcode::G_PTR_ADD
&& "Expected G_PTR_ADD") ? void (0) : __assert_fail (
"I.getOpcode() == TargetOpcode::G_PTR_ADD && \"Expected G_PTR_ADD\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2033, __extension__ __PRETTY_FUNCTION__))
;
2034 Register DstReg = I.getOperand(0).getReg();
2035 Register AddOp1Reg = I.getOperand(1).getReg();
2036 const LLT PtrTy = MRI.getType(DstReg);
// Only address space 0 is handled.
2037 if (PtrTy.getAddressSpace() != 0)
2038 return false;
2039
2040 const LLT CastPtrTy =
2041 PtrTy.isVector() ? LLT::fixed_vector(2, 64) : LLT::scalar(64);
2042 auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
2043 // Set regbanks on the registers.
2044 if (PtrTy.isVector())
2045 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
2046 else
2047 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
2048
2049 // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
2050 // %dst(intty) = G_ADD %intbase, off
2051 I.setDesc(TII.get(TargetOpcode::G_ADD));
2052 MRI.setType(DstReg, CastPtrTy);
2053 I.getOperand(1).setReg(PtrToInt.getReg(0));
2054 if (!select(*PtrToInt)) {
2055 LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd"
; } } while (false)
;
2056 return false;
2057 }
2058
2059 // Also take the opportunity here to try to do some optimization.
2060 // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
2061 Register NegatedReg;
2062 if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
2063 return true;
// Offset is a negation: fold into G_SUB with the negated value as operand 2.
2064 I.getOperand(2).setReg(NegatedReg);
2065 I.setDesc(TII.get(TargetOpcode::G_SUB));
2066 return true;
2067}
2068
// Early-select a scalar G_SHL by a constant amount into the immediate LSL
// alias of UBFM (UBFMWri/UBFMXri). Returns false (no change) for
// non-constant amounts and vector types so the imported register variant
// can match instead.
2069bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
2070 MachineRegisterInfo &MRI) {
2071 // We try to match the immediate variant of LSL, which is actually an alias
2072 // for a special case of UBFM. Otherwise, we fall back to the imported
2073 // selector which will match the register variant.
2074 assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op")(static_cast <bool> (I.getOpcode() == TargetOpcode::G_SHL
&& "unexpected op") ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_SHL && \"unexpected op\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2074, __extension__ __PRETTY_FUNCTION__))
;
2075 const auto &MO = I.getOperand(2);
2076 auto VRegAndVal = getIConstantVRegVal(MO.getReg(), MRI);
2077 if (!VRegAndVal)
2078 return false;
2079
2080 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2081 if (DstTy.isVector())
2082 return false;
2083 bool Is64Bit = DstTy.getSizeInBits() == 64;
// Compute the two UBFM immediates (immr/imms renderers) for this width.
2084 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2085 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2086
2087 if (!Imm1Fn || !Imm2Fn)
2088 return false;
2089
2090 auto NewI =
2091 MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2092 {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
2093
// Append the rendered immediates to the new instruction.
2094 for (auto &RenderFn : *Imm1Fn)
2095 RenderFn(NewI);
2096 for (auto &RenderFn : *Imm2Fn)
2097 RenderFn(NewI);
2098
2099 I.eraseFromParent();
2100 return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
2101}
2102
// Fold a cross-register-bank COPY feeding a G_STORE into the store by
// rewriting the store to use the copy's ultimate source. Returns true iff
// the store's source operand was changed.
2103bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2104 MachineInstr &I, MachineRegisterInfo &MRI) {
2105 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE")(static_cast <bool> (I.getOpcode() == TargetOpcode::G_STORE
&& "Expected G_STORE") ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_STORE && \"Expected G_STORE\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2105, __extension__ __PRETTY_FUNCTION__))
;
2106 // If we're storing a scalar, it doesn't matter what register bank that
2107 // scalar is on. All that matters is the size.
2108 //
2109 // So, if we see something like this (with a 32-bit scalar as an example):
2110 //
2111 // %x:gpr(s32) = ... something ...
2112 // %y:fpr(s32) = COPY %x:gpr(s32)
2113 // G_STORE %y:fpr(s32)
2114 //
2115 // We can fix this up into something like this:
2116 //
2117 // G_STORE %x:gpr(s32)
2118 //
2119 // And then continue the selection process normally.
2120 Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
2121 if (!DefDstReg.isValid())
2122 return false;
2123 LLT DefDstTy = MRI.getType(DefDstReg);
2124 Register StoreSrcReg = I.getOperand(0).getReg();
2125 LLT StoreSrcTy = MRI.getType(StoreSrcReg);
2126
2127 // If we get something strange like a physical register, then we shouldn't
2128 // go any further.
2129 if (!DefDstTy.isValid())
2130 return false;
2131
2132 // Are the source and dst types the same size?
2133 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2134 return false;
2135
// Same-bank copies are left for the normal copy-folding paths.
2136 if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
2137 RBI.getRegBank(DefDstReg, MRI, TRI))
2138 return false;
2139
2140 // We have a cross-bank copy, which is entering a store. Let's fold it.
2141 I.getOperand(0).setReg(DefDstReg);
2142 return true;
2143}
2144
// Early selection hook, run before the imported tablegen patterns. Handles a
// few cases where custom C++ selection produces better code than the
// importer would. Returns true iff the instruction was fully selected here.
2145bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
2146 assert(I.getParent() && "Instruction should be in a basic block!")(static_cast <bool> (I.getParent() && "Instruction should be in a basic block!"
) ? void (0) : __assert_fail ("I.getParent() && \"Instruction should be in a basic block!\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2146, __extension__ __PRETTY_FUNCTION__))
;
2147 assert(I.getParent()->getParent() && "Instruction should be in a function!")(static_cast <bool> (I.getParent()->getParent() &&
"Instruction should be in a function!") ? void (0) : __assert_fail
("I.getParent()->getParent() && \"Instruction should be in a function!\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2147, __extension__ __PRETTY_FUNCTION__))
;
2148
2149 MachineBasicBlock &MBB = *I.getParent();
2150 MachineFunction &MF = *MBB.getParent();
2151 MachineRegisterInfo &MRI = MF.getRegInfo();
2152
2153 switch (I.getOpcode()) {
2154 case AArch64::G_DUP: {
2155 // Before selecting a DUP instruction, check if it is better selected as a
2156 // MOV or load from a constant pool.
2157 Register Src = I.getOperand(1).getReg();
2158 auto ValAndVReg = getIConstantVRegValWithLookThrough(Src, MRI);
2159 if (!ValAndVReg)
2160 return false;
2161 LLVMContext &Ctx = MF.getFunction().getContext();
2162 Register Dst = I.getOperand(0).getReg();
// Build a splat ConstantDataVector of the looked-through constant and let
// emitConstantVector pick the best materialization.
2163 auto *CV = ConstantDataVector::getSplat(
2164 MRI.getType(Dst).getNumElements(),
2165 ConstantInt::get(Type::getIntNTy(Ctx, MRI.getType(Src).getSizeInBits()),
2166 ValAndVReg->Value));
2167 if (!emitConstantVector(Dst, CV, MIB, MRI))
2168 return false;
2169 I.eraseFromParent();
2170 return true;
2171 }
2172 case TargetOpcode::G_SEXT:
2173 // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
2174 // over a normal extend.
2175 if (selectUSMovFromExtend(I, MRI))
2176 return true;
2177 return false;
2178 case TargetOpcode::G_BR:
2179 return false;
2180 case TargetOpcode::G_SHL:
2181 return earlySelectSHL(I, MRI);
2182 case TargetOpcode::G_CONSTANT: {
// Select constant zero as a COPY from XZR/WZR instead of a move.
2183 bool IsZero = false;
2184 if (I.getOperand(1).isCImm())
2185 IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0;
2186 else if (I.getOperand(1).isImm())
2187 IsZero = I.getOperand(1).getImm() == 0;
2188
2189 if (!IsZero)
2190 return false;
2191
2192 Register DefReg = I.getOperand(0).getReg();
2193 LLT Ty = MRI.getType(DefReg);
2194 if (Ty.getSizeInBits() == 64) {
2195 I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
2196 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
2197 } else if (Ty.getSizeInBits() == 32) {
2198 I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
2199 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
2200 } else
2201 return false;
2202
2203 I.setDesc(TII.get(TargetOpcode::COPY));
2204 return true;
2205 }
2206
2207 case TargetOpcode::G_ADD: {
2208 // Check if this is being fed by a G_ICMP on either side.
2209 //
2210 // (cmp pred, x, y) + z
2211 //
2212 // In the above case, when the cmp is true, we increment z by 1. So, we can
2213 // fold the add into the cset for the cmp by using cinc.
2214 //
2215 // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
2216 Register X = I.getOperand(1).getReg();
2217
2218 // Only handle scalars. Scalar G_ICMP is only legal for s32, so bail out
2219 // early if we see it.
2220 LLT Ty = MRI.getType(X);
2221 if (Ty.isVector() || Ty.getSizeInBits() != 32)
2222 return false;
2223
// The G_ICMP may feed either add operand; try operand 2 first, then swap.
2224 Register CmpReg = I.getOperand(2).getReg();
2225 MachineInstr *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, CmpReg, MRI);
2226 if (!Cmp) {
2227 std::swap(X, CmpReg);
2228 Cmp = getOpcodeDef(TargetOpcode::G_ICMP, CmpReg, MRI);
2229 if (!Cmp)
2230 return false;
2231 }
2232 auto Pred =
2233 static_cast<CmpInst::Predicate>(Cmp->getOperand(1).getPredicate());
2234 emitIntegerCompare(Cmp->getOperand(2), Cmp->getOperand(3),
2235 Cmp->getOperand(1), MIB);
2236 emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIB, X);
2237 I.eraseFromParent();
2238 return true;
2239 }
2240 case TargetOpcode::G_OR: {
2241 // Look for operations that take the lower `Width=Size-ShiftImm` bits of
2242 // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
2243 // shifting and masking that we can replace with a BFI (encoded as a BFM).
2244 Register Dst = I.getOperand(0).getReg();
2245 LLT Ty = MRI.getType(Dst);
2246
2247 if (!Ty.isScalar())
2248 return false;
2249
2250 unsigned Size = Ty.getSizeInBits();
2251 if (Size != 32 && Size != 64)
2252 return false;
2253
2254 Register ShiftSrc;
2255 int64_t ShiftImm;
2256 Register MaskSrc;
2257 int64_t MaskImm;
2258 if (!mi_match(
2259 Dst, MRI,
2260 m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))),
2261 m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm))))))
2262 return false;
2263
// The AND mask must cover exactly the bits below the shift amount.
2264 if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
2265 return false;
2266
2267 int64_t Immr = Size - ShiftImm;
2268 int64_t Imms = Size - ShiftImm - 1;
2269 unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2270 emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
2271 I.eraseFromParent();
2272 return true;
2273 }
2274 default:
2275 return false;
2276 }
2277}
2278
2279bool AArch64InstructionSelector::select(MachineInstr &I) {
2280 assert(I.getParent() && "Instruction should be in a basic block!")(static_cast <bool> (I.getParent() && "Instruction should be in a basic block!"
) ? void (0) : __assert_fail ("I.getParent() && \"Instruction should be in a basic block!\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2280, __extension__ __PRETTY_FUNCTION__))
;
2281 assert(I.getParent()->getParent() && "Instruction should be in a function!")(static_cast <bool> (I.getParent()->getParent() &&
"Instruction should be in a function!") ? void (0) : __assert_fail
("I.getParent()->getParent() && \"Instruction should be in a function!\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2281, __extension__ __PRETTY_FUNCTION__))
;
2282
2283 MachineBasicBlock &MBB = *I.getParent();
2284 MachineFunction &MF = *MBB.getParent();
2285 MachineRegisterInfo &MRI = MF.getRegInfo();
2286
2287 const AArch64Subtarget *Subtarget =
2288 &static_cast<const AArch64Subtarget &>(MF.getSubtarget());
2289 if (Subtarget->requiresStrictAlign()) {
2290 // We don't support this feature yet.
2291 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "AArch64 GISel does not support strict-align yet\n"
; } } while (false)
;
2292 return false;
2293 }
2294
2295 MIB.setInstrAndDebugLoc(I);
2296
2297 unsigned Opcode = I.getOpcode();
2298 // G_PHI requires same handling as PHI
2299 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2300 // Certain non-generic instructions also need some special handling.
2301
2302 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2303 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2304
2305 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2306 const Register DefReg = I.getOperand(0).getReg();
2307 const LLT DefTy = MRI.getType(DefReg);
2308
2309 const RegClassOrRegBank &RegClassOrBank =
2310 MRI.getRegClassOrRegBank(DefReg);
2311
2312 const TargetRegisterClass *DefRC
2313 = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
2314 if (!DefRC) {
2315 if (!DefTy.isValid()) {
2316 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "PHI operand has no type, not a gvreg?\n"
; } } while (false)
;
2317 return false;
2318 }
2319 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
2320 DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
2321 if (!DefRC) {
2322 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "PHI operand has unexpected size/bank\n"
; } } while (false)
;
2323 return false;
2324 }
2325 }
2326
2327 I.setDesc(TII.get(TargetOpcode::PHI));
2328
2329 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
2330 }
2331
2332 if (I.isCopy())
2333 return selectCopy(I, TII, MRI, TRI, RBI);
2334
2335 return true;
2336 }
2337
2338
2339 if (I.getNumOperands() != I.getNumExplicitOperands()) {
2340 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Generic instruction has unexpected implicit operands\n"
; } } while (false)
2341 dbgs() << "Generic instruction has unexpected implicit operands\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Generic instruction has unexpected implicit operands\n"
; } } while (false)
;
2342 return false;
2343 }
2344
2345 // Try to do some lowering before we start instruction selecting. These
2346 // lowerings are purely transformations on the input G_MIR and so selection
2347 // must continue after any modification of the instruction.
2348 if (preISelLower(I)) {
2349 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2350 }
2351
2352 // There may be patterns where the importer can't deal with them optimally,
2353 // but does select it to a suboptimal sequence so our custom C++ selection
2354 // code later never has a chance to work on it. Therefore, we have an early
2355 // selection attempt here to give priority to certain selection routines
2356 // over the imported ones.
2357 if (earlySelect(I))
2358 return true;
2359
2360 if (selectImpl(I, *CoverageInfo))
2361 return true;
2362
2363 LLT Ty =
2364 I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
2365
2366 switch (Opcode) {
2367 case TargetOpcode::G_SBFX:
2368 case TargetOpcode::G_UBFX: {
2369 static const unsigned OpcTable[2][2] = {
2370 {AArch64::UBFMWri, AArch64::UBFMXri},
2371 {AArch64::SBFMWri, AArch64::SBFMXri}};
2372 bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2373 unsigned Size = Ty.getSizeInBits();
2374 unsigned Opc = OpcTable[IsSigned][Size == 64];
2375 auto Cst1 =
2376 getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
2377 assert(Cst1 && "Should have gotten a constant for src 1?")(static_cast <bool> (Cst1 && "Should have gotten a constant for src 1?"
) ? void (0) : __assert_fail ("Cst1 && \"Should have gotten a constant for src 1?\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2377, __extension__ __PRETTY_FUNCTION__))
;
2378 auto Cst2 =
2379 getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
2380 assert(Cst2 && "Should have gotten a constant for src 2?")(static_cast <bool> (Cst2 && "Should have gotten a constant for src 2?"
) ? void (0) : __assert_fail ("Cst2 && \"Should have gotten a constant for src 2?\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2380, __extension__ __PRETTY_FUNCTION__))
;
2381 auto LSB = Cst1->Value.getZExtValue();
2382 auto Width = Cst2->Value.getZExtValue();
2383 auto BitfieldInst =
2384 MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
2385 .addImm(LSB)
2386 .addImm(LSB + Width - 1);
2387 I.eraseFromParent();
2388 return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
2389 }
2390 case TargetOpcode::G_BRCOND:
2391 return selectCompareBranch(I, MF, MRI);
2392
2393 case TargetOpcode::G_BRINDIRECT: {
2394 I.setDesc(TII.get(AArch64::BR));
2395 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2396 }
2397
2398 case TargetOpcode::G_BRJT:
2399 return selectBrJT(I, MRI);
2400
2401 case AArch64::G_ADD_LOW: {
2402 // This op may have been separated from it's ADRP companion by the localizer
2403 // or some other code motion pass. Given that many CPUs will try to
2404 // macro fuse these operations anyway, select this into a MOVaddr pseudo
2405 // which will later be expanded into an ADRP+ADD pair after scheduling.
2406 MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
2407 if (BaseMI->getOpcode() != AArch64::ADRP) {
2408 I.setDesc(TII.get(AArch64::ADDXri));
2409 I.addOperand(MachineOperand::CreateImm(0));
2410 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2411 }
2412 assert(TM.getCodeModel() == CodeModel::Small &&(static_cast <bool> (TM.getCodeModel() == CodeModel::Small
&& "Expected small code model") ? void (0) : __assert_fail
("TM.getCodeModel() == CodeModel::Small && \"Expected small code model\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2413, __extension__ __PRETTY_FUNCTION__))
2413 "Expected small code model")(static_cast <bool> (TM.getCodeModel() == CodeModel::Small
&& "Expected small code model") ? void (0) : __assert_fail
("TM.getCodeModel() == CodeModel::Small && \"Expected small code model\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2413, __extension__ __PRETTY_FUNCTION__))
;
2414 auto Op1 = BaseMI->getOperand(1);
2415 auto Op2 = I.getOperand(2);
2416 auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
2417 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2418 Op1.getTargetFlags())
2419 .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
2420 Op2.getTargetFlags());
2421 I.eraseFromParent();
2422 return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
2423 }
2424
2425 case TargetOpcode::G_BSWAP: {
2426 // Handle vector types for G_BSWAP directly.
2427 Register DstReg = I.getOperand(0).getReg();
2428 LLT DstTy = MRI.getType(DstReg);
2429
2430 // We should only get vector types here; everything else is handled by the
2431 // importer right now.
2432 if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
2433 LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Dst type for G_BSWAP currently unsupported.\n"
; } } while (false)
;
2434 return false;
2435 }
2436
2437 // Only handle 4 and 2 element vectors for now.
2438 // TODO: 16-bit elements.
2439 unsigned NumElts = DstTy.getNumElements();
2440 if (NumElts != 4 && NumElts != 2) {
2441 LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unsupported number of elements for G_BSWAP.\n"
; } } while (false)
;
2442 return false;
2443 }
2444
2445 // Choose the correct opcode for the supported types. Right now, that's
2446 // v2s32, v4s32, and v2s64.
2447 unsigned Opc = 0;
2448 unsigned EltSize = DstTy.getElementType().getSizeInBits();
2449 if (EltSize == 32)
2450 Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
2451 : AArch64::REV32v16i8;
2452 else if (EltSize == 64)
2453 Opc = AArch64::REV64v16i8;
2454
2455 // We should always get something by the time we get here...
2456 assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?")(static_cast <bool> (Opc != 0 && "Didn't get an opcode for G_BSWAP?"
) ? void (0) : __assert_fail ("Opc != 0 && \"Didn't get an opcode for G_BSWAP?\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2456, __extension__ __PRETTY_FUNCTION__))
;
2457
2458 I.setDesc(TII.get(Opc));
2459 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2460 }
2461
2462 case TargetOpcode::G_FCONSTANT:
2463 case TargetOpcode::G_CONSTANT: {
2464 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2465
2466 const LLT s8 = LLT::scalar(8);
2467 const LLT s16 = LLT::scalar(16);
2468 const LLT s32 = LLT::scalar(32);
2469 const LLT s64 = LLT::scalar(64);
2470 const LLT s128 = LLT::scalar(128);
2471 const LLT p0 = LLT::pointer(0, 64);
2472
2473 const Register DefReg = I.getOperand(0).getReg();
2474 const LLT DefTy = MRI.getType(DefReg);
2475 const unsigned DefSize = DefTy.getSizeInBits();
2476 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2477
2478 // FIXME: Redundant check, but even less readable when factored out.
2479 if (isFP) {
2480 if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
2481 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Tydo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unable to materialize FP "
<< Ty << " constant, expected: " << s16 <<
" or " << s32 << " or " << s64 << " or "
<< s128 << '\n'; } } while (false)
2482 << " constant, expected: " << s16 << " or " << s32do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unable to materialize FP "
<< Ty << " constant, expected: " << s16 <<
" or " << s32 << " or " << s64 << " or "
<< s128 << '\n'; } } while (false)
2483 << " or " << s64 << " or " << s128 << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unable to materialize FP "
<< Ty << " constant, expected: " << s16 <<
" or " << s32 << " or " << s64 << " or "
<< s128 << '\n'; } } while (false)
;
2484 return false;
2485 }
2486
2487 if (RB.getID() != AArch64::FPRRegBankID) {
2488 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Tydo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unable to materialize FP "
<< Ty << " constant on bank: " << RB <<
", expected: FPR\n"; } } while (false)
2489 << " constant on bank: " << RBdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unable to materialize FP "
<< Ty << " constant on bank: " << RB <<
", expected: FPR\n"; } } while (false)
2490 << ", expected: FPR\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unable to materialize FP "
<< Ty << " constant on bank: " << RB <<
", expected: FPR\n"; } } while (false)
;
2491 return false;
2492 }
2493
2494 // The case when we have 0.0 is covered by tablegen. Reject it here so we
2495 // can be sure tablegen works correctly and isn't rescued by this code.
2496 // 0.0 is not covered by tablegen for FP128. So we will handle this
2497 // scenario in the code here.
2498 if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
2499 return false;
2500 } else {
2501 // s32 and s64 are covered by tablegen.
2502 if (Ty != p0 && Ty != s8 && Ty != s16) {
2503 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Tydo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unable to materialize integer "
<< Ty << " constant, expected: " << s32 <<
", " << s64 << ", or " << p0 << '\n'
; } } while (false)
2504 << " constant, expected: " << s32 << ", " << s64do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unable to materialize integer "
<< Ty << " constant, expected: " << s32 <<
", " << s64 << ", or " << p0 << '\n'
; } } while (false)
2505 << ", or " << p0 << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unable to materialize integer "
<< Ty << " constant, expected: " << s32 <<
", " << s64 << ", or " << p0 << '\n'
; } } while (false)
;
2506 return false;
2507 }
2508
2509 if (RB.getID() != AArch64::GPRRegBankID) {
2510 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Tydo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unable to materialize integer "
<< Ty << " constant on bank: " << RB <<
", expected: GPR\n"; } } while (false)
2511 << " constant on bank: " << RBdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unable to materialize integer "
<< Ty << " constant on bank: " << RB <<
", expected: GPR\n"; } } while (false)
2512 << ", expected: GPR\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unable to materialize integer "
<< Ty << " constant on bank: " << RB <<
", expected: GPR\n"; } } while (false)
;
2513 return false;
2514 }
2515 }
2516
2517 if (isFP) {
2518 const TargetRegisterClass &FPRRC = *getMinClassForRegBank(RB, DefSize);
2519 // For 16, 64, and 128b values, emit a constant pool load.
2520 switch (DefSize) {
2521 default:
2522 llvm_unreachable("Unexpected destination size for G_FCONSTANT?")::llvm::llvm_unreachable_internal("Unexpected destination size for G_FCONSTANT?"
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2522)
;
2523 case 32:
2524 // For s32, use a cp load if we have optsize/minsize.
2525 if (!shouldOptForSize(&MF))
2526 break;
2527 LLVM_FALLTHROUGH[[gnu::fallthrough]];
2528 case 16:
2529 case 64:
2530 case 128: {
2531 auto *FPImm = I.getOperand(1).getFPImm();
2532 auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2533 if (!LoadMI) {
2534 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Failed to load double constant pool entry\n"
; } } while (false)
;
2535 return false;
2536 }
2537 MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2538 I.eraseFromParent();
2539 return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2540 }
2541 }
2542
2543 // Either emit a FMOV, or emit a copy to emit a normal mov.
2544 assert(DefSize == 32 &&(static_cast <bool> (DefSize == 32 && "Expected constant pool loads for all sizes other than 32!"
) ? void (0) : __assert_fail ("DefSize == 32 && \"Expected constant pool loads for all sizes other than 32!\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2545, __extension__ __PRETTY_FUNCTION__))
2545 "Expected constant pool loads for all sizes other than 32!")(static_cast <bool> (DefSize == 32 && "Expected constant pool loads for all sizes other than 32!"
) ? void (0) : __assert_fail ("DefSize == 32 && \"Expected constant pool loads for all sizes other than 32!\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2545, __extension__ __PRETTY_FUNCTION__))
;
2546 const Register DefGPRReg =
2547 MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2548 MachineOperand &RegOp = I.getOperand(0);
2549 RegOp.setReg(DefGPRReg);
2550 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2551 MIB.buildCopy({DefReg}, {DefGPRReg});
2552
2553 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2554 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Failed to constrain G_FCONSTANT def operand\n"
; } } while (false)
;
2555 return false;
2556 }
2557
2558 MachineOperand &ImmOp = I.getOperand(1);
2559 // FIXME: Is going through int64_t always correct?
2560 ImmOp.ChangeToImmediate(
2561 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2562 } else if (I.getOperand(1).isCImm()) {
2563 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
2564 I.getOperand(1).ChangeToImmediate(Val);
2565 } else if (I.getOperand(1).isImm()) {
2566 uint64_t Val = I.getOperand(1).getImm();
2567 I.getOperand(1).ChangeToImmediate(Val);
2568 }
2569
2570 const unsigned MovOpc =
2571 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2572 I.setDesc(TII.get(MovOpc));
2573 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2574 return true;
2575 }
2576 case TargetOpcode::G_EXTRACT: {
2577 Register DstReg = I.getOperand(0).getReg();
2578 Register SrcReg = I.getOperand(1).getReg();
2579 LLT SrcTy = MRI.getType(SrcReg);
2580 LLT DstTy = MRI.getType(DstReg);
2581 (void)DstTy;
2582 unsigned SrcSize = SrcTy.getSizeInBits();
2583
2584 if (SrcTy.getSizeInBits() > 64) {
2585 // This should be an extract of an s128, which is like a vector extract.
2586 if (SrcTy.getSizeInBits() != 128)
2587 return false;
2588 // Only support extracting 64 bits from an s128 at the moment.
2589 if (DstTy.getSizeInBits() != 64)
2590 return false;
2591
2592 unsigned Offset = I.getOperand(2).getImm();
2593 if (Offset % 64 != 0)
2594 return false;
2595
2596 // Check we have the right regbank always.
2597 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2598 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2599 assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!")(static_cast <bool> (SrcRB.getID() == DstRB.getID() &&
"Wrong extract regbank!") ? void (0) : __assert_fail ("SrcRB.getID() == DstRB.getID() && \"Wrong extract regbank!\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2599, __extension__ __PRETTY_FUNCTION__))
;
2600
2601 if (SrcRB.getID() == AArch64::GPRRegBankID) {
2602 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
2603 .addUse(SrcReg, 0, Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2604 I.eraseFromParent();
2605 return true;
2606 }
2607
2608 // Emit the same code as a vector extract.
2609 // Offset must be a multiple of 64.
2610 unsigned LaneIdx = Offset / 64;
2611 MachineInstr *Extract = emitExtractVectorElt(
2612 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2613 if (!Extract)
2614 return false;
2615 I.eraseFromParent();
2616 return true;
2617 }
2618
2619 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2620 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2621 Ty.getSizeInBits() - 1);
2622
2623 if (SrcSize < 64) {
2624 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&(static_cast <bool> (SrcSize == 32 && DstTy.getSizeInBits
() == 16 && "unexpected G_EXTRACT types") ? void (0) :
__assert_fail ("SrcSize == 32 && DstTy.getSizeInBits() == 16 && \"unexpected G_EXTRACT types\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2625, __extension__ __PRETTY_FUNCTION__))
2625 "unexpected G_EXTRACT types")(static_cast <bool> (SrcSize == 32 && DstTy.getSizeInBits
() == 16 && "unexpected G_EXTRACT types") ? void (0) :
__assert_fail ("SrcSize == 32 && DstTy.getSizeInBits() == 16 && \"unexpected G_EXTRACT types\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2625, __extension__ __PRETTY_FUNCTION__))
;
2626 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2627 }
2628
2629 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2630 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2631 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2632 .addReg(DstReg, 0, AArch64::sub_32);
2633 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2634 AArch64::GPR32RegClass, MRI);
2635 I.getOperand(0).setReg(DstReg);
2636
2637 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2638 }
2639
2640 case TargetOpcode::G_INSERT: {
2641 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2642 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2643 unsigned DstSize = DstTy.getSizeInBits();
2644 // Larger inserts are vectors, same-size ones should be something else by
2645 // now (split up or turned into COPYs).
2646 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2647 return false;
2648
2649 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2650 unsigned LSB = I.getOperand(3).getImm();
2651 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
2652 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2653 MachineInstrBuilder(MF, I).addImm(Width - 1);
2654
2655 if (DstSize < 64) {
2656 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&(static_cast <bool> (DstSize == 32 && SrcTy.getSizeInBits
() == 16 && "unexpected G_INSERT types") ? void (0) :
__assert_fail ("DstSize == 32 && SrcTy.getSizeInBits() == 16 && \"unexpected G_INSERT types\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2657, __extension__ __PRETTY_FUNCTION__))
2657 "unexpected G_INSERT types")(static_cast <bool> (DstSize == 32 && SrcTy.getSizeInBits
() == 16 && "unexpected G_INSERT types") ? void (0) :
__assert_fail ("DstSize == 32 && SrcTy.getSizeInBits() == 16 && \"unexpected G_INSERT types\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2657, __extension__ __PRETTY_FUNCTION__))
;
2658 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2659 }
2660
2661 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2662 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2663 TII.get(AArch64::SUBREG_TO_REG))
2664 .addDef(SrcReg)
2665 .addImm(0)
2666 .addUse(I.getOperand(2).getReg())
2667 .addImm(AArch64::sub_32);
2668 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2669 AArch64::GPR32RegClass, MRI);
2670 I.getOperand(2).setReg(SrcReg);
2671
2672 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2673 }
2674 case TargetOpcode::G_FRAME_INDEX: {
2675 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2676 if (Ty != LLT::pointer(0, 64)) {
2677 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Tydo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_FRAME_INDEX pointer has type: "
<< Ty << ", expected: " << LLT::pointer(0,
64) << '\n'; } } while (false)
2678 << ", expected: " << LLT::pointer(0, 64) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_FRAME_INDEX pointer has type: "
<< Ty << ", expected: " << LLT::pointer(0,
64) << '\n'; } } while (false)
;
2679 return false;
2680 }
2681 I.setDesc(TII.get(AArch64::ADDXri));
2682
2683 // MOs for a #0 shifted immediate.
2684 I.addOperand(MachineOperand::CreateImm(0));
2685 I.addOperand(MachineOperand::CreateImm(0));
2686
2687 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2688 }
2689
2690 case TargetOpcode::G_GLOBAL_VALUE: {
2691 auto GV = I.getOperand(1).getGlobal();
2692 if (GV->isThreadLocal())
2693 return selectTLSGlobalValue(I, MRI);
2694
2695 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
2696 if (OpFlags & AArch64II::MO_GOT) {
2697 I.setDesc(TII.get(AArch64::LOADgot));
2698 I.getOperand(1).setTargetFlags(OpFlags);
2699 } else if (TM.getCodeModel() == CodeModel::Large) {
2700 // Materialize the global using movz/movk instructions.
2701 materializeLargeCMVal(I, GV, OpFlags);
2702 I.eraseFromParent();
2703 return true;
2704 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2705 I.setDesc(TII.get(AArch64::ADR));
2706 I.getOperand(1).setTargetFlags(OpFlags);
2707 } else {
2708 I.setDesc(TII.get(AArch64::MOVaddr));
2709 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2710 MachineInstrBuilder MIB(MF, I);
2711 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2712 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2713 }
2714 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2715 }
2716
2717 case TargetOpcode::G_ZEXTLOAD:
2718 case TargetOpcode::G_LOAD:
2719 case TargetOpcode::G_STORE: {
2720 GLoadStore &LdSt = cast<GLoadStore>(I);
2721 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2722 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
2723
2724 if (PtrTy != LLT::pointer(0, 64)) {
2725 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTydo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Load/Store pointer has type: "
<< PtrTy << ", expected: " << LLT::pointer
(0, 64) << '\n'; } } while (false)
2726 << ", expected: " << LLT::pointer(0, 64) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Load/Store pointer has type: "
<< PtrTy << ", expected: " << LLT::pointer
(0, 64) << '\n'; } } while (false)
;
2727 return false;
2728 }
2729
2730 uint64_t MemSizeInBytes = LdSt.getMemSize();
2731 unsigned MemSizeInBits = LdSt.getMemSizeInBits();
2732 AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
2733
2734 // Need special instructions for atomics that affect ordering.
2735 if (Order != AtomicOrdering::NotAtomic &&
2736 Order != AtomicOrdering::Unordered &&
2737 Order != AtomicOrdering::Monotonic) {
2738 assert(!isa<GZExtLoad>(LdSt))(static_cast <bool> (!isa<GZExtLoad>(LdSt)) ? void
(0) : __assert_fail ("!isa<GZExtLoad>(LdSt)", "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2738, __extension__ __PRETTY_FUNCTION__))
;
2739 if (MemSizeInBytes > 64)
2740 return false;
2741
2742 if (isa<GLoad>(LdSt)) {
2743 static unsigned Opcodes[] = {AArch64::LDARB, AArch64::LDARH,
2744 AArch64::LDARW, AArch64::LDARX};
2745 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2746 } else {
2747 static unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
2748 AArch64::STLRW, AArch64::STLRX};
2749 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2750 }
2751 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2752 return true;
2753 }
2754
2755#ifndef NDEBUG
2756 const Register PtrReg = LdSt.getPointerReg();
2757 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2758 // Sanity-check the pointer register.
2759 assert(PtrRB.getID() == AArch64::GPRRegBankID &&(static_cast <bool> (PtrRB.getID() == AArch64::GPRRegBankID
&& "Load/Store pointer operand isn't a GPR") ? void (
0) : __assert_fail ("PtrRB.getID() == AArch64::GPRRegBankID && \"Load/Store pointer operand isn't a GPR\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2760, __extension__ __PRETTY_FUNCTION__))
2760 "Load/Store pointer operand isn't a GPR")(static_cast <bool> (PtrRB.getID() == AArch64::GPRRegBankID
&& "Load/Store pointer operand isn't a GPR") ? void (
0) : __assert_fail ("PtrRB.getID() == AArch64::GPRRegBankID && \"Load/Store pointer operand isn't a GPR\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2760, __extension__ __PRETTY_FUNCTION__))
;
2761 assert(MRI.getType(PtrReg).isPointer() &&(static_cast <bool> (MRI.getType(PtrReg).isPointer() &&
"Load/Store pointer operand isn't a pointer") ? void (0) : __assert_fail
("MRI.getType(PtrReg).isPointer() && \"Load/Store pointer operand isn't a pointer\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2762, __extension__ __PRETTY_FUNCTION__))
2762 "Load/Store pointer operand isn't a pointer")(static_cast <bool> (MRI.getType(PtrReg).isPointer() &&
"Load/Store pointer operand isn't a pointer") ? void (0) : __assert_fail
("MRI.getType(PtrReg).isPointer() && \"Load/Store pointer operand isn't a pointer\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2762, __extension__ __PRETTY_FUNCTION__))
;
2763#endif
2764
2765 const Register ValReg = LdSt.getReg(0);
2766 const LLT ValTy = MRI.getType(ValReg);
2767 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
2768
2769 // The code below doesn't support truncating stores, so we need to split it
2770 // again.
2771 if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2772 unsigned SubReg;
2773 LLT MemTy = LdSt.getMMO().getMemoryType();
2774 auto *RC = getRegClassForTypeOnBank(MemTy, RB, RBI);
2775 if (!getSubRegForClass(RC, TRI, SubReg))
2776 return false;
2777
2778 // Generate a subreg copy.
2779 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
2780 .addReg(ValReg, 0, SubReg)
2781 .getReg(0);
2782 RBI.constrainGenericRegister(Copy, *RC, MRI);
2783 LdSt.getOperand(0).setReg(Copy);
2784 } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2785 // If this is an any-extending load from the FPR bank, split it into a regular
2786 // load + extend.
2787 if (RB.getID() == AArch64::FPRRegBankID) {
2788 unsigned SubReg;
2789 LLT MemTy = LdSt.getMMO().getMemoryType();
2790 auto *RC = getRegClassForTypeOnBank(MemTy, RB, RBI);
2791 if (!getSubRegForClass(RC, TRI, SubReg))
2792 return false;
2793 Register OldDst = LdSt.getReg(0);
2794 Register NewDst =
2795 MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType());
2796 LdSt.getOperand(0).setReg(NewDst);
2797 MRI.setRegBank(NewDst, RB);
2798 // Generate a SUBREG_TO_REG to extend it.
2799 MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));
2800 MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
2801 .addImm(0)
2802 .addUse(NewDst)
2803 .addImm(SubReg);
2804 auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB, RBI);
2805 RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
2806 MIB.setInstr(LdSt);
2807 }
2808 }
2809
2810 // Helper lambda for partially selecting I. Either returns the original
2811 // instruction with an updated opcode, or a new instruction.
2812 auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
2813 bool IsStore = isa<GStore>(I);
2814 const unsigned NewOpc =
2815 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
2816 if (NewOpc == I.getOpcode())
2817 return nullptr;
2818 // Check if we can fold anything into the addressing mode.
2819 auto AddrModeFns =
2820 selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
2821 if (!AddrModeFns) {
2822 // Can't fold anything. Use the original instruction.
2823 I.setDesc(TII.get(NewOpc));
2824 I.addOperand(MachineOperand::CreateImm(0));
2825 return &I;
2826 }
2827
2828 // Folded something. Create a new instruction and return it.
2829 auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
2830 Register CurValReg = I.getOperand(0).getReg();
2831 IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
2832 NewInst.cloneMemRefs(I);
2833 for (auto &Fn : *AddrModeFns)
2834 Fn(NewInst);
2835 I.eraseFromParent();
2836 return &*NewInst;
2837 };
2838
2839 MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
2840 if (!LoadStore)
2841 return false;
2842
2843 // If we're storing a 0, use WZR/XZR.
2844 if (Opcode == TargetOpcode::G_STORE) {
2845 auto CVal = getIConstantVRegValWithLookThrough(
2846 LoadStore->getOperand(0).getReg(), MRI);
2847 if (CVal && CVal->Value == 0) {
2848 switch (LoadStore->getOpcode()) {
2849 case AArch64::STRWui:
2850 case AArch64::STRHHui:
2851 case AArch64::STRBBui:
2852 LoadStore->getOperand(0).setReg(AArch64::WZR);
2853 break;
2854 case AArch64::STRXui:
2855 LoadStore->getOperand(0).setReg(AArch64::XZR);
2856 break;
2857 }
2858 }
2859 }
2860
2861 if (IsZExtLoad) {
2862 // The zextload from a smaller type to i32 should be handled by the
2863 // importer.
2864 if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
2865 return false;
2866 // If we have a ZEXTLOAD then change the load's type to be a narrower reg
2867 // and zero_extend with SUBREG_TO_REG.
2868 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2869 Register DstReg = LoadStore->getOperand(0).getReg();
2870 LoadStore->getOperand(0).setReg(LdReg);
2871
2872 MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
2873 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
2874 .addImm(0)
2875 .addUse(LdReg)
2876 .addImm(AArch64::sub_32);
2877 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2878 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
2879 MRI);
2880 }
2881 return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2882 }
2883
2884 case TargetOpcode::G_SMULH:
2885 case TargetOpcode::G_UMULH: {
2886 // Reject the various things we don't support yet.
2887 if (unsupportedBinOp(I, RBI, MRI, TRI))
2888 return false;
2889
2890 const Register DefReg = I.getOperand(0).getReg();
2891 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2892
2893 if (RB.getID() != AArch64::GPRRegBankID) {
2894 LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_[SU]MULH on bank: " <<
RB << ", expected: GPR\n"; } } while (false)
;
2895 return false;
2896 }
2897
2898 if (Ty != LLT::scalar(64)) {
2899 LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Tydo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_[SU]MULH has type: " <<
Ty << ", expected: " << LLT::scalar(64) <<
'\n'; } } while (false)
2900 << ", expected: " << LLT::scalar(64) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_[SU]MULH has type: " <<
Ty << ", expected: " << LLT::scalar(64) <<
'\n'; } } while (false)
;
2901 return false;
2902 }
2903
2904 unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
2905 : AArch64::UMULHrr;
2906 I.setDesc(TII.get(NewOpc));
2907
2908 // Now that we selected an opcode, we need to constrain the register
2909 // operands to use appropriate classes.
2910 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2911 }
2912 case TargetOpcode::G_LSHR:
2913 case TargetOpcode::G_ASHR:
2914 if (MRI.getType(I.getOperand(0).getReg()).isVector())
2915 return selectVectorAshrLshr(I, MRI);
2916 LLVM_FALLTHROUGH[[gnu::fallthrough]];
2917 case TargetOpcode::G_SHL:
2918 if (Opcode == TargetOpcode::G_SHL &&
2919 MRI.getType(I.getOperand(0).getReg()).isVector())
2920 return selectVectorSHL(I, MRI);
2921 LLVM_FALLTHROUGH[[gnu::fallthrough]];
2922 case TargetOpcode::G_FADD:
2923 case TargetOpcode::G_FSUB:
2924 case TargetOpcode::G_FMUL:
2925 case TargetOpcode::G_FDIV:
2926 case TargetOpcode::G_OR: {
2927 // Reject the various things we don't support yet.
2928 if (unsupportedBinOp(I, RBI, MRI, TRI))
2929 return false;
2930
2931 const unsigned OpSize = Ty.getSizeInBits();
2932
2933 const Register DefReg = I.getOperand(0).getReg();
2934 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2935
2936 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
2937 if (NewOpc == I.getOpcode())
2938 return false;
2939
2940 I.setDesc(TII.get(NewOpc));
2941 // FIXME: Should the type be always reset in setDesc?
2942
2943 // Now that we selected an opcode, we need to constrain the register
2944 // operands to use appropriate classes.
2945 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2946 }
2947
2948 case TargetOpcode::G_PTR_ADD: {
2949 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
2950 I.eraseFromParent();
2951 return true;
2952 }
2953 case TargetOpcode::G_SADDO:
2954 case TargetOpcode::G_UADDO:
2955 case TargetOpcode::G_SSUBO:
2956 case TargetOpcode::G_USUBO: {
2957 // Emit the operation and get the correct condition code.
2958 auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(),
2959 I.getOperand(2), I.getOperand(3), MIB);
2960
2961 // Now, put the overflow result in the register given by the first operand
2962 // to the overflow op. CSINC increments the result when the predicate is
2963 // false, so to get the increment when it's true, we need to use the
2964 // inverse. In this case, we want to increment when carry is set.
2965 Register ZReg = AArch64::WZR;
2966 auto CsetMI = MIB.buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
2967 {ZReg, ZReg})
2968 .addImm(getInvertedCondCode(OpAndCC.second));
2969 constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
2970 I.eraseFromParent();
2971 return true;
2972 }
2973
2974 case TargetOpcode::G_PTRMASK: {
2975 Register MaskReg = I.getOperand(2).getReg();
2976 Optional<int64_t> MaskVal = getIConstantVRegSExtVal(MaskReg, MRI);
2977 // TODO: Implement arbitrary cases
2978 if (!MaskVal || !isShiftedMask_64(*MaskVal))
2979 return false;
2980
2981 uint64_t Mask = *MaskVal;
2982 I.setDesc(TII.get(AArch64::ANDXri));
2983 I.getOperand(2).ChangeToImmediate(
2984 AArch64_AM::encodeLogicalImmediate(Mask, 64));
2985
2986 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2987 }
2988 case TargetOpcode::G_PTRTOINT:
2989 case TargetOpcode::G_TRUNC: {
2990 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2991 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
2992
2993 const Register DstReg = I.getOperand(0).getReg();
2994 const Register SrcReg = I.getOperand(1).getReg();
2995
2996 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2997 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2998
2999 if (DstRB.getID() != SrcRB.getID()) {
3000 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n"
; } } while (false)
3001 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n"
; } } while (false)
;
3002 return false;
3003 }
3004
3005 if (DstRB.getID() == AArch64::GPRRegBankID) {
3006 const TargetRegisterClass *DstRC =
3007 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
3008 if (!DstRC)
3009 return false;
3010
3011 const TargetRegisterClass *SrcRC =
3012 getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
3013 if (!SrcRC)
3014 return false;
3015
3016 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
3017 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
3018 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n"
; } } while (false)
;
3019 return false;
3020 }
3021
3022 if (DstRC == SrcRC) {
3023 // Nothing to be done
3024 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
3025 SrcTy == LLT::scalar(64)) {
3026 llvm_unreachable("TableGen can import this case")::llvm::llvm_unreachable_internal("TableGen can import this case"
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 3026)
;
3027 return false;
3028 } else if (DstRC == &AArch64::GPR32RegClass &&
3029 SrcRC == &AArch64::GPR64RegClass) {
3030 I.getOperand(1).setSubReg(AArch64::sub_32);
3031 } else {
3032 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n"
; } } while (false)
3033 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n"
; } } while (false)
;
3034 return false;
3035 }
3036
3037 I.setDesc(TII.get(TargetOpcode::COPY));
3038 return true;
3039 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
3040 if (DstTy == LLT::fixed_vector(4, 16) &&
3041 SrcTy == LLT::fixed_vector(4, 32)) {
3042 I.setDesc(TII.get(AArch64::XTNv4i16));
3043 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3044 return true;
3045 }
3046
3047 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
3048 MachineInstr *Extract = emitExtractVectorElt(
3049 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
3050 if (!Extract)
3051 return false;
3052 I.eraseFromParent();
3053 return true;
3054 }
3055
3056 // We might have a vector G_PTRTOINT, in which case just emit a COPY.
3057 if (Opcode == TargetOpcode::G_PTRTOINT) {
3058 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector")(static_cast <bool> (DstTy.isVector() && "Expected an FPR ptrtoint to be a vector"
) ? void (0) : __assert_fail ("DstTy.isVector() && \"Expected an FPR ptrtoint to be a vector\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 3058, __extension__ __PRETTY_FUNCTION__))
;
3059 I.setDesc(TII.get(TargetOpcode::COPY));
3060 return selectCopy(I, TII, MRI, TRI, RBI);
3061 }
3062 }
3063
3064 return false;
3065 }
3066
3067 case TargetOpcode::G_ANYEXT: {
3068 if (selectUSMovFromExtend(I, MRI))
3069 return true;
3070
3071 const Register DstReg = I.getOperand(0).getReg();
3072 const Register SrcReg = I.getOperand(1).getReg();
3073
3074 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
3075 if (RBDst.getID() != AArch64::GPRRegBankID) {
3076 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDstdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_ANYEXT on bank: " <<
RBDst << ", expected: GPR\n"; } } while (false)
3077 << ", expected: GPR\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_ANYEXT on bank: " <<
RBDst << ", expected: GPR\n"; } } while (false)
;
3078 return false;
3079 }
3080
3081 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
3082 if (RBSrc.getID() != AArch64::GPRRegBankID) {
3083 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrcdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_ANYEXT on bank: " <<
RBSrc << ", expected: GPR\n"; } } while (false)
3084 << ", expected: GPR\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_ANYEXT on bank: " <<
RBSrc << ", expected: GPR\n"; } } while (false)
;
3085 return false;
3086 }
3087
3088 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
3089
3090 if (DstSize == 0) {
3091 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n"
; } } while (false)
;
3092 return false;
3093 }
3094
3095 if (DstSize != 64 && DstSize > 32) {
3096 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSizedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_ANYEXT to size: " <<
DstSize << ", expected: 32 or 64\n"; } } while (false)
3097 << ", expected: 32 or 64\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_ANYEXT to size: " <<
DstSize << ", expected: 32 or 64\n"; } } while (false)
;
3098 return false;
3099 }
3100 // At this point G_ANYEXT is just like a plain COPY, but we need
3101 // to explicitly form the 64-bit value if any.
3102 if (DstSize > 32) {
3103 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
3104 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
3105 .addDef(ExtSrc)
3106 .addImm(0)
3107 .addUse(SrcReg)
3108 .addImm(AArch64::sub_32);
3109 I.getOperand(1).setReg(ExtSrc);
3110 }
3111 return selectCopy(I, TII, MRI, TRI, RBI);
3112 }
3113
3114 case TargetOpcode::G_ZEXT:
3115 case TargetOpcode::G_SEXT_INREG:
3116 case TargetOpcode::G_SEXT: {
3117 if (selectUSMovFromExtend(I, MRI))
3118 return true;
3119
3120 unsigned Opcode = I.getOpcode();
3121 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3122 const Register DefReg = I.getOperand(0).getReg();
3123 Register SrcReg = I.getOperand(1).getReg();
3124 const LLT DstTy = MRI.getType(DefReg);
3125 const LLT SrcTy = MRI.getType(SrcReg);
3126 unsigned DstSize = DstTy.getSizeInBits();
3127 unsigned SrcSize = SrcTy.getSizeInBits();
3128
3129 // SEXT_INREG has the same src reg size as dst, the size of the value to be
3130 // extended is encoded in the imm.
3131 if (Opcode == TargetOpcode::G_SEXT_INREG)
3132 SrcSize = I.getOperand(2).getImm();
3133
3134 if (DstTy.isVector())
3135 return false; // Should be handled by imported patterns.
3136
3137 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==(static_cast <bool> ((*RBI.getRegBank(DefReg, MRI, TRI)
).getID() == AArch64::GPRRegBankID && "Unexpected ext regbank"
) ? void (0) : __assert_fail ("(*RBI.getRegBank(DefReg, MRI, TRI)).getID() == AArch64::GPRRegBankID && \"Unexpected ext regbank\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 3139, __extension__ __PRETTY_FUNCTION__))
3138 AArch64::GPRRegBankID &&(static_cast <bool> ((*RBI.getRegBank(DefReg, MRI, TRI)
).getID() == AArch64::GPRRegBankID && "Unexpected ext regbank"
) ? void (0) : __assert_fail ("(*RBI.getRegBank(DefReg, MRI, TRI)).getID() == AArch64::GPRRegBankID && \"Unexpected ext regbank\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 3139, __extension__ __PRETTY_FUNCTION__))
3139 "Unexpected ext regbank")(static_cast <bool> ((*RBI.getRegBank(DefReg, MRI, TRI)
).getID() == AArch64::GPRRegBankID && "Unexpected ext regbank"
) ? void (0) : __assert_fail ("(*RBI.getRegBank(DefReg, MRI, TRI)).getID() == AArch64::GPRRegBankID && \"Unexpected ext regbank\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 3139, __extension__ __PRETTY_FUNCTION__))
;
3140
3141 MachineInstr *ExtI;
3142
3143 // First check if we're extending the result of a load which has a dest type
3144 // smaller than 32 bits, then this zext is redundant. GPR32 is the smallest
3145 // GPR register on AArch64 and all loads which are smaller automatically
3146 // zero-extend the upper bits. E.g.
3147 // %v(s8) = G_LOAD %p, :: (load 1)
3148 // %v2(s32) = G_ZEXT %v(s8)
3149 if (!IsSigned) {
3150 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
3151 bool IsGPR =
3152 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
3153 if (LoadMI && IsGPR) {
3154 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
3155 unsigned BytesLoaded = MemOp->getSize();
3156 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
3157 return selectCopy(I, TII, MRI, TRI, RBI);
3158 }
3159
3160 // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
3161 // + SUBREG_TO_REG.
3162 //
3163 // If we are zero extending from 32 bits to 64 bits, it's possible that
3164 // the instruction implicitly does the zero extend for us. In that case,
3165 // we only need the SUBREG_TO_REG.
3166 if (IsGPR && SrcSize == 32 && DstSize == 64) {
3167 // Unlike with the G_LOAD case, we don't want to look through copies
3168 // here. (See isDef32.)
3169 MachineInstr *Def = MRI.getVRegDef(SrcReg);
3170 Register SubregToRegSrc = SrcReg;
3171
3172 // Does the instruction implicitly zero extend?
3173 if (!Def || !isDef32(*Def)) {
3174 // No. Zero out using an OR.
3175 Register OrDst = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3176 const Register ZReg = AArch64::WZR;
3177 MIB.buildInstr(AArch64::ORRWrs, {OrDst}, {ZReg, SrcReg}).addImm(0);
3178 SubregToRegSrc = OrDst;
3179 }
3180
3181 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
3182 .addImm(0)
3183 .addUse(SubregToRegSrc)
3184 .addImm(AArch64::sub_32);
3185
3186 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
3187 MRI)) {
3188 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Failed to constrain G_ZEXT destination\n"
; } } while (false)
;
3189 return false;
3190 }
3191
3192 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3193 MRI)) {
3194 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Failed to constrain G_ZEXT source\n"
; } } while (false)
;
3195 return false;
3196 }
3197
3198 I.eraseFromParent();
3199 return true;
3200 }
3201 }
3202
3203 if (DstSize == 64) {
3204 if (Opcode != TargetOpcode::G_SEXT_INREG) {
3205 // FIXME: Can we avoid manually doing this?
3206 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3207 MRI)) {
3208 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Failed to constrain " <<
TII.getName(Opcode) << " operand\n"; } } while (false)
3209 << " operand\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Failed to constrain " <<
TII.getName(Opcode) << " operand\n"; } } while (false)
;
3210 return false;
3211 }
3212 SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
3213 {&AArch64::GPR64RegClass}, {})
3214 .addImm(0)
3215 .addUse(SrcReg)
3216 .addImm(AArch64::sub_32)
3217 .getReg(0);
3218 }
3219
3220 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3221 {DefReg}, {SrcReg})
3222 .addImm(0)
3223 .addImm(SrcSize - 1);
3224 } else if (DstSize <= 32) {
3225 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3226 {DefReg}, {SrcReg})
3227 .addImm(0)
3228 .addImm(SrcSize - 1);
3229 } else {
3230 return false;
3231 }
3232
3233 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
3234 I.eraseFromParent();
3235 return true;
3236 }
3237
3238 case TargetOpcode::G_SITOFP:
3239 case TargetOpcode::G_UITOFP:
3240 case TargetOpcode::G_FPTOSI:
3241 case TargetOpcode::G_FPTOUI: {
3242 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
3243 SrcTy = MRI.getType(I.getOperand(1).getReg());
3244 const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
3245 if (NewOpc == Opcode)
3246 return false;
3247
3248 I.setDesc(TII.get(NewOpc));
3249 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3250
3251 return true;
3252 }
3253
3254 case TargetOpcode::G_FREEZE:
3255 return selectCopy(I, TII, MRI, TRI, RBI);
3256
3257 case TargetOpcode::G_INTTOPTR:
3258 // The importer is currently unable to import pointer types since they
3259 // didn't exist in SelectionDAG.
3260 return selectCopy(I, TII, MRI, TRI, RBI);
3261
3262 case TargetOpcode::G_BITCAST:
3263 // Imported SelectionDAG rules can handle every bitcast except those that
3264 // bitcast from a type to the same type. Ideally, these shouldn't occur
3265 // but we might not run an optimizer that deletes them. The other exception
3266 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3267 // of them.
3268 return selectCopy(I, TII, MRI, TRI, RBI);
3269
3270 case TargetOpcode::G_SELECT: {
3271 if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
3272 LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Tydo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_SELECT cond has type: "
<< Ty << ", expected: " << LLT::scalar(1) <<
'\n'; } } while (false)
3273 << ", expected: " << LLT::scalar(1) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_SELECT cond has type: "
<< Ty << ", expected: " << LLT::scalar(1) <<
'\n'; } } while (false)
;
3274 return false;
3275 }
3276
3277 const Register CondReg = I.getOperand(1).getReg();
3278 const Register TReg = I.getOperand(2).getReg();
3279 const Register FReg = I.getOperand(3).getReg();
3280
3281 if (tryOptSelect(I))
3282 return true;
3283
3284 // Make sure to use an unused vreg instead of wzr, so that the peephole
3285 // optimizations will be able to optimize these.
3286 Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3287 auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3288 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
3289 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3290 if (!emitSelect(I.getOperand(0).getReg(), TReg, FReg, AArch64CC::NE, MIB))
3291 return false;
3292 I.eraseFromParent();
3293 return true;
3294 }
3295 case TargetOpcode::G_ICMP: {
3296 if (Ty.isVector())
3297 return selectVectorICmp(I, MRI);
3298
3299 if (Ty != LLT::scalar(32)) {
3300 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Tydo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_ICMP result has type: "
<< Ty << ", expected: " << LLT::scalar(32)
<< '\n'; } } while (false)
3301 << ", expected: " << LLT::scalar(32) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_ICMP result has type: "
<< Ty << ", expected: " << LLT::scalar(32)
<< '\n'; } } while (false)
;
3302 return false;
3303 }
3304
3305 auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3306 emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1),
3307 MIB);
3308 emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIB);
3309 I.eraseFromParent();
3310 return true;
3311 }
3312
3313 case TargetOpcode::G_FCMP: {
3314 CmpInst::Predicate Pred =
3315 static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3316 if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
3317 Pred) ||
3318 !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
3319 return false;
3320 I.eraseFromParent();
3321 return true;
3322 }
3323 case TargetOpcode::G_VASTART:
3324 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3325 : selectVaStartAAPCS(I, MF, MRI);
3326 case TargetOpcode::G_INTRINSIC:
3327 return selectIntrinsic(I, MRI);
3328 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3329 return selectIntrinsicWithSideEffects(I, MRI);
3330 case TargetOpcode::G_IMPLICIT_DEF: {
3331 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
3332 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3333 const Register DstReg = I.getOperand(0).getReg();
3334 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3335 const TargetRegisterClass *DstRC =
3336 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
3337 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
3338 return true;
3339 }
3340 case TargetOpcode::G_BLOCK_ADDR: {
3341 if (TM.getCodeModel() == CodeModel::Large) {
3342 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
3343 I.eraseFromParent();
3344 return true;
3345 } else {
3346 I.setDesc(TII.get(AArch64::MOVaddrBA));
3347 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
3348 I.getOperand(0).getReg())
3349 .addBlockAddress(I.getOperand(1).getBlockAddress(),
3350 /* Offset */ 0, AArch64II::MO_PAGE)
3351 .addBlockAddress(
3352 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
3353 AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3354 I.eraseFromParent();
3355 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3356 }
3357 }
3358 case AArch64::G_DUP: {
3359 // When the scalar of G_DUP is an s8/s16 gpr, they can't be selected by
3360 // imported patterns. Do it manually here. Avoiding generating s16 gpr is
3361 // difficult because at RBS we may end up pessimizing the fpr case if we
3362 // decided to add an anyextend to fix this. Manual selection is the most
3363 // robust solution for now.
3364 if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3365 AArch64::GPRRegBankID)
3366 return false; // We expect the fpr regbank case to be imported.
3367 LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3368 if (VecTy == LLT::fixed_vector(8, 8))
3369 I.setDesc(TII.get(AArch64::DUPv8i8gpr));
3370 else if (VecTy == LLT::fixed_vector(16, 8))
3371 I.setDesc(TII.get(AArch64::DUPv16i8gpr));
3372 else if (VecTy == LLT::fixed_vector(4, 16))
3373 I.setDesc(TII.get(AArch64::DUPv4i16gpr));
3374 else if (VecTy == LLT::fixed_vector(8, 16))
3375 I.setDesc(TII.get(AArch64::DUPv8i16gpr));
3376 else
3377 return false;
3378 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3379 }
3380 case TargetOpcode::G_INTRINSIC_TRUNC:
3381 return selectIntrinsicTrunc(I, MRI);
3382 case TargetOpcode::G_INTRINSIC_ROUND:
3383 return selectIntrinsicRound(I, MRI);
3384 case TargetOpcode::G_BUILD_VECTOR:
3385 return selectBuildVector(I, MRI);
3386 case TargetOpcode::G_MERGE_VALUES:
3387 return selectMergeValues(I, MRI);
3388 case TargetOpcode::G_UNMERGE_VALUES:
3389 return selectUnmergeValues(I, MRI);
3390 case TargetOpcode::G_SHUFFLE_VECTOR:
3391 return selectShuffleVector(I, MRI);
3392 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3393 return selectExtractElt(I, MRI);
3394 case TargetOpcode::G_INSERT_VECTOR_ELT:
3395 return selectInsertElt(I, MRI);
3396 case TargetOpcode::G_CONCAT_VECTORS:
3397 return selectConcatVectors(I, MRI);
3398 case TargetOpcode::G_JUMP_TABLE:
3399 return selectJumpTable(I, MRI);
3400 case TargetOpcode::G_VECREDUCE_FADD:
3401 case TargetOpcode::G_VECREDUCE_ADD:
3402 return selectReduction(I, MRI);
3403 }
3404
3405 return false;
3406}
3407
3408bool AArch64InstructionSelector::selectReduction(MachineInstr &I,
3409 MachineRegisterInfo &MRI) {
3410 Register VecReg = I.getOperand(1).getReg();
3411 LLT VecTy = MRI.getType(VecReg);
3412 if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) {
3413 // For <2 x i32> ADDPv2i32 generates an FPR64 value, so we need to emit
3414 // a subregister copy afterwards.
3415 if (VecTy == LLT::fixed_vector(2, 32)) {
3416 Register DstReg = I.getOperand(0).getReg();
3417 auto AddP = MIB.buildInstr(AArch64::ADDPv2i32, {&AArch64::FPR64RegClass},
3418 {VecReg, VecReg});
3419 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3420 .addReg(AddP.getReg(0), 0, AArch64::ssub)
3421 .getReg(0);
3422 RBI.constrainGenericRegister(Copy, AArch64::FPR32RegClass, MRI);
3423 I.eraseFromParent();
3424 return constrainSelectedInstRegOperands(*AddP, TII, TRI, RBI);
3425 }
3426
3427 unsigned Opc = 0;
3428 if (VecTy == LLT::fixed_vector(16, 8))
3429 Opc = AArch64::ADDVv16i8v;
3430 else if (VecTy == LLT::fixed_vector(8, 16))
3431 Opc = AArch64::ADDVv8i16v;
3432 else if (VecTy == LLT::fixed_vector(4, 32))
3433 Opc = AArch64::ADDVv4i32v;
3434 else if (VecTy == LLT::fixed_vector(2, 64))
3435 Opc = AArch64::ADDPv2i64p;
3436 else {
3437 LLVM_DEBUG(dbgs() << "Unhandled type for add reduction")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unhandled type for add reduction"
; } } while (false)
;
3438 return false;
3439 }
3440 I.setDesc(TII.get(Opc));
3441 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3442 }
3443
3444 if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) {
3445 unsigned Opc = 0;
3446 if (VecTy == LLT::fixed_vector(2, 32))
3447 Opc = AArch64::FADDPv2i32p;
3448 else if (VecTy == LLT::fixed_vector(2, 64))
3449 Opc = AArch64::FADDPv2i64p;
3450 else {
3451 LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unhandled type for fadd reduction"
; } } while (false)
;
3452 return false;
3453 }
3454 I.setDesc(TII.get(Opc));
3455 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3456 }
3457 return false;
3458}
3459
3460bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3461 MachineRegisterInfo &MRI) {
3462 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT")(static_cast <bool> (I.getOpcode() == TargetOpcode::G_BRJT
&& "Expected G_BRJT") ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_BRJT && \"Expected G_BRJT\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 3462, __extension__ __PRETTY_FUNCTION__))
;
3463 Register JTAddr = I.getOperand(0).getReg();
3464 unsigned JTI = I.getOperand(1).getIndex();
3465 Register Index = I.getOperand(2).getReg();
3466
3467 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3468 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3469
3470 MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
3471 auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
3472 {TargetReg, ScratchReg}, {JTAddr, Index})
3473 .addJumpTableIndex(JTI);
3474 // Build the indirect branch.
3475 MIB.buildInstr(AArch64::BR, {}, {TargetReg});
3476 I.eraseFromParent();
3477 return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
3478}
3479
3480bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
3481 MachineRegisterInfo &MRI) {
3482 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table")(static_cast <bool> (I.getOpcode() == TargetOpcode::G_JUMP_TABLE
&& "Expected jump table") ? void (0) : __assert_fail
("I.getOpcode() == TargetOpcode::G_JUMP_TABLE && \"Expected jump table\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 3482, __extension__ __PRETTY_FUNCTION__))
;
3483 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!")(static_cast <bool> (I.getOperand(1).isJTI() &&
"Jump table op should have a JTI!") ? void (0) : __assert_fail
("I.getOperand(1).isJTI() && \"Jump table op should have a JTI!\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 3483, __extension__ __PRETTY_FUNCTION__))
;
3484
3485 Register DstReg = I.getOperand(0).getReg();
3486 unsigned JTI = I.getOperand(1).getIndex();
3487 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
3488 auto MovMI =
3489 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3490 .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
3491 .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3492 I.eraseFromParent();
3493 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3494}
3495
3496bool AArch64InstructionSelector::selectTLSGlobalValue(
3497 MachineInstr &I, MachineRegisterInfo &MRI) {
3498 if (!STI.isTargetMachO())
3499 return false;
3500 MachineFunction &MF = *I.getParent()->getParent();
3501 MF.getFrameInfo().setAdjustsStack(true);
3502
3503 const auto &GlobalOp = I.getOperand(1);
3504 assert(GlobalOp.getOffset() == 0 &&(static_cast <bool> (GlobalOp.getOffset() == 0 &&
"Shouldn't have an offset on TLS globals!") ? void (0) : __assert_fail
("GlobalOp.getOffset() == 0 && \"Shouldn't have an offset on TLS globals!\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 3505, __extension__ __PRETTY_FUNCTION__))
3505 "Shouldn't have an offset on TLS globals!")(static_cast <bool> (GlobalOp.getOffset() == 0 &&
"Shouldn't have an offset on TLS globals!") ? void (0) : __assert_fail
("GlobalOp.getOffset() == 0 && \"Shouldn't have an offset on TLS globals!\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 3505, __extension__ __PRETTY_FUNCTION__))
;
3506 const GlobalValue &GV = *GlobalOp.getGlobal();
3507
3508 auto LoadGOT =
3509 MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3510 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
3511
3512 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3513 {LoadGOT.getReg(0)})
3514 .addImm(0);
3515
3516 MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
3517 // TLS calls preserve all registers except those that absolutely must be
3518 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3519 // silly).
3520 MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load})
3521 .addUse(AArch64::X0, RegState::Implicit)
3522 .addDef(AArch64::X0, RegState::Implicit)
3523 .addRegMask(TRI.getTLSCallPreservedMask());
3524
3525 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
3526 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3527 MRI);
3528 I.eraseFromParent();
3529 return true;
3530}
3531
3532bool AArch64InstructionSelector::selectIntrinsicTrunc(
3533 MachineInstr &I, MachineRegisterInfo &MRI) const {
3534 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3535
3536 // Select the correct opcode.
3537 unsigned Opc = 0;
3538 if (!SrcTy.isVector()) {
3539 switch (SrcTy.getSizeInBits()) {
3540 default:
3541 case 16:
3542 Opc = AArch64::FRINTZHr;
3543 break;
3544 case 32:
3545 Opc = AArch64::FRINTZSr;
3546 break;
3547 case 64:
3548 Opc = AArch64::FRINTZDr;
3549 break;
3550 }
3551 } else {
3552 unsigned NumElts = SrcTy.getNumElements();
3553 switch (SrcTy.getElementType().getSizeInBits()) {
3554 default:
3555 break;
3556 case 16:
3557 if (NumElts == 4)
3558 Opc = AArch64::FRINTZv4f16;
3559 else if (NumElts == 8)
3560 Opc = AArch64::FRINTZv8f16;
3561 break;
3562 case 32:
3563 if (NumElts == 2)
3564 Opc = AArch64::FRINTZv2f32;
3565 else if (NumElts == 4)
3566 Opc = AArch64::FRINTZv4f32;
3567 break;
3568 case 64:
3569 if (NumElts == 2)
3570 Opc = AArch64::FRINTZv2f64;
3571 break;
3572 }
3573 }
3574
3575 if (!Opc) {
3576 // Didn't get an opcode above, bail.
3577 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n"
; } } while (false)
;
3578 return false;
3579 }
3580
3581 // Legalization would have set us up perfectly for this; we just need to
3582 // set the opcode and move on.
3583 I.setDesc(TII.get(Opc));
3584 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3585}
3586
3587bool AArch64InstructionSelector::selectIntrinsicRound(
3588 MachineInstr &I, MachineRegisterInfo &MRI) const {
3589 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3590
3591 // Select the correct opcode.
3592 unsigned Opc = 0;
3593 if (!SrcTy.isVector()) {
3594 switch (SrcTy.getSizeInBits()) {
3595 default:
3596 case 16:
3597 Opc = AArch64::FRINTAHr;
3598 break;
3599 case 32:
3600 Opc = AArch64::FRINTASr;
3601 break;
3602 case 64:
3603 Opc = AArch64::FRINTADr;
3604 break;
3605 }
3606 } else {
3607 unsigned NumElts = SrcTy.getNumElements();
3608 switch (SrcTy.getElementType().getSizeInBits()) {
3609 default:
3610 break;
3611 case 16:
3612 if (NumElts == 4)
3613 Opc = AArch64::FRINTAv4f16;
3614 else if (NumElts == 8)
3615 Opc = AArch64::FRINTAv8f16;
3616 break;
3617 case 32:
3618 if (NumElts == 2)
3619 Opc = AArch64::FRINTAv2f32;
3620 else if (NumElts == 4)
3621 Opc = AArch64::FRINTAv4f32;
3622 break;
3623 case 64:
3624 if (NumElts == 2)
3625 Opc = AArch64::FRINTAv2f64;
3626 break;
3627 }
3628 }
3629
3630 if (!Opc) {
3631 // Didn't get an opcode above, bail.
3632 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n"
; } } while (false)
;
3633 return false;
3634 }
3635
3636 // Legalization would have set us up perfectly for this; we just need to
3637 // set the opcode and move on.
3638 I.setDesc(TII.get(Opc));
3639 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3640}
3641
3642bool AArch64InstructionSelector::selectVectorICmp(
3643 MachineInstr &I, MachineRegisterInfo &MRI) {
3644 Register DstReg = I.getOperand(0).getReg();
3645 LLT DstTy = MRI.getType(DstReg);
3646 Register SrcReg = I.getOperand(2).getReg();
3647 Register Src2Reg = I.getOperand(3).getReg();
3648 LLT SrcTy = MRI.getType(SrcReg);
3649
3650 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
3651 unsigned NumElts = DstTy.getNumElements();
3652
3653 // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
3654 // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
3655 // Third index is cc opcode:
3656 // 0 == eq
3657 // 1 == ugt
3658 // 2 == uge
3659 // 3 == ult
3660 // 4 == ule
3661 // 5 == sgt
3662 // 6 == sge
3663 // 7 == slt
3664 // 8 == sle
3665 // ne is done by negating 'eq' result.
3666
3667 // This table below assumes that for some comparisons the operands will be
3668 // commuted.
3669 // ult op == commute + ugt op
3670 // ule op == commute + uge op
3671 // slt op == commute + sgt op
3672 // sle op == commute + sge op
3673 unsigned PredIdx = 0;
3674 bool SwapOperands = false;
3675 CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
3676 switch (Pred) {
3677 case CmpInst::ICMP_NE:
3678 case CmpInst::ICMP_EQ:
3679 PredIdx = 0;
3680 break;
3681 case CmpInst::ICMP_UGT:
3682 PredIdx = 1;
3683 break;
3684 case CmpInst::ICMP_UGE:
3685 PredIdx = 2;
3686 break;
3687 case CmpInst::ICMP_ULT:
3688 PredIdx = 3;
3689 SwapOperands = true;
3690 break;
3691 case CmpInst::ICMP_ULE:
3692 PredIdx = 4;
3693 SwapOperands = true;
3694 break;
3695 case CmpInst::ICMP_SGT:
3696 PredIdx = 5;
3697 break;
3698 case CmpInst::ICMP_SGE:
3699 PredIdx = 6;
3700 break;
3701 case CmpInst::ICMP_SLT:
3702 PredIdx = 7;
3703 SwapOperands = true;
3704 break;
3705 case CmpInst::ICMP_SLE:
3706 PredIdx = 8;
3707 SwapOperands = true;
3708 break;
3709 default:
3710 llvm_unreachable("Unhandled icmp predicate")::llvm::llvm_unreachable_internal("Unhandled icmp predicate",
"/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 3710)
;
3711 return false;
3712 }
3713
3714 // This table obviously should be tablegen'd when we have our GISel native
3715 // tablegen selector.
3716
3717 static const unsigned OpcTable[4][4][9] = {
3718 {
3719 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3720 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3721 0 /* invalid */},
3722 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3723 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3724 0 /* invalid */},
3725 {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
3726 AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
3727 AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
3728 {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
3729 AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
3730 AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
3731 },
3732 {
3733 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3734 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3735 0 /* invalid */},
3736 {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
3737 AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
3738 AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
3739 {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
3740 AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
3741 AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
3742 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3743 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3744 0 /* invalid */}
3745 },
3746 {
3747 {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
3748 AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
3749 AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
3750 {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
3751 AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
3752 AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
3753 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3754 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3755 0 /* invalid */},
3756 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3757 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3758 0 /* invalid */}
3759 },
3760 {
3761 {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
3762 AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
3763 AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
3764 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3765 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3766 0 /* invalid */},
3767 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3768 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3769 0 /* invalid */},
3770 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3771 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3772 0 /* invalid */}
3773 },
3774 };
3775 unsigned EltIdx = Log2_32(SrcEltSize / 8);
3776 unsigned NumEltsIdx = Log2_32(NumElts / 2);
3777 unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
3778 if (!Opc) {
3779 LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Could not map G_ICMP to cmp opcode"
; } } while (false)
;
3780 return false;
3781 }
3782
3783 const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3784 const TargetRegisterClass *SrcRC =
3785 getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
3786 if (!SrcRC) {
3787 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Could not determine source register class.\n"
; } } while (false)
;
3788 return false;
3789 }
3790
3791 unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
3792 if (SrcTy.getSizeInBits() == 128)
3793 NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
3794
3795 if (SwapOperands)
3796 std::swap(SrcReg, Src2Reg);
3797
3798 auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
3799 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3800
3801 // Invert if we had a 'ne' cc.
3802 if (NotOpc) {
3803 Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
3804 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3805 } else {
3806 MIB.buildCopy(DstReg, Cmp.getReg(0));
3807 }
3808 RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
3809 I.eraseFromParent();
3810 return true;
3811}
3812
3813MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3814 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3815 MachineIRBuilder &MIRBuilder) const {
3816 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3817
3818 auto BuildFn = [&](unsigned SubregIndex) {
3819 auto Ins =
3820 MIRBuilder
3821 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
3822 .addImm(SubregIndex);
3823 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
3824 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
3825 return &*Ins;
3826 };
3827
3828 switch (EltSize) {
3829 case 16:
3830 return BuildFn(AArch64::hsub);
3831 case 32:
3832 return BuildFn(AArch64::ssub);
3833 case 64:
3834 return BuildFn(AArch64::dsub);
3835 default:
3836 return nullptr;
3837 }
3838}
3839
3840bool AArch64InstructionSelector::selectMergeValues(
3841 MachineInstr &I, MachineRegisterInfo &MRI) {
3842 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode")(static_cast <bool> (I.getOpcode() == TargetOpcode::G_MERGE_VALUES
&& "unexpected opcode") ? void (0) : __assert_fail (
"I.getOpcode() == TargetOpcode::G_MERGE_VALUES && \"unexpected opcode\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 3842, __extension__ __PRETTY_FUNCTION__))
;
3843 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3844 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3845 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation")(static_cast <bool> (!DstTy.isVector() && !SrcTy
.isVector() && "invalid merge operation") ? void (0) :
__assert_fail ("!DstTy.isVector() && !SrcTy.isVector() && \"invalid merge operation\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 3845, __extension__ __PRETTY_FUNCTION__))
;
3846 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3847
3848 if (I.getNumOperands() != 3)
3849 return false;
3850
3851 // Merging 2 s64s into an s128.
3852 if (DstTy == LLT::scalar(128)) {
3853 if (SrcTy.getSizeInBits() != 64)
3854 return false;
3855 Register DstReg = I.getOperand(0).getReg();
3856 Register Src1Reg = I.getOperand(1).getReg();
3857 Register Src2Reg = I.getOperand(2).getReg();
3858 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
3859 MachineInstr *InsMI =
3860 emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB);
3861 if (!InsMI)
3862 return false;
3863 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
3864 Src2Reg, /* LaneIdx */ 1, RB, MIB);
3865 if (!Ins2MI)
3866 return false;
3867 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3868 constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
3869 I.eraseFromParent();
3870 return true;
3871 }
3872
3873 if (RB.getID() != AArch64::GPRRegBankID)
3874 return false;
3875
3876 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
3877 return false;
3878
3879 auto *DstRC = &AArch64::GPR64RegClass;
3880 Register SubToRegDef = MRI.createVirtualRegister(DstRC);
3881 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3882 TII.get(TargetOpcode::SUBREG_TO_REG))
3883 .addDef(SubToRegDef)
3884 .addImm(0)
3885 .addUse(I.getOperand(1).getReg())
3886 .addImm(AArch64::sub_32);
3887 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
3888 // Need to anyext the second scalar before we can use bfm
3889 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3890 TII.get(TargetOpcode::SUBREG_TO_REG))
3891 .addDef(SubToRegDef2)
3892 .addImm(0)
3893 .addUse(I.getOperand(2).getReg())
3894 .addImm(AArch64::sub_32);
3895 MachineInstr &BFM =
3896 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
3897 .addDef(I.getOperand(0).getReg())
3898 .addUse(SubToRegDef)
3899 .addUse(SubToRegDef2)
3900 .addImm(32)
3901 .addImm(31);
3902 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
3903 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
3904 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
3905 I.eraseFromParent();
3906 return true;
3907}
3908
3909static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
3910 const unsigned EltSize) {
3911 // Choose a lane copy opcode and subregister based off of the size of the
3912 // vector's elements.
3913 switch (EltSize) {
3914 case 8:
3915 CopyOpc = AArch64::CPYi8;
3916 ExtractSubReg = AArch64::bsub;
3917 break;
3918 case 16:
3919 CopyOpc = AArch64::CPYi16;
3920 ExtractSubReg = AArch64::hsub;
3921 break;
3922 case 32:
3923 CopyOpc = AArch64::CPYi32;
3924 ExtractSubReg = AArch64::ssub;
3925 break;
3926 case 64:
3927 CopyOpc = AArch64::CPYi64;
3928 ExtractSubReg = AArch64::dsub;
3929 break;
3930 default:
3931 // Unknown size, bail out.
3932 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Elt size '" << EltSize
<< "' unsupported.\n"; } } while (false)
;
3933 return false;
3934 }
3935 return true;
3936}
3937
3938MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
3939 Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
3940 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
3941 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3942 unsigned CopyOpc = 0;
3943 unsigned ExtractSubReg = 0;
3944 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
3945 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Couldn't determine lane copy opcode for instruction.\n"
; } } while (false)
3946 dbgs() << "Couldn't determine lane copy opcode for instruction.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Couldn't determine lane copy opcode for instruction.\n"
; } } while (false)
;
3947 return nullptr;
3948 }
3949
3950 const TargetRegisterClass *DstRC =
3951 getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
3952 if (!DstRC) {
3953 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Could not determine destination register class.\n"
; } } while (false)
;
3954 return nullptr;
3955 }
3956
3957 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
3958 const LLT &VecTy = MRI.getType(VecReg);
3959 const TargetRegisterClass *VecRC =
3960 getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
3961 if (!VecRC) {
3962 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Could not determine source register class.\n"
; } } while (false)
;
3963 return nullptr;
3964 }
3965
3966 // The register that we're going to copy into.
3967 Register InsertReg = VecReg;
3968 if (!DstReg)
3969 DstReg = MRI.createVirtualRegister(DstRC);
3970 // If the lane index is 0, we just use a subregister COPY.
3971 if (LaneIdx == 0) {
3972 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
3973 .addReg(VecReg, 0, ExtractSubReg);
3974 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3975 return &*Copy;
3976 }
3977
3978 // Lane copies require 128-bit wide registers. If we're dealing with an
3979 // unpacked vector, then we need to move up to that width. Insert an implicit
3980 // def and a subregister insert to get us there.
3981 if (VecTy.getSizeInBits() != 128) {
3982 MachineInstr *ScalarToVector = emitScalarToVector(
3983 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
3984 if (!ScalarToVector)
3985 return nullptr;
3986 InsertReg = ScalarToVector->getOperand(0).getReg();
3987 }
3988
3989 MachineInstr *LaneCopyMI =
3990 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
3991 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
3992
3993 // Make sure that we actually constrain the initial copy.
3994 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3995 return LaneCopyMI;
3996}
3997
3998bool AArch64InstructionSelector::selectExtractElt(
3999 MachineInstr &I, MachineRegisterInfo &MRI) {
4000 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&(static_cast <bool> (I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT
&& "unexpected opcode!") ? void (0) : __assert_fail (
"I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT && \"unexpected opcode!\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4001, __extension__ __PRETTY_FUNCTION__))
4001 "unexpected opcode!")(static_cast <bool> (I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT
&& "unexpected opcode!") ? void (0) : __assert_fail (
"I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT && \"unexpected opcode!\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4001, __extension__ __PRETTY_FUNCTION__))
;
4002 Register DstReg = I.getOperand(0).getReg();
4003 const LLT NarrowTy = MRI.getType(DstReg);
4004 const Register SrcReg = I.getOperand(1).getReg();
4005 const LLT WideTy = MRI.getType(SrcReg);
4006 (void)WideTy;
4007 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&(static_cast <bool> (WideTy.getSizeInBits() >= NarrowTy
.getSizeInBits() && "source register size too small!"
) ? void (0) : __assert_fail ("WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() && \"source register size too small!\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4008, __extension__ __PRETTY_FUNCTION__))
4008 "source register size too small!")(static_cast <bool> (WideTy.getSizeInBits() >= NarrowTy
.getSizeInBits() && "source register size too small!"
) ? void (0) : __assert_fail ("WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() && \"source register size too small!\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4008, __extension__ __PRETTY_FUNCTION__))
;
4009 assert(!NarrowTy.isVector() && "cannot extract vector into vector!")(static_cast <bool> (!NarrowTy.isVector() && "cannot extract vector into vector!"
) ? void (0) : __assert_fail ("!NarrowTy.isVector() && \"cannot extract vector into vector!\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4009, __extension__ __PRETTY_FUNCTION__))
;
4010
4011 // Need the lane index to determine the correct copy opcode.
4012 MachineOperand &LaneIdxOp = I.getOperand(2);
4013 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?")(static_cast <bool> (LaneIdxOp.isReg() && "Lane index operand was not a register?"
) ? void (0) : __assert_fail ("LaneIdxOp.isReg() && \"Lane index operand was not a register?\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4013, __extension__ __PRETTY_FUNCTION__))
;
4014
4015 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
4016 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Cannot extract into GPR.\n"
; } } while (false)
;
4017 return false;
4018 }
4019
4020 // Find the index to extract from.
4021 auto VRegAndVal = getIConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
4022 if (!VRegAndVal)
4023 return false;
4024 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
4025
4026
4027 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
4028 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
4029 LaneIdx, MIB);
4030 if (!Extract)
4031 return false;
4032
4033 I.eraseFromParent();
4034 return true;
4035}
4036
4037bool AArch64InstructionSelector::selectSplitVectorUnmerge(
4038 MachineInstr &I, MachineRegisterInfo &MRI) {
4039 unsigned NumElts = I.getNumOperands() - 1;
4040 Register SrcReg = I.getOperand(NumElts).getReg();
4041 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4042 const LLT SrcTy = MRI.getType(SrcReg);
4043
4044 assert(NarrowTy.isVector() && "Expected an unmerge into vectors")(static_cast <bool> (NarrowTy.isVector() && "Expected an unmerge into vectors"
) ? void (0) : __assert_fail ("NarrowTy.isVector() && \"Expected an unmerge into vectors\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4044, __extension__ __PRETTY_FUNCTION__))
;
4045 if (SrcTy.getSizeInBits() > 128) {
4046 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unexpected vector type for vec split unmerge"
; } } while (false)
;
4047 return false;
4048 }
4049
4050 // We implement a split vector operation by treating the sub-vectors as
4051 // scalars and extracting them.
4052 const RegisterBank &DstRB =
4053 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
4054 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
4055 Register Dst = I.getOperand(OpIdx).getReg();
4056 MachineInstr *Extract =
4057 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
4058 if (!Extract)
4059 return false;
4060 }
4061 I.eraseFromParent();
4062 return true;
4063}
4064
4065bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
4066 MachineRegisterInfo &MRI) {
4067 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&(static_cast <bool> (I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES
&& "unexpected opcode") ? void (0) : __assert_fail (
"I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && \"unexpected opcode\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4068, __extension__ __PRETTY_FUNCTION__))
4068 "unexpected opcode")(static_cast <bool> (I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES
&& "unexpected opcode") ? void (0) : __assert_fail (
"I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && \"unexpected opcode\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4068, __extension__ __PRETTY_FUNCTION__))
;
4069
4070 // TODO: Handle unmerging into GPRs and from scalars to scalars.
4071 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
4072 AArch64::FPRRegBankID ||
4073 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
4074 AArch64::FPRRegBankID) {
4075 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
"currently unsupported.\n"; } } while (false)
4076 "currently unsupported.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
"currently unsupported.\n"; } } while (false)
;
4077 return false;
4078 }
4079
4080 // The last operand is the vector source register, and every other operand is
4081 // a register to unpack into.
4082 unsigned NumElts = I.getNumOperands() - 1;
4083 Register SrcReg = I.getOperand(NumElts).getReg();
4084 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4085 const LLT WideTy = MRI.getType(SrcReg);
4086 (void)WideTy;
4087 assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&(static_cast <bool> ((WideTy.isVector() || WideTy.getSizeInBits
() == 128) && "can only unmerge from vector or s128 types!"
) ? void (0) : __assert_fail ("(WideTy.isVector() || WideTy.getSizeInBits() == 128) && \"can only unmerge from vector or s128 types!\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4088, __extension__ __PRETTY_FUNCTION__))
4088 "can only unmerge from vector or s128 types!")(static_cast <bool> ((WideTy.isVector() || WideTy.getSizeInBits
() == 128) && "can only unmerge from vector or s128 types!"
) ? void (0) : __assert_fail ("(WideTy.isVector() || WideTy.getSizeInBits() == 128) && \"can only unmerge from vector or s128 types!\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4088, __extension__ __PRETTY_FUNCTION__))
;
4089 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&(static_cast <bool> (WideTy.getSizeInBits() > NarrowTy
.getSizeInBits() && "source register size too small!"
) ? void (0) : __assert_fail ("WideTy.getSizeInBits() > NarrowTy.getSizeInBits() && \"source register size too small!\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4090, __extension__ __PRETTY_FUNCTION__))
4090 "source register size too small!")(static_cast <bool> (WideTy.getSizeInBits() > NarrowTy
.getSizeInBits() && "source register size too small!"
) ? void (0) : __assert_fail ("WideTy.getSizeInBits() > NarrowTy.getSizeInBits() && \"source register size too small!\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4090, __extension__ __PRETTY_FUNCTION__))
;
4091
4092 if (!NarrowTy.isScalar())
4093 return selectSplitVectorUnmerge(I, MRI);
4094
4095 // Choose a lane copy opcode and subregister based off of the size of the
4096 // vector's elements.
4097 unsigned CopyOpc = 0;
4098 unsigned ExtractSubReg = 0;
4099 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
4100 return false;
4101
4102 // Set up for the lane copies.
4103 MachineBasicBlock &MBB = *I.getParent();
4104
4105 // Stores the registers we'll be copying from.
4106 SmallVector<Register, 4> InsertRegs;
4107
4108 // We'll use the first register twice, so we only need NumElts-1 registers.
4109 unsigned NumInsertRegs = NumElts - 1;
4110
4111 // If our elements fit into exactly 128 bits, then we can copy from the source
4112 // directly. Otherwise, we need to do a bit of setup with some subregister
4113 // inserts.
4114 if (NarrowTy.getSizeInBits() * NumElts == 128) {
4115 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
4116 } else {
4117 // No. We have to perform subregister inserts. For each insert, create an
4118 // implicit def and a subregister insert, and save the register we create.
4119 const TargetRegisterClass *RC =
4120 getMinClassForRegBank(*RBI.getRegBank(SrcReg, MRI, TRI),
4121 WideTy.getScalarSizeInBits() * NumElts);
4122 unsigned SubReg = 0;
4123 bool Found = getSubRegForClass(RC, TRI, SubReg);
4124 (void)Found;
4125 assert(Found && "expected to find last operand's subeg idx")(static_cast <bool> (Found && "expected to find last operand's subeg idx"
) ? void (0) : __assert_fail ("Found && \"expected to find last operand's subeg idx\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4125, __extension__ __PRETTY_FUNCTION__))
;
4126 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
4127 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4128 MachineInstr &ImpDefMI =
4129 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
4130 ImpDefReg);
4131
4132 // Now, create the subregister insert from SrcReg.
4133 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4134 MachineInstr &InsMI =
4135 *BuildMI(MBB, I, I.getDebugLoc(),
4136 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
4137 .addUse(ImpDefReg)
4138 .addUse(SrcReg)
4139 .addImm(SubReg);
4140
4141 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
4142 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
4143
4144 // Save the register so that we can copy from it after.
4145 InsertRegs.push_back(InsertReg);
4146 }
4147 }
4148
4149 // Now that we've created any necessary subregister inserts, we can
4150 // create the copies.
4151 //
4152 // Perform the first copy separately as a subregister copy.
4153 Register CopyTo = I.getOperand(0).getReg();
4154 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
4155 .addReg(InsertRegs[0], 0, ExtractSubReg);
4156 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
4157
4158 // Now, perform the remaining copies as vector lane copies.
4159 unsigned LaneIdx = 1;
4160 for (Register InsReg : InsertRegs) {
4161 Register CopyTo = I.getOperand(LaneIdx).getReg();
4162 MachineInstr &CopyInst =
4163 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
4164 .addUse(InsReg)
4165 .addImm(LaneIdx);
4166 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
4167 ++LaneIdx;
4168 }
4169
4170 // Separately constrain the first copy's destination. Because of the
4171 // limitation in constrainOperandRegClass, we can't guarantee that this will
4172 // actually be constrained. So, do it ourselves using the second operand.
4173 const TargetRegisterClass *RC =
4174 MRI.getRegClassOrNull(I.getOperand(1).getReg());
4175 if (!RC) {
4176 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Couldn't constrain copy destination.\n"
; } } while (false)
;
4177 return false;
4178 }
4179
4180 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
4181 I.eraseFromParent();
4182 return true;
4183}
4184
4185bool AArch64InstructionSelector::selectConcatVectors(
4186 MachineInstr &I, MachineRegisterInfo &MRI) {
4187 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&(static_cast <bool> (I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS
&& "Unexpected opcode") ? void (0) : __assert_fail (
"I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS && \"Unexpected opcode\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4188, __extension__ __PRETTY_FUNCTION__))
4188 "Unexpected opcode")(static_cast <bool> (I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS
&& "Unexpected opcode") ? void (0) : __assert_fail (
"I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS && \"Unexpected opcode\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4188, __extension__ __PRETTY_FUNCTION__))
;
4189 Register Dst = I.getOperand(0).getReg();
4190 Register Op1 = I.getOperand(1).getReg();
4191 Register Op2 = I.getOperand(2).getReg();
4192 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
4193 if (!ConcatMI)
4194 return false;
4195 I.eraseFromParent();
4196 return true;
4197}
4198
4199unsigned
4200AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
4201 MachineFunction &MF) const {
4202 Type *CPTy = CPVal->getType();
4203 Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
4204
4205 MachineConstantPool *MCP = MF.getConstantPool();
4206 return MCP->getConstantPoolIndex(CPVal, Alignment);
4207}
4208
4209MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4210 const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
4211 auto &MF = MIRBuilder.getMF();
4212 unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
4213
4214 auto Adrp =
4215 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
4216 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
4217
4218 MachineInstr *LoadMI = nullptr;
4219 MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
4220 unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
4221 switch (Size) {
4222 case 16:
4223 LoadMI =
4224 &*MIRBuilder
4225 .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
4226 .addConstantPoolIndex(CPIdx, 0,
4227 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4228 break;
4229 case 8:
4230 LoadMI =
4231 &*MIRBuilder
4232 .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
4233 .addConstantPoolIndex(CPIdx, 0,
4234 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4235 break;
4236 case 4:
4237 LoadMI =
4238 &*MIRBuilder
4239 .buildInstr(AArch64::LDRSui, {&AArch64::FPR32RegClass}, {Adrp})
4240 .addConstantPoolIndex(CPIdx, 0,
4241 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4242 break;
4243 case 2:
4244 LoadMI =
4245 &*MIRBuilder
4246 .buildInstr(AArch64::LDRHui, {&AArch64::FPR16RegClass}, {Adrp})
4247 .addConstantPoolIndex(CPIdx, 0,
4248 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4249 break;
4250 default:
4251 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Could not load from constant pool of type "
<< *CPVal->getType(); } } while (false)
4252 << *CPVal->getType())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Could not load from constant pool of type "
<< *CPVal->getType(); } } while (false)
;
4253 return nullptr;
4254 }
4255 LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo,
4256 MachineMemOperand::MOLoad,
4257 Size, Align(Size)));
4258 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
4259 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
4260 return LoadMI;
4261}
4262
4263/// Return an <Opcode, SubregIndex> pair to do an vector elt insert of a given
4264/// size and RB.
4265static std::pair<unsigned, unsigned>
4266getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
4267 unsigned Opc, SubregIdx;
4268 if (RB.getID() == AArch64::GPRRegBankID) {
4269 if (EltSize == 16) {
4270 Opc = AArch64::INSvi16gpr;
4271 SubregIdx = AArch64::ssub;
4272 } else if (EltSize == 32) {
4273 Opc = AArch64::INSvi32gpr;
4274 SubregIdx = AArch64::ssub;
4275 } else if (EltSize == 64) {
4276 Opc = AArch64::INSvi64gpr;
4277 SubregIdx = AArch64::dsub;
4278 } else {
4279 llvm_unreachable("invalid elt size!")::llvm::llvm_unreachable_internal("invalid elt size!", "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4279)
;
4280 }
4281 } else {
4282 if (EltSize == 8) {
4283 Opc = AArch64::INSvi8lane;
4284 SubregIdx = AArch64::bsub;
4285 } else if (EltSize == 16) {
4286 Opc = AArch64::INSvi16lane;
4287 SubregIdx = AArch64::hsub;
4288 } else if (EltSize == 32) {
4289 Opc = AArch64::INSvi32lane;
4290 SubregIdx = AArch64::ssub;
4291 } else if (EltSize == 64) {
4292 Opc = AArch64::INSvi64lane;
4293 SubregIdx = AArch64::dsub;
4294 } else {
4295 llvm_unreachable("invalid elt size!")::llvm::llvm_unreachable_internal("invalid elt size!", "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4295)
;
4296 }
4297 }
4298 return std::make_pair(Opc, SubregIdx);
4299}
4300
4301MachineInstr *AArch64InstructionSelector::emitInstr(
4302 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4303 std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4304 const ComplexRendererFns &RenderFns) const {
4305 assert(Opcode && "Expected an opcode?")(static_cast <bool> (Opcode && "Expected an opcode?"
) ? void (0) : __assert_fail ("Opcode && \"Expected an opcode?\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4305, __extension__ __PRETTY_FUNCTION__))
;
4306 assert(!isPreISelGenericOpcode(Opcode) &&(static_cast <bool> (!isPreISelGenericOpcode(Opcode) &&
"Function should only be used to produce selected instructions!"
) ? void (0) : __assert_fail ("!isPreISelGenericOpcode(Opcode) && \"Function should only be used to produce selected instructions!\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4307, __extension__ __PRETTY_FUNCTION__))
4307 "Function should only be used to produce selected instructions!")(static_cast <bool> (!isPreISelGenericOpcode(Opcode) &&
"Function should only be used to produce selected instructions!"
) ? void (0) : __assert_fail ("!isPreISelGenericOpcode(Opcode) && \"Function should only be used to produce selected instructions!\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4307, __extension__ __PRETTY_FUNCTION__))
;
4308 auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
4309 if (RenderFns)
4310 for (auto &Fn : *RenderFns)
4311 Fn(MI);
4312 constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
4313 return &*MI;
4314}
4315
4316MachineInstr *AArch64InstructionSelector::emitAddSub(
4317 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4318 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
4319 MachineIRBuilder &MIRBuilder) const {
4320 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4321 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?")(static_cast <bool> (LHS.isReg() && RHS.isReg()
&& "Expected register operands?") ? void (0) : __assert_fail
("LHS.isReg() && RHS.isReg() && \"Expected register operands?\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4321, __extension__ __PRETTY_FUNCTION__))
;
4322 auto Ty = MRI.getType(LHS.getReg());
4323 assert(!Ty.isVector() && "Expected a scalar or pointer?")(static_cast <bool> (!Ty.isVector() && "Expected a scalar or pointer?"
) ? void (0) : __assert_fail ("!Ty.isVector() && \"Expected a scalar or pointer?\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4323, __extension__ __PRETTY_FUNCTION__))
;
4324 unsigned Size = Ty.getSizeInBits();
4325 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only")(static_cast <bool> ((Size == 32 || Size == 64) &&
"Expected a 32-bit or 64-bit type only") ? void (0) : __assert_fail
("(Size == 32 || Size == 64) && \"Expected a 32-bit or 64-bit type only\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4325, __extension__ __PRETTY_FUNCTION__))
;
4326 bool Is32Bit = Size == 32;
4327
4328 // INSTRri form with positive arithmetic immediate.
4329 if (auto Fns = selectArithImmed(RHS))
4330 return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
4331 MIRBuilder, Fns);
4332
4333 // INSTRri form with negative arithmetic immediate.
4334 if (auto Fns = selectNegArithImmed(RHS))
4335 return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
4336 MIRBuilder, Fns);
4337
4338 // INSTRrx form.
4339 if (auto Fns = selectArithExtendedRegister(RHS))
4340 return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
4341 MIRBuilder, Fns);
4342
4343 // INSTRrs form.
4344 if (auto Fns = selectShiftedRegister(RHS))
4345 return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
4346 MIRBuilder, Fns);
4347 return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
4348 MIRBuilder);
4349}
4350
4351MachineInstr *
4352AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4353 MachineOperand &RHS,
4354 MachineIRBuilder &MIRBuilder) const {
4355 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4356 {{AArch64::ADDXri, AArch64::ADDWri},
4357 {AArch64::ADDXrs, AArch64::ADDWrs},
4358 {AArch64::ADDXrr, AArch64::ADDWrr},
4359 {AArch64::SUBXri, AArch64::SUBWri},
4360 {AArch64::ADDXrx, AArch64::ADDWrx}}};
4361 return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
4362}
4363
4364MachineInstr *
4365AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4366 MachineOperand &RHS,
4367 MachineIRBuilder &MIRBuilder) const {
4368 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4369 {{AArch64::ADDSXri, AArch64::ADDSWri},
4370 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4371 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4372 {AArch64::SUBSXri, AArch64::SUBSWri},
4373 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4374 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4375}
4376
4377MachineInstr *
4378AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4379 MachineOperand &RHS,
4380 MachineIRBuilder &MIRBuilder) const {
4381 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4382 {{AArch64::SUBSXri, AArch64::SUBSWri},
4383 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4384 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4385 {AArch64::ADDSXri, AArch64::ADDSWri},
4386 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4387 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4388}
4389
4390MachineInstr *
4391AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4392 MachineIRBuilder &MIRBuilder) const {
4393 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4394 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
4395 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4396 return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4397}
4398
4399MachineInstr *
4400AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4401 MachineIRBuilder &MIRBuilder) const {
4402 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?")(static_cast <bool> (LHS.isReg() && RHS.isReg()
&& "Expected register operands?") ? void (0) : __assert_fail
("LHS.isReg() && RHS.isReg() && \"Expected register operands?\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4402, __extension__ __PRETTY_FUNCTION__))
;
4403 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4404 LLT Ty = MRI.getType(LHS.getReg());
4405 unsigned RegSize = Ty.getSizeInBits();
4406 bool Is32Bit = (RegSize == 32);
4407 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4408 {AArch64::ANDSXrs, AArch64::ANDSWrs},
4409 {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4410 // ANDS needs a logical immediate for its immediate form. Check if we can
4411 // fold one in.
4412 if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
4413 int64_t Imm = ValAndVReg->Value.getSExtValue();
4414
4415 if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
4416 auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
4417 TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
4418 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
4419 return &*TstMI;
4420 }
4421 }
4422
4423 if (auto Fns = selectLogicalShiftedRegister(RHS))
4424 return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
4425 return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
4426}
4427
4428MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4429 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4430 MachineIRBuilder &MIRBuilder) const {
4431 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!")(static_cast <bool> (LHS.isReg() && RHS.isReg()
&& "Expected LHS and RHS to be registers!") ? void (
0) : __assert_fail ("LHS.isReg() && RHS.isReg() && \"Expected LHS and RHS to be registers!\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4431, __extension__ __PRETTY_FUNCTION__))
;
4432 assert(Predicate.isPredicate() && "Expected predicate?")(static_cast <bool> (Predicate.isPredicate() &&
"Expected predicate?") ? void (0) : __assert_fail ("Predicate.isPredicate() && \"Expected predicate?\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4432, __extension__ __PRETTY_FUNCTION__))
;
4433 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4434 LLT CmpTy = MRI.getType(LHS.getReg());
4435 assert(!CmpTy.isVector() && "Expected scalar or pointer")(static_cast <bool> (!CmpTy.isVector() && "Expected scalar or pointer"
) ? void (0) : __assert_fail ("!CmpTy.isVector() && \"Expected scalar or pointer\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4435, __extension__ __PRETTY_FUNCTION__))
;
4436 unsigned Size = CmpTy.getSizeInBits();
4437 (void)Size;
4438 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?")(static_cast <bool> ((Size == 32 || Size == 64) &&
"Expected a 32-bit or 64-bit LHS/RHS?") ? void (0) : __assert_fail
("(Size == 32 || Size == 64) && \"Expected a 32-bit or 64-bit LHS/RHS?\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4438, __extension__ __PRETTY_FUNCTION__))
;
4439 // Fold the compare into a cmn or tst if possible.
4440 if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4441 return FoldCmp;
4442 auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
4443 return emitSUBS(Dst, LHS, RHS, MIRBuilder);
4444}
4445
4446MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4447 Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
4448 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4449#ifndef NDEBUG
4450 LLT Ty = MRI.getType(Dst);
4451 assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&(static_cast <bool> (!Ty.isVector() && Ty.getSizeInBits
() == 32 && "Expected a 32-bit scalar register?") ? void
(0) : __assert_fail ("!Ty.isVector() && Ty.getSizeInBits() == 32 && \"Expected a 32-bit scalar register?\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4452, __extension__ __PRETTY_FUNCTION__))
4452 "Expected a 32-bit scalar register?")(static_cast <bool> (!Ty.isVector() && Ty.getSizeInBits
() == 32 && "Expected a 32-bit scalar register?") ? void
(0) : __assert_fail ("!Ty.isVector() && Ty.getSizeInBits() == 32 && \"Expected a 32-bit scalar register?\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4452, __extension__ __PRETTY_FUNCTION__))
;
4453#endif
4454 const Register ZeroReg = AArch64::WZR;
4455 auto EmitCSet = [&](Register CsetDst, AArch64CC::CondCode CC) {
4456 auto CSet =
4457 MIRBuilder.buildInstr(AArch64::CSINCWr, {CsetDst}, {ZeroReg, ZeroReg})
4458 .addImm(getInvertedCondCode(CC));
4459 constrainSelectedInstRegOperands(*CSet, TII, TRI, RBI);
4460 return &*CSet;
4461 };
4462
4463 AArch64CC::CondCode CC1, CC2;
4464 changeFCMPPredToAArch64CC(Pred, CC1, CC2);
4465 if (CC2 == AArch64CC::AL)
4466 return EmitCSet(Dst, CC1);
4467
4468 const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
4469 Register Def1Reg = MRI.createVirtualRegister(RC);
4470 Register Def2Reg = MRI.createVirtualRegister(RC);
4471 EmitCSet(Def1Reg, CC1);
4472 EmitCSet(Def2Reg, CC2);
4473 auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4474 constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
4475 return &*OrMI;
4476}
4477
4478MachineInstr *
4479AArch64InstructionSelector::emitFPCompare(Register LHS, Register RHS,
4480 MachineIRBuilder &MIRBuilder,
4481 Optional<CmpInst::Predicate> Pred) const {
4482 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4483 LLT Ty = MRI.getType(LHS);
4484 if (Ty.isVector())
4485 return nullptr;
4486 unsigned OpSize = Ty.getSizeInBits();
4487 if (OpSize != 32 && OpSize != 64)
4488 return nullptr;
4489
4490 // If this is a compare against +0.0, then we don't have
4491 // to explicitly materialize a constant.
4492 const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
4493 bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4494
4495 auto IsEqualityPred = [](CmpInst::Predicate P) {
4496 return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
4497 P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
4498 };
4499 if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4500 // Try commutating the operands.
4501 const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
4502 if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
4503 ShouldUseImm = true;
4504 std::swap(LHS, RHS);
4505 }
4506 }
4507 unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
4508 {AArch64::FCMPSri, AArch64::FCMPDri}};
4509 unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64];
4510
4511 // Partially build the compare. Decide if we need to add a use for the
4512 // third operand based off whether or not we're comparing against 0.0.
4513 auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
4514 if (!ShouldUseImm)
4515 CmpMI.addUse(RHS);
4516 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4517 return &*CmpMI;
4518}
4519
4520MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4521 Optional<Register> Dst, Register Op1, Register Op2,
4522 MachineIRBuilder &MIRBuilder) const {
4523 // We implement a vector concat by:
4524 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
4525 // 2. Insert the upper vector into the destination's upper element
4526 // TODO: some of this code is common with G_BUILD_VECTOR handling.
4527 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4528
4529 const LLT Op1Ty = MRI.getType(Op1);
4530 const LLT Op2Ty = MRI.getType(Op2);
4531
4532 if (Op1Ty != Op2Ty) {
4533 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Could not do vector concat of differing vector tys"
; } } while (false)
;
4534 return nullptr;
4535 }
4536 assert(Op1Ty.isVector() && "Expected a vector for vector concat")(static_cast <bool> (Op1Ty.isVector() && "Expected a vector for vector concat"
) ? void (0) : __assert_fail ("Op1Ty.isVector() && \"Expected a vector for vector concat\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4536, __extension__ __PRETTY_FUNCTION__))
;
4537
4538 if (Op1Ty.getSizeInBits() >= 128) {
4539 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Vector concat not supported for full size vectors"
; } } while (false)
;
4540 return nullptr;
4541 }
4542
4543 // At the moment we just support 64 bit vector concats.
4544 if (Op1Ty.getSizeInBits() != 64) {
4545 LLVM_DEBUG(dbgs() << "Vector concat supported for 64b vectors")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Vector concat supported for 64b vectors"
; } } while (false)
;
4546 return nullptr;
4547 }
4548
4549 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
4550 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
4551 const TargetRegisterClass *DstRC =
4552 getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);
4553
4554 MachineInstr *WidenedOp1 =
4555 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
4556 MachineInstr *WidenedOp2 =
4557 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
4558 if (!WidenedOp1 || !WidenedOp2) {
4559 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Could not emit a vector from scalar value"
; } } while (false)
;
4560 return nullptr;
4561 }
4562
4563 // Now do the insert of the upper element.
4564 unsigned InsertOpc, InsSubRegIdx;
4565 std::tie(InsertOpc, InsSubRegIdx) =
4566 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
4567
4568 if (!Dst)
4569 Dst = MRI.createVirtualRegister(DstRC);
4570 auto InsElt =
4571 MIRBuilder
4572 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
4573 .addImm(1) /* Lane index */
4574 .addUse(WidenedOp2->getOperand(0).getReg())
4575 .addImm(0);
4576 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4577 return &*InsElt;
4578}
4579
4580MachineInstr *
4581AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
4582 MachineIRBuilder &MIRBuilder,
4583 Register SrcReg) const {
4584 // CSINC increments the result when the predicate is false. Invert it.
4585 const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
4586 CmpInst::getInversePredicate((CmpInst::Predicate)Pred));
4587 auto I = MIRBuilder.buildInstr(AArch64::CSINCWr, {DefReg}, {SrcReg, SrcReg})
4588 .addImm(InvCC);
4589 constrainSelectedInstRegOperands(*I, TII, TRI, RBI);
4590 return &*I;
4591}
4592
4593std::pair<MachineInstr *, AArch64CC::CondCode>
4594AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4595 MachineOperand &LHS,
4596 MachineOperand &RHS,
4597 MachineIRBuilder &MIRBuilder) const {
4598 switch (Opcode) {
4599 default:
4600 llvm_unreachable("Unexpected opcode!")::llvm::llvm_unreachable_internal("Unexpected opcode!", "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4600)
;
4601 case TargetOpcode::G_SADDO:
4602 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4603 case TargetOpcode::G_UADDO:
4604 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4605 case TargetOpcode::G_SSUBO:
4606 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4607 case TargetOpcode::G_USUBO:
4608 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4609 }
4610}
4611
4612bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) {
4613 MachineRegisterInfo &MRI = *MIB.getMRI();
4614 // We want to recognize this pattern:
4615 //
4616 // $z = G_FCMP pred, $x, $y
4617 // ...
4618 // $w = G_SELECT $z, $a, $b
4619 //
4620 // Where the value of $z is *only* ever used by the G_SELECT (possibly with
4621 // some copies/truncs in between.)
4622 //
4623 // If we see this, then we can emit something like this:
4624 //
4625 // fcmp $x, $y
4626 // fcsel $w, $a, $b, pred
4627 //
4628 // Rather than emitting both of the rather long sequences in the standard
4629 // G_FCMP/G_SELECT select methods.
4630
4631 // First, check if the condition is defined by a compare.
4632 MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
4633 while (CondDef) {
4634 // We can only fold if all of the defs have one use.
4635 Register CondDefReg = CondDef->getOperand(0).getReg();
4636 if (!MRI.hasOneNonDBGUse(CondDefReg)) {
4637 // Unless it's another select.
4638 for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
4639 if (CondDef == &UI)
4640 continue;
4641 if (UI.getOpcode() != TargetOpcode::G_SELECT)
4642 return false;
4643 }
4644 }
4645
4646 // We can skip over G_TRUNC since the condition is 1-bit.
4647 // Truncating/extending can have no impact on the value.
4648 unsigned Opc = CondDef->getOpcode();
4649 if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
4650 break;
4651
4652 // Can't see past copies from physregs.
4653 if (Opc == TargetOpcode::COPY &&
4654 Register::isPhysicalRegister(CondDef->getOperand(1).getReg()))
4655 return false;
4656
4657 CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
4658 }
4659
4660 // Is the condition defined by a compare?
4661 if (!CondDef)
4662 return false;
4663
4664 unsigned CondOpc = CondDef->getOpcode();
4665 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP)
4666 return false;
4667
4668 AArch64CC::CondCode CondCode;
4669 if (CondOpc == TargetOpcode::G_ICMP) {
4670 auto Pred =
4671 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
4672 CondCode = changeICMPPredToAArch64CC(Pred);
4673 emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
4674 CondDef->getOperand(1), MIB);
4675 } else {
4676 // Get the condition code for the select.
4677 auto Pred =
4678 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
4679 AArch64CC::CondCode CondCode2;
4680 changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);
4681
4682 // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
4683 // instructions to emit the comparison.
4684 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
4685 // unnecessary.
4686 if (CondCode2 != AArch64CC::AL)
4687 return false;
4688
4689 if (!emitFPCompare(CondDef->getOperand(2).getReg(),
4690 CondDef->getOperand(3).getReg(), MIB)) {
4691 LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Couldn't emit compare for select!\n"
; } } while (false)
;
4692 return false;
4693 }
4694 }
4695
4696 // Emit the select.
4697 emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
4698 I.getOperand(3).getReg(), CondCode, MIB);
4699 I.eraseFromParent();
4700 return true;
4701}
4702
4703MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
4704 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4705 MachineIRBuilder &MIRBuilder) const {
4706 assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&(static_cast <bool> (LHS.isReg() && RHS.isReg()
&& Predicate.isPredicate() && "Unexpected MachineOperand"
) ? void (0) : __assert_fail ("LHS.isReg() && RHS.isReg() && Predicate.isPredicate() && \"Unexpected MachineOperand\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4707, __extension__ __PRETTY_FUNCTION__))
4707 "Unexpected MachineOperand")(static_cast <bool> (LHS.isReg() && RHS.isReg()
&& Predicate.isPredicate() && "Unexpected MachineOperand"
) ? void (0) : __assert_fail ("LHS.isReg() && RHS.isReg() && Predicate.isPredicate() && \"Unexpected MachineOperand\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4707, __extension__ __PRETTY_FUNCTION__))
;
4708 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4709 // We want to find this sort of thing:
4710 // x = G_SUB 0, y
4711 // G_ICMP z, x
4712 //
4713 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
4714 // e.g:
4715 //
4716 // cmn z, y
4717
4718 // Check if the RHS or LHS of the G_ICMP is defined by a SUB
4719 MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
4720 MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
4721 auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());
4722 // Given this:
4723 //
4724 // x = G_SUB 0, y
4725 // G_ICMP x, z
4726 //
4727 // Produce this:
4728 //
4729 // cmn y, z
4730 if (isCMN(LHSDef, P, MRI))
4731 return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
4732
4733 // Same idea here, but with the RHS of the compare instead:
4734 //
4735 // Given this:
4736 //
4737 // x = G_SUB 0, y
4738 // G_ICMP z, x
4739 //
4740 // Produce this:
4741 //
4742 // cmn z, y
4743 if (isCMN(RHSDef, P, MRI))
4744 return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
4745
4746 // Given this:
4747 //
4748 // z = G_AND x, y
4749 // G_ICMP z, 0
4750 //
4751 // Produce this if the compare is signed:
4752 //
4753 // tst x, y
4754 if (!CmpInst::isUnsigned(P) && LHSDef &&
4755 LHSDef->getOpcode() == TargetOpcode::G_AND) {
4756 // Make sure that the RHS is 0.
4757 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
4758 if (!ValAndVReg || ValAndVReg->Value != 0)
4759 return nullptr;
4760
4761 return emitTST(LHSDef->getOperand(1),
4762 LHSDef->getOperand(2), MIRBuilder);
4763 }
4764
4765 return nullptr;
4766}
4767
4768bool AArch64InstructionSelector::selectShuffleVector(
4769 MachineInstr &I, MachineRegisterInfo &MRI) {
4770 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
4771 Register Src1Reg = I.getOperand(1).getReg();
4772 const LLT Src1Ty = MRI.getType(Src1Reg);
4773 Register Src2Reg = I.getOperand(2).getReg();
4774 const LLT Src2Ty = MRI.getType(Src2Reg);
4775 ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
4776
4777 MachineBasicBlock &MBB = *I.getParent();
4778 MachineFunction &MF = *MBB.getParent();
4779 LLVMContext &Ctx = MF.getFunction().getContext();
4780
4781 // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
4782 // it's originated from a <1 x T> type. Those should have been lowered into
4783 // G_BUILD_VECTOR earlier.
4784 if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
4785 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n"
; } } while (false)
;
4786 return false;
4787 }
4788
4789 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
4790
4791 SmallVector<Constant *, 64> CstIdxs;
4792 for (int Val : Mask) {
4793 // For now, any undef indexes we'll just assume to be 0. This should be
4794 // optimized in future, e.g. to select DUP etc.
4795 Val = Val < 0 ? 0 : Val;
4796 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
4797 unsigned Offset = Byte + Val * BytesPerElt;
4798 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
4799 }
4800 }
4801
4802 // Use a constant pool to load the index vector for TBL.
4803 Constant *CPVal = ConstantVector::get(CstIdxs);
4804 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIB);
4805 if (!IndexLoad) {
4806 LLVM_DEBUG(dbgs() << "Could not load from a constant pool")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Could not load from a constant pool"
; } } while (false)
;
4807 return false;
4808 }
4809
4810 if (DstTy.getSizeInBits() != 128) {
4811 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty")(static_cast <bool> (DstTy.getSizeInBits() == 64 &&
"Unexpected shuffle result ty") ? void (0) : __assert_fail (
"DstTy.getSizeInBits() == 64 && \"Unexpected shuffle result ty\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4811, __extension__ __PRETTY_FUNCTION__))
;
4812 // This case can be done with TBL1.
4813 MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIB);
4814 if (!Concat) {
4815 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Could not do vector concat for tbl1"
; } } while (false)
;
4816 return false;
4817 }
4818
4819 // The constant pool load will be 64 bits, so need to convert to FPR128 reg.
4820 IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass,
4821 IndexLoad->getOperand(0).getReg(), MIB);
4822
4823 auto TBL1 = MIB.buildInstr(
4824 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
4825 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
4826 constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
4827
4828 auto Copy =
4829 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
4830 .addReg(TBL1.getReg(0), 0, AArch64::dsub);
4831 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
4832 I.eraseFromParent();
4833 return true;
4834 }
4835
4836 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
4837 // Q registers for regalloc.
4838 SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};
4839 auto RegSeq = createQTuple(Regs, MIB);
4840 auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
4841 {RegSeq, IndexLoad->getOperand(0)});
4842 constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
4843 I.eraseFromParent();
4844 return true;
4845}
4846
4847MachineInstr *AArch64InstructionSelector::emitLaneInsert(
4848 Optional<Register> DstReg, Register SrcReg, Register EltReg,
4849 unsigned LaneIdx, const RegisterBank &RB,
4850 MachineIRBuilder &MIRBuilder) const {
4851 MachineInstr *InsElt = nullptr;
4852 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
4853 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4854
4855 // Create a register to define with the insert if one wasn't passed in.
4856 if (!DstReg)
4857 DstReg = MRI.createVirtualRegister(DstRC);
4858
4859 unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
4860 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
4861
4862 if (RB.getID() == AArch64::FPRRegBankID) {
4863 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
4864 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
4865 .addImm(LaneIdx)
4866 .addUse(InsSub->getOperand(0).getReg())
4867 .addImm(0);
4868 } else {
4869 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
4870 .addImm(LaneIdx)
4871 .addUse(EltReg);
4872 }
4873
4874 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4875 return InsElt;
4876}
4877
4878bool AArch64InstructionSelector::selectUSMovFromExtend(
4879 MachineInstr &MI, MachineRegisterInfo &MRI) {
4880 if (MI.getOpcode() != TargetOpcode::G_SEXT &&
4881 MI.getOpcode() != TargetOpcode::G_ZEXT &&
4882 MI.getOpcode() != TargetOpcode::G_ANYEXT)
4883 return false;
4884 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT;
4885 const Register DefReg = MI.getOperand(0).getReg();
4886 const LLT DstTy = MRI.getType(DefReg);
4887 unsigned DstSize = DstTy.getSizeInBits();
4888
4889 if (DstSize != 32 && DstSize != 64)
4890 return false;
4891
4892 MachineInstr *Extract = getOpcodeDef(TargetOpcode::G_EXTRACT_VECTOR_ELT,
4893 MI.getOperand(1).getReg(), MRI);
4894 int64_t Lane;
4895 if (!Extract || !mi_match(Extract->getOperand(2).getReg(), MRI, m_ICst(Lane)))
4896 return false;
4897 Register Src0 = Extract->getOperand(1).getReg();
4898
4899 const LLT &VecTy = MRI.getType(Src0);
4900
4901 if (VecTy.getSizeInBits() != 128) {
4902 const MachineInstr *ScalarToVector = emitScalarToVector(
4903 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);
4904 assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!")(static_cast <bool> (ScalarToVector && "Didn't expect emitScalarToVector to fail!"
) ? void (0) : __assert_fail ("ScalarToVector && \"Didn't expect emitScalarToVector to fail!\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4904, __extension__ __PRETTY_FUNCTION__))
;
4905 Src0 = ScalarToVector->getOperand(0).getReg();
4906 }
4907
4908 unsigned Opcode;
4909 if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32)
4910 Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
4911 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16)
4912 Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
4913 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8)
4914 Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
4915 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16)
4916 Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
4917 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8)
4918 Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
4919 else
4920 llvm_unreachable("Unexpected type combo for S/UMov!")::llvm::llvm_unreachable_internal("Unexpected type combo for S/UMov!"
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4920)
;
4921
4922 // We may need to generate one of these, depending on the type and sign of the
4923 // input:
4924 // DstReg = SMOV Src0, Lane;
4925 // NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32;
4926 MachineInstr *ExtI = nullptr;
4927 if (DstSize == 64 && !IsSigned) {
4928 Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
4929 MIB.buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane);
4930 ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
4931 .addImm(0)
4932 .addUse(NewReg)
4933 .addImm(AArch64::sub_32);
4934 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
4935 } else
4936 ExtI = MIB.buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane);
4937
4938 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
4939 MI.eraseFromParent();
4940 return true;
4941}
4942
4943bool AArch64InstructionSelector::selectInsertElt(MachineInstr &I,
4944 MachineRegisterInfo &MRI) {
4945 assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)(static_cast <bool> (I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT
) ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT"
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4945, __extension__ __PRETTY_FUNCTION__))
;
4946
4947 // Get information on the destination.
4948 Register DstReg = I.getOperand(0).getReg();
4949 const LLT DstTy = MRI.getType(DstReg);
4950 unsigned VecSize = DstTy.getSizeInBits();
4951
4952 // Get information on the element we want to insert into the destination.
4953 Register EltReg = I.getOperand(2).getReg();
4954 const LLT EltTy = MRI.getType(EltReg);
4955 unsigned EltSize = EltTy.getSizeInBits();
4956 if (EltSize < 16 || EltSize > 64)
4957 return false; // Don't support all element types yet.
4958
4959 // Find the definition of the index. Bail out if it's not defined by a
4960 // G_CONSTANT.
4961 Register IdxReg = I.getOperand(3).getReg();
4962 auto VRegAndVal = getIConstantVRegValWithLookThrough(IdxReg, MRI);
4963 if (!VRegAndVal)
4964 return false;
4965 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
4966
4967 // Perform the lane insert.
4968 Register SrcReg = I.getOperand(1).getReg();
4969 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
4970
4971 if (VecSize < 128) {
4972 // If the vector we're inserting into is smaller than 128 bits, widen it
4973 // to 128 to do the insert.
4974 MachineInstr *ScalarToVec =
4975 emitScalarToVector(VecSize, &AArch64::FPR128RegClass, SrcReg, MIB);
4976 if (!ScalarToVec)
4977 return false;
4978 SrcReg = ScalarToVec->getOperand(0).getReg();
4979 }
4980
4981 // Create an insert into a new FPR128 register.
4982 // Note that if our vector is already 128 bits, we end up emitting an extra
4983 // register.
4984 MachineInstr *InsMI =
4985 emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIB);
4986
4987 if (VecSize < 128) {
4988 // If we had to widen to perform the insert, then we have to demote back to
4989 // the original size to get the result we want.
4990 Register DemoteVec = InsMI->getOperand(0).getReg();
4991 const TargetRegisterClass *RC =
4992 getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
4993 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
4994 LLVM_DEBUG(dbgs() << "Unsupported register class!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unsupported register class!\n"
; } } while (false)
;
4995 return false;
4996 }
4997 unsigned SubReg = 0;
4998 if (!getSubRegForClass(RC, TRI, SubReg))
4999 return false;
5000 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
5001 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSizedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unsupported destination size! ("
<< VecSize << "\n"; } } while (false)
5002 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unsupported destination size! ("
<< VecSize << "\n"; } } while (false)
;
5003 return false;
5004 }
5005 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
5006 .addReg(DemoteVec, 0, SubReg);
5007 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5008 } else {
5009 // No widening needed.
5010 InsMI->getOperand(0).setReg(DstReg);
5011 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
5012 }
5013
5014 I.eraseFromParent();
5015 return true;
5016}
5017
5018MachineInstr *
5019AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
5020 MachineIRBuilder &MIRBuilder,
5021 MachineRegisterInfo &MRI) {
5022 LLT DstTy = MRI.getType(Dst);
5023 unsigned DstSize = DstTy.getSizeInBits();
5024 if (CV->isNullValue()) {
5025 if (DstSize == 128) {
5026 auto Mov =
5027 MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
5028 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5029 return &*Mov;
5030 }
5031
5032 if (DstSize == 64) {
5033 auto Mov =
5034 MIRBuilder
5035 .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
5036 .addImm(0);
5037 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {})
5038 .addReg(Mov.getReg(0), 0, AArch64::dsub);
5039 RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);
5040 return &*Copy;
5041 }
5042 }
5043
5044 auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);
5045 if (!CPLoad) {
5046 LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Could not generate cp load for constant vector!"
; } } while (false)
;
5047 return nullptr;
5048 }
5049
5050 auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0));
5051 RBI.constrainGenericRegister(
5052 Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI);
5053 return &*Copy;
5054}
5055
5056bool AArch64InstructionSelector::tryOptConstantBuildVec(
5057 MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) {
5058 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR)(static_cast <bool> (I.getOpcode() == TargetOpcode::G_BUILD_VECTOR
) ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_BUILD_VECTOR"
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 5058, __extension__ __PRETTY_FUNCTION__))
;
5059 unsigned DstSize = DstTy.getSizeInBits();
5060 assert(DstSize <= 128 && "Unexpected build_vec type!")(static_cast <bool> (DstSize <= 128 && "Unexpected build_vec type!"
) ? void (0) : __assert_fail ("DstSize <= 128 && \"Unexpected build_vec type!\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 5060, __extension__ __PRETTY_FUNCTION__))
;
5061 if (DstSize < 32)
5062 return false;
5063 // Check if we're building a constant vector, in which case we want to
5064 // generate a constant pool load instead of a vector insert sequence.
5065 SmallVector<Constant *, 16> Csts;
5066 for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
5067 // Try to find G_CONSTANT or G_FCONSTANT
5068 auto *OpMI =
5069 getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI);
5070 if (OpMI)
5071 Csts.emplace_back(
5072 const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm()));
5073 else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT,
5074 I.getOperand(Idx).getReg(), MRI)))
5075 Csts.emplace_back(
5076 const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm()));
5077 else
5078 return false;
5079 }
5080 Constant *CV = ConstantVector::get(Csts);
5081 if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI))
5082 return false;
5083 I.eraseFromParent();
5084 return true;
5085}
5086
5087bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
5088 MachineInstr &I, MachineRegisterInfo &MRI) {
5089 // Given:
5090 // %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
5091 //
5092 // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt.
5093 Register Dst = I.getOperand(0).getReg();
5094 Register EltReg = I.getOperand(1).getReg();
5095 LLT EltTy = MRI.getType(EltReg);
5096 // If the index isn't on the same bank as its elements, then this can't be a
5097 // SUBREG_TO_REG.
5098 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
5099 const RegisterBank &DstRB = *RBI.getRegBank(Dst, MRI, TRI);
5100 if (EltRB != DstRB)
5101 return false;
5102 if (any_of(make_range(I.operands_begin() + 2, I.operands_end()),
5103 [&MRI](const MachineOperand &Op) {
5104 return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(),
5105 MRI);
5106 }))
5107 return false;
5108 unsigned SubReg;
5109 const TargetRegisterClass *EltRC =
5110 getMinClassForRegBank(EltRB, EltTy.getSizeInBits());
5111 if (!EltRC)
5112 return false;
5113 const TargetRegisterClass *DstRC =
5114 getMinClassForRegBank(DstRB, MRI.getType(Dst).getSizeInBits());
5115 if (!DstRC)
5116 return false;
5117 if (!getSubRegForClass(EltRC, TRI, SubReg))
5118 return false;
5119 auto SubregToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})
5120 .addImm(0)
5121 .addUse(EltReg)
5122 .addImm(SubReg);
5123 I.eraseFromParent();
5124 constrainSelectedInstRegOperands(*SubregToReg, TII, TRI, RBI);
5125 return RBI.constrainGenericRegister(Dst, *DstRC, MRI);
5126}
5127
5128bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
5129 MachineRegisterInfo &MRI) {
5130 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR)(static_cast <bool> (I.getOpcode() == TargetOpcode::G_BUILD_VECTOR
) ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_BUILD_VECTOR"
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 5130, __extension__ __PRETTY_FUNCTION__))
;
5131 // Until we port more of the optimized selections, for now just use a vector
5132 // insert sequence.
5133 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5134 const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
5135 unsigned EltSize = EltTy.getSizeInBits();
5136
5137 if (tryOptConstantBuildVec(I, DstTy, MRI))
5138 return true;
5139 if (tryOptBuildVecToSubregToReg(I, MRI))
5140 return true;
5141
5142 if (EltSize < 16 || EltSize > 64)
5143 return false; // Don't support all element types yet.
5144 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
5145
5146 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5147 MachineInstr *ScalarToVec =
5148 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
5149 I.getOperand(1).getReg(), MIB);
5150 if (!ScalarToVec)
5151 return false;
5152
5153 Register DstVec = ScalarToVec->getOperand(0).getReg();
5154 unsigned DstSize = DstTy.getSizeInBits();
5155
5156 // Keep track of the last MI we inserted. Later on, we might be able to save
5157 // a copy using it.
5158 MachineInstr *PrevMI = nullptr;
5159 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
5160 // Note that if we don't do a subregister copy, we can end up making an
5161 // extra register.
5162 PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
5163 MIB);
5164 DstVec = PrevMI->getOperand(0).getReg();
5165 }
5166
5167 // If DstTy's size in bits is less than 128, then emit a subregister copy
5168 // from DstVec to the last register we've defined.
5169 if (DstSize < 128) {
5170 // Force this to be FPR using the destination vector.
5171 const TargetRegisterClass *RC =
5172 getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
5173 if (!RC)
5174 return false;
5175 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5176 LLVM_DEBUG(dbgs() << "Unsupported register class!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unsupported register class!\n"
; } } while (false)
;
5177 return false;
5178 }
5179
5180 unsigned SubReg = 0;
5181 if (!getSubRegForClass(RC, TRI, SubReg))
5182 return false;
5183 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
5184 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSizedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unsupported destination size! ("
<< DstSize << "\n"; } } while (false)
5185 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unsupported destination size! ("
<< DstSize << "\n"; } } while (false)
;
5186 return false;
5187 }
5188
5189 Register Reg = MRI.createVirtualRegister(RC);
5190 Register DstReg = I.getOperand(0).getReg();
5191
5192 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, 0, SubReg);
5193 MachineOperand &RegOp = I.getOperand(1);
5194 RegOp.setReg(Reg);
5195 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5196 } else {
5197 // We don't need a subregister copy. Save a copy by re-using the
5198 // destination register on the final insert.
5199 assert(PrevMI && "PrevMI was null?")(static_cast <bool> (PrevMI && "PrevMI was null?"
) ? void (0) : __assert_fail ("PrevMI && \"PrevMI was null?\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 5199, __extension__ __PRETTY_FUNCTION__))
;
5200 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
5201 constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
5202 }
5203
5204 I.eraseFromParent();
5205 return true;
5206}
5207
5208bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc,
5209 unsigned NumVecs,
5210 MachineInstr &I) {
5211 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)(static_cast <bool> (I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS
) ? void (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS"
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 5211, __extension__ __PRETTY_FUNCTION__))
;
5
Assuming the condition is true
6
'?' condition is true
5212 assert(Opc && "Expected an opcode?")(static_cast <bool> (Opc && "Expected an opcode?"
) ? void (0) : __assert_fail ("Opc && \"Expected an opcode?\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 5212, __extension__ __PRETTY_FUNCTION__))
;
7
'?' condition is true
5213 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors")(static_cast <bool> (NumVecs > 1 && NumVecs <
5 && "Only support 2, 3, or 4 vectors") ? void (0) :
__assert_fail ("NumVecs > 1 && NumVecs < 5 && \"Only support 2, 3, or 4 vectors\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 5213, __extension__ __PRETTY_FUNCTION__))
;
8
'?' condition is true
5214 auto &MRI = *MIB.getMRI();
5215 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5216 unsigned Size = Ty.getSizeInBits();
5217 assert((Size == 64 || Size == 128) &&(static_cast <bool> ((Size == 64 || Size == 128) &&
"Destination must be 64 bits or 128 bits?") ? void (0) : __assert_fail
("(Size == 64 || Size == 128) && \"Destination must be 64 bits or 128 bits?\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 5218, __extension__ __PRETTY_FUNCTION__))
9
Assuming 'Size' is not equal to 64
10
Assuming 'Size' is equal to 128
11
'?' condition is true
5218 "Destination must be 64 bits or 128 bits?")(static_cast <bool> ((Size == 64 || Size == 128) &&
"Destination must be 64 bits or 128 bits?") ? void (0) : __assert_fail
("(Size == 64 || Size == 128) && \"Destination must be 64 bits or 128 bits?\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 5218, __extension__ __PRETTY_FUNCTION__))
;
5219 unsigned SubReg = Size
11.1
'Size' is not equal to 64
11.1
'Size' is not equal to 64
11.1
'Size' is not equal to 64
== 64 ? AArch64::dsub0 : AArch64::qsub0;
12
'?' condition is false
5220 auto Ptr = I.getOperand(I.getNumOperands() - 1).getReg();
5221 assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?")(static_cast <bool> (MRI.getType(Ptr).isPointer() &&
"Expected a pointer type?") ? void (0) : __assert_fail ("MRI.getType(Ptr).isPointer() && \"Expected a pointer type?\""
, "/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 5221, __extension__ __PRETTY_FUNCTION__))
;
13
'?' condition is true
5222 auto Load = MIB.buildInstr(Opc, {Ty}, {Ptr});
5223 Load.cloneMemRefs(I);
5224 constrainSelectedInstRegOperands(*Load, TII, TRI, RBI);
5225 Register SelectedLoadDst = Load->getOperand(0).getReg();
5226 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
14
Loop condition is true. Entering loop body
5227 auto Vec = MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(Idx)}, {})
5228 .addReg(SelectedLoadDst, 0, SubReg + Idx);
5229 // Emit the subreg copies and immediately select them.
5230 // FIXME: We should refactor our copy code into an emitCopy helper and
5231 // clean up uses of this pattern elsewhere in the selector.
5232 selectCopy(*Vec, TII, MRI, TRI, RBI);
15
Calling 'selectCopy'
5233 }
5234 return true;
5235}
5236
5237bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
5238 MachineInstr &I, MachineRegisterInfo &MRI) {
5239 // Find the intrinsic ID.
5240 unsigned IntrinID = I.getIntrinsicID();
5241
5242 const LLT S8 = LLT::scalar(8);
5243 const LLT S16 = LLT::scalar(16);
5244 const LLT S32 = LLT::scalar(32);
5245 const LLT S64 = LLT::scalar(64);
5246 const LLT P0 = LLT::pointer(0, 64);
5247 // Select the instruction.
5248 switch (IntrinID) {
1
Control jumps to 'case aarch64_neon_ld4:' at line 5295
5249 default:
5250 return false;
5251 case Intrinsic::aarch64_ldxp:
5252 case Intrinsic::aarch64_ldaxp: {
5253 auto NewI = MIB.buildInstr(
5254 IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,
5255 {I.getOperand(0).getReg(), I.getOperand(1).getReg()},
5256 {I.getOperand(3)});
5257 NewI.cloneMemRefs(I);
5258 constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
5259 break;
5260 }
5261 case Intrinsic::trap:
5262 MIB.buildInstr(AArch64::BRK, {}, {}).addImm(1);
5263 break;
5264 case Intrinsic::debugtrap:
5265 MIB.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
5266 break;
5267 case Intrinsic::ubsantrap:
5268 MIB.buildInstr(AArch64::BRK, {}, {})
5269 .addImm(I.getOperand(1).getImm() | ('U' << 8));
5270 break;
5271 case Intrinsic::aarch64_neon_ld2: {
5272 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5273 unsigned Opc = 0;
5274 if (Ty == LLT::fixed_vector(8, S8))
5275 Opc = AArch64::LD2Twov8b;
5276 else if (Ty == LLT::fixed_vector(16, S8))
5277 Opc = AArch64::LD2Twov16b;
5278 else if (Ty == LLT::fixed_vector(4, S16))
5279 Opc = AArch64::LD2Twov4h;
5280 else if (Ty == LLT::fixed_vector(8, S16))
5281 Opc = AArch64::LD2Twov8h;
5282 else if (Ty == LLT::fixed_vector(2, S32))
5283 Opc = AArch64::LD2Twov2s;
5284 else if (Ty == LLT::fixed_vector(4, S32))
5285 Opc = AArch64::LD2Twov4s;
5286 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
5287 Opc = AArch64::LD2Twov2d;
5288 else if (Ty == S64 || Ty == P0)
5289 Opc = AArch64::LD1Twov1d;
5290 else
5291 llvm_unreachable("Unexpected type for ld2!")::llvm::llvm_unreachable_internal("Unexpected type for ld2!",
"/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 5291)
;
5292 selectVectorLoadIntrinsic(Opc, 2, I);
5293 break;
5294 }
5295 case Intrinsic::aarch64_neon_ld4: {
5296 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5297 unsigned Opc = 0;
5298 if (Ty == LLT::fixed_vector(8, S8))
2
Taking false branch
5299 Opc = AArch64::LD4Fourv8b;
5300 else if (Ty == LLT::fixed_vector(16, S8))
3
Taking true branch
5301 Opc = AArch64::LD4Fourv16b;
5302 else if (Ty == LLT::fixed_vector(4, S16))
5303 Opc = AArch64::LD4Fourv4h;
5304 else if (Ty == LLT::fixed_vector(8, S16))
5305 Opc = AArch64::LD4Fourv8h;
5306 else if (Ty == LLT::fixed_vector(2, S32))
5307 Opc = AArch64::LD4Fourv2s;
5308 else if (Ty == LLT::fixed_vector(4, S32))
5309 Opc = AArch64::LD4Fourv4s;
5310 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
5311 Opc = AArch64::LD4Fourv2d;
5312 else if (Ty == S64 || Ty == P0)
5313 Opc = AArch64::LD1Fourv1d;
5314 else
5315 llvm_unreachable("Unexpected type for ld4!")::llvm::llvm_unreachable_internal("Unexpected type for ld4!",
"/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 5315)
;
5316 selectVectorLoadIntrinsic(Opc, 4, I);
4
Calling 'AArch64InstructionSelector::selectVectorLoadIntrinsic'
5317 break;
5318 }
5319 case Intrinsic::aarch64_neon_st2: {
5320 Register Src1 = I.getOperand(1).getReg();
5321 Register Src2 = I.getOperand(2).getReg();
5322 Register Ptr = I.getOperand(3).getReg();
5323 LLT Ty = MRI.getType(Src1);
5324 unsigned Opc;
5325 if (Ty == LLT::fixed_vector(8, S8))
5326 Opc = AArch64::ST2Twov8b;
5327 else if (Ty == LLT::fixed_vector(16, S8))
5328 Opc = AArch64::ST2Twov16b;
5329 else if (Ty == LLT::fixed_vector(4, S16))
5330 Opc = AArch64::ST2Twov4h;
5331 else if (Ty == LLT::fixed_vector(8, S16))
5332 Opc = AArch64::ST2Twov8h;
5333 else if (Ty == LLT::fixed_vector(2, S32))
5334 Opc = AArch64::ST2Twov2s;
5335 else if (Ty == LLT::fixed_vector(4, S32))
5336 Opc = AArch64::ST2Twov4s;
5337 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
5338 Opc = AArch64::ST2Twov2d;
5339 else if (Ty == S64 || Ty == P0)
5340 Opc = AArch64::ST1Twov1d;
5341 else
5342 llvm_unreachable("Unexpected type for st2!")::llvm::llvm_unreachable_internal("Unexpected type for st2!",
"/build/llvm-toolchain-snapshot-14~++20211016100712+8e1d532707fd/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 5342)
;
5343 SmallVector<Register, 2> Regs = {Src1, Src2};
5344 Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB)
5345 : createDTuple(Regs, MIB);
5346 auto Store = MIB.buildInstr(Opc, {}, {Tuple, Ptr});
5347 Store.cloneMemRefs(I);
5348 constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
5349 break;
5350 }
5351 }
5352
5353 I.eraseFromParent();
5354 return true;
5355}
5356
5357bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
5358 MachineRegisterInfo &MRI) {
5359 unsigned IntrinID = I.getIntrinsicID();
5360
5361 switch (IntrinID) {
5362 default:
5363 break;
5364 case Intrinsic::aarch64_crypto_sha1h: {
5365 Register DstReg = I.getOperand(0).getReg();
5366 Register SrcReg = I.getOperand(2).getReg();
5367
5368 // FIXME: Should this be an assert?
5369 if (MRI.getType(DstReg).getSizeInBits() != 32 ||
5370 MRI.getType(SrcReg).getSizeInBits() != 32)
5371 return false;
5372
5373 // The operation has to happen on FPRs. Set up some new FPR registers for
5374 // the source and destination if they are on GPRs.
5375 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
5376 SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
5377 MIB.buildCopy({SrcReg}, {I.getOperand(2)});
5378
5379 // Make sure the copy ends up getting constrained properly.
5380 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
5381 AArch64::GPR32RegClass, MRI);
5382 }
5383
5384 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
5385 DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
5386
5387 // Actually insert the instruction.
5388 auto SHA1Inst = MIB.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
5389 constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
5390
5391 // Did we create a new register for the destination?
5392 if (DstReg != I.getOperand(0).getReg()) {
5393 // Yep. Copy the result of the instruction back into the original
5394 // destination.
5395 MIB.buildCopy({I.getOperand(0)}, {DstReg});
5396 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
5397 AArch64::GPR32RegClass, MRI);
5398 }
5399
5400 I.eraseFromParent();
5401 return true;
5402 }
5403 case Intrinsic::frameaddress:
5404 case Intrinsic::returnaddress: {
5405 MachineFunction &MF = *I.getParent()->getParent();
5406 MachineFrameInfo &MFI = MF.getFrameInfo();
5407
5408 unsigned Depth = I.getOperand(2).getImm();
5409 Register DstReg = I.getOperand(0).getReg();
5410 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
5411
5412 if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
5413 if (!MFReturnAddr) {
5414 // Insert the copy from LR/X30 into the entry block, before it can be
5415 // clobbered by anything.
5416 MFI.setReturnAddressIsTaken(true);
5417 MFReturnAddr = getFunctionLiveInPhysReg(MF, TII, AArch64::LR,
5418 AArch64::GPR64RegClass);
5419 }
5420
5421 if (STI.hasPAuth()) {
5422 MIB.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
5423 } else {
5424 MIB.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
5425 MIB.buildInstr(AArch64::XPACLRI);
5426 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
5427 }
5428
5429 I.eraseFromParent();
5430 return true;
5431 }
5432
5433 MFI.setFrameAddressIsTaken(true);
5434 Register FrameAddr(AArch64::FP);