Bug Summary

File: llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
Warning: line 972, column 7
6th function call argument is an uninitialized value
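
The report's path reduces to an out-parameter that is only assigned on some switch arms: getSubRegForClass() returns false from its 'default' case without writing to SubReg, and the caller in selectCopy() ignores that return value. Below is a minimal, self-contained sketch of the same pattern; the function bodies and names (getSubRegForSize, useSubReg) are simplified stand-ins, not LLVM code, and the guard at the end is one possible hardening, not necessarily the upstream fix.

#include <cstdio>

// Mirrors the shape of getSubRegForClass(): a false return means the
// out-parameter was never written.
static bool getSubRegForSize(unsigned SizeInBits, unsigned &SubReg) {
  switch (SizeInBits) {
  case 32:
    SubReg = 1; // stand-in for AArch64::ssub
    return true;
  case 64:
    SubReg = 2; // stand-in for AArch64::dsub
    return true;
  default:
    return false; // SubReg left untouched -- the root cause
  }
}

// Stands in for copySubReg(), where SubReg is the flagged 6th argument.
static void useSubReg(unsigned SubReg) {
  std::printf("copy via subreg %u\n", SubReg);
}

int main() {
  unsigned SubReg;               // step 51: declared without an initial value
  getSubRegForSize(128, SubReg); // steps 54/59: returns false, writes nothing
  useSubReg(SubReg);             // step 60: uninitialized argument

  unsigned Guarded = 0;              // defensive: initialize, and...
  if (getSubRegForSize(64, Guarded)) // ...check the result before use
    useSubReg(Guarded);
  return 0;
}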

Annotated Source Code

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name AArch64InstructionSelector.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/build-llvm/lib/Target/AArch64 -resource-dir /usr/lib/llvm-13/lib/clang/13.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/build-llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/build-llvm/include -I /build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/include -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-13/lib/clang/13.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/build-llvm/lib/Target/AArch64 -fdebug-prefix-map=/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c=. -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-07-26-235520-9401-1 -x c++ /build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

1//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the InstructionSelector class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
14#include "AArch64GlobalISelUtils.h"
15#include "AArch64InstrInfo.h"
16#include "AArch64MachineFunctionInfo.h"
17#include "AArch64RegisterBankInfo.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
20#include "AArch64TargetMachine.h"
21#include "AArch64GlobalISelUtils.h"
22#include "MCTargetDesc/AArch64AddressingModes.h"
23#include "MCTargetDesc/AArch64MCTargetDesc.h"
24#include "llvm/ADT/Optional.h"
25#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
26#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
27#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
28#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
29#include "llvm/CodeGen/MachineBasicBlock.h"
30#include "llvm/CodeGen/MachineConstantPool.h"
31#include "llvm/CodeGen/MachineFunction.h"
32#include "llvm/CodeGen/MachineInstr.h"
33#include "llvm/CodeGen/MachineInstrBuilder.h"
34#include "llvm/CodeGen/MachineMemOperand.h"
35#include "llvm/CodeGen/MachineOperand.h"
36#include "llvm/CodeGen/MachineRegisterInfo.h"
37#include "llvm/CodeGen/TargetOpcodes.h"
38#include "llvm/IR/Constants.h"
39#include "llvm/IR/DerivedTypes.h"
40#include "llvm/IR/Instructions.h"
41#include "llvm/IR/PatternMatch.h"
42#include "llvm/IR/Type.h"
43#include "llvm/IR/IntrinsicsAArch64.h"
44#include "llvm/Pass.h"
45#include "llvm/Support/Debug.h"
46#include "llvm/Support/raw_ostream.h"
47
48#define DEBUG_TYPE"aarch64-isel" "aarch64-isel"
49
50using namespace llvm;
51using namespace MIPatternMatch;
52using namespace AArch64GISelUtils;
53
54namespace llvm {
55class BlockFrequencyInfo;
56class ProfileSummaryInfo;
57}
58
59namespace {
60
61#define GET_GLOBALISEL_PREDICATE_BITSET
62#include "AArch64GenGlobalISel.inc"
63#undef GET_GLOBALISEL_PREDICATE_BITSET
64
65class AArch64InstructionSelector : public InstructionSelector {
66public:
67 AArch64InstructionSelector(const AArch64TargetMachine &TM,
68 const AArch64Subtarget &STI,
69 const AArch64RegisterBankInfo &RBI);
70
71 bool select(MachineInstr &I) override;
72  static const char *getName() { return DEBUG_TYPE; }
73
74 void setupMF(MachineFunction &MF, GISelKnownBits *KB,
75 CodeGenCoverage &CoverageInfo, ProfileSummaryInfo *PSI,
76 BlockFrequencyInfo *BFI) override {
77 InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
78 MIB.setMF(MF);
79
80 // hasFnAttribute() is expensive to call on every BRCOND selection, so
81 // cache it here for each run of the selector.
82 ProduceNonFlagSettingCondBr =
83 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
84 MFReturnAddr = Register();
85
86 processPHIs(MF);
87 }
88
89private:
90 /// tblgen-erated 'select' implementation, used as the initial selector for
91 /// the patterns that don't require complex C++.
92 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
93
94 // A lowering phase that runs before any selection attempts.
95 // Returns true if the instruction was modified.
96 bool preISelLower(MachineInstr &I);
97
98 // An early selection function that runs before the selectImpl() call.
99 bool earlySelect(MachineInstr &I);
100
101 // Do some preprocessing of G_PHIs before we begin selection.
102 void processPHIs(MachineFunction &MF);
103
104 bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);
105
106 /// Eliminate same-sized cross-bank copies into stores before selectImpl().
107 bool contractCrossBankCopyIntoStore(MachineInstr &I,
108 MachineRegisterInfo &MRI);
109
110 bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
111
112 bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
113 MachineRegisterInfo &MRI) const;
114 bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
115 MachineRegisterInfo &MRI) const;
116
117 ///@{
118 /// Helper functions for selectCompareBranch.
119 bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
120 MachineIRBuilder &MIB) const;
121 bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
122 MachineIRBuilder &MIB) const;
123 bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
124 MachineIRBuilder &MIB) const;
125 bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
126 MachineBasicBlock *DstMBB,
127 MachineIRBuilder &MIB) const;
128 ///@}
129
130 bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
131 MachineRegisterInfo &MRI);
132
133 bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
134 bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);
135
136 // Helper to generate an equivalent of scalar_to_vector into a new register,
137 // returned via 'Dst'.
138 MachineInstr *emitScalarToVector(unsigned EltSize,
139 const TargetRegisterClass *DstRC,
140 Register Scalar,
141 MachineIRBuilder &MIRBuilder) const;
142
143 /// Emit a lane insert into \p DstReg, or a new vector register if None is
144 /// provided.
145 ///
146 /// The lane inserted into is defined by \p LaneIdx. The vector source
147 /// register is given by \p SrcReg. The register containing the element is
148 /// given by \p EltReg.
149 MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
150 Register EltReg, unsigned LaneIdx,
151 const RegisterBank &RB,
152 MachineIRBuilder &MIRBuilder) const;
153
154 /// Emit a sequence of instructions representing a constant \p CV for a
155 /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
156 ///
157 /// \returns the last instruction in the sequence on success, and nullptr
158 /// otherwise.
159 MachineInstr *emitConstantVector(Register Dst, Constant *CV,
160 MachineIRBuilder &MIRBuilder,
161 MachineRegisterInfo &MRI);
162
163 bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI);
164 bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
165 MachineRegisterInfo &MRI);
166 bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
167 bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
168 bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
169
170 bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
171 bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
172 bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
173 bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);
174 bool selectIntrinsicWithSideEffects(MachineInstr &I,
175 MachineRegisterInfo &MRI);
176 bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
177 bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI);
178 bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
179 bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
180 bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
181 bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
182 bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
183 bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
184
185 unsigned emitConstantPoolEntry(const Constant *CPVal,
186 MachineFunction &MF) const;
187 MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
188 MachineIRBuilder &MIRBuilder) const;
189
190 // Emit a vector concat operation.
191 MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
192 Register Op2,
193 MachineIRBuilder &MIRBuilder) const;
194
195 // Emit an integer compare between LHS and RHS, which checks for Predicate.
196 MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
197 MachineOperand &Predicate,
198 MachineIRBuilder &MIRBuilder) const;
199
200 /// Emit a floating point comparison between \p LHS and \p RHS.
201 /// \p Pred if given is the intended predicate to use.
202 MachineInstr *emitFPCompare(Register LHS, Register RHS,
203 MachineIRBuilder &MIRBuilder,
204 Optional<CmpInst::Predicate> = None) const;
205
206 MachineInstr *emitInstr(unsigned Opcode,
207 std::initializer_list<llvm::DstOp> DstOps,
208 std::initializer_list<llvm::SrcOp> SrcOps,
209 MachineIRBuilder &MIRBuilder,
210 const ComplexRendererFns &RenderFns = None) const;
211 /// Helper function to emit an add or sub instruction.
212 ///
213 /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
214 /// in a specific order.
215 ///
216 /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
217 ///
218 /// \code
219 /// const std::array<std::array<unsigned, 2>, 4> Table {
220 /// {{AArch64::ADDXri, AArch64::ADDWri},
221 /// {AArch64::ADDXrs, AArch64::ADDWrs},
222 /// {AArch64::ADDXrr, AArch64::ADDWrr},
223 /// {AArch64::SUBXri, AArch64::SUBWri},
224 /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
225 /// \endcode
226 ///
227 /// Each row in the table corresponds to a different addressing mode. Each
228 /// column corresponds to a different register size.
229 ///
230 /// \attention Rows must be structured as follows:
231 /// - Row 0: The ri opcode variants
232 /// - Row 1: The rs opcode variants
233 /// - Row 2: The rr opcode variants
234 /// - Row 3: The ri opcode variants for negative immediates
235 /// - Row 4: The rx opcode variants
236 ///
237 /// \attention Columns must be structured as follows:
238 /// - Column 0: The 64-bit opcode variants
239 /// - Column 1: The 32-bit opcode variants
240 ///
241 /// \p Dst is the destination register of the binop to emit.
242 /// \p LHS is the left-hand operand of the binop to emit.
243 /// \p RHS is the right-hand operand of the binop to emit.
244 MachineInstr *emitAddSub(
245 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
246 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
247 MachineIRBuilder &MIRBuilder) const;
248 MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
249 MachineOperand &RHS,
250 MachineIRBuilder &MIRBuilder) const;
251 MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
252 MachineIRBuilder &MIRBuilder) const;
253 MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
254 MachineIRBuilder &MIRBuilder) const;
255 MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
256 MachineIRBuilder &MIRBuilder) const;
257 MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
258 MachineIRBuilder &MIRBuilder) const;
259 MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
260 AArch64CC::CondCode CC,
261 MachineIRBuilder &MIRBuilder) const;
262 MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
263 const RegisterBank &DstRB, LLT ScalarTy,
264 Register VecReg, unsigned LaneIdx,
265 MachineIRBuilder &MIRBuilder) const;
266
267 /// Emit a CSet for an integer compare.
268 ///
269 /// \p DefReg and \p SrcReg are expected to be 32-bit scalar registers.
270 MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
271 MachineIRBuilder &MIRBuilder,
272 Register SrcReg = AArch64::WZR) const;
273 /// Emit a CSet for a FP compare.
274 ///
275 /// \p Dst is expected to be a 32-bit scalar register.
276 MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
277 MachineIRBuilder &MIRBuilder) const;
278
279 /// Emit the overflow op for \p Opcode.
280 ///
281 /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
282 /// G_USUBO, etc.
283 std::pair<MachineInstr *, AArch64CC::CondCode>
284 emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
285 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
286
287 /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
288 /// \p IsNegative is true if the test should be "not zero".
289 /// This will also optimize the test bit instruction when possible.
290 MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
291 MachineBasicBlock *DstMBB,
292 MachineIRBuilder &MIB) const;
293
294 /// Emit a CB(N)Z instruction which branches to \p DestMBB.
295 MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
296 MachineBasicBlock *DestMBB,
297 MachineIRBuilder &MIB) const;
298
299 // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
300 // We use these manually instead of using the importer since it doesn't
301 // support SDNodeXForm.
302 ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
303 ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
304 ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
305 ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
306
307 ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
308 ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
309 ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
310
311 ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
312 unsigned Size) const;
313
314 ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
315 return selectAddrModeUnscaled(Root, 1);
316 }
317 ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
318 return selectAddrModeUnscaled(Root, 2);
319 }
320 ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
321 return selectAddrModeUnscaled(Root, 4);
322 }
323 ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
324 return selectAddrModeUnscaled(Root, 8);
325 }
326 ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
327 return selectAddrModeUnscaled(Root, 16);
328 }
329
330 /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
331 /// from complex pattern matchers like selectAddrModeIndexed().
332 ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
333 MachineRegisterInfo &MRI) const;
334
335 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
336 unsigned Size) const;
337 template <int Width>
338 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
339 return selectAddrModeIndexed(Root, Width / 8);
340 }
341
342 bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
343 const MachineRegisterInfo &MRI) const;
344 ComplexRendererFns
345 selectAddrModeShiftedExtendXReg(MachineOperand &Root,
346 unsigned SizeInBytes) const;
347
348 /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
349 /// or not a shift + extend should be folded into an addressing mode. Returns
350 /// None when this is not profitable or possible.
351 ComplexRendererFns
352 selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
353 MachineOperand &Offset, unsigned SizeInBytes,
354 bool WantsExt) const;
355 ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
356 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
357 unsigned SizeInBytes) const;
358 template <int Width>
359 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
360 return selectAddrModeXRO(Root, Width / 8);
361 }
362
363 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
364 unsigned SizeInBytes) const;
365 template <int Width>
366 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
367 return selectAddrModeWRO(Root, Width / 8);
368 }
369
370 ComplexRendererFns selectShiftedRegister(MachineOperand &Root) const;
371
372 ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
373 return selectShiftedRegister(Root);
374 }
375
376 ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
377 // TODO: selectShiftedRegister should allow for rotates on logical shifts.
378 // For now, make them the same. The only difference between the two is that
379 // logical shifts are allowed to fold in rotates. Otherwise, these are
380 // functionally the same.
381 return selectShiftedRegister(Root);
382 }
383
384 /// Given an extend instruction, determine the correct shift-extend type for
385 /// that instruction.
386 ///
387 /// If the instruction is going to be used in a load or store, pass
388 /// \p IsLoadStore = true.
389 AArch64_AM::ShiftExtendType
390 getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
391 bool IsLoadStore = false) const;
392
393 /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
394 ///
395 /// \returns Either \p Reg if no change was necessary, or the new register
396 /// created by moving \p Reg.
397 ///
398 /// Note: This uses emitCopy right now.
399 Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
400 MachineIRBuilder &MIB) const;
401
402 ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
403
404 void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
405 int OpIdx = -1) const;
406 void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
407 int OpIdx = -1) const;
408 void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
409 int OpIdx = -1) const;
410 void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
411 int OpIdx = -1) const;
412 void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
413 int OpIdx = -1) const;
414 void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
415 int OpIdx = -1) const;
416
417 // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
418 void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
419
420 // Optimization methods.
421 bool tryOptSelect(MachineInstr &MI);
422 MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
423 MachineOperand &Predicate,
424 MachineIRBuilder &MIRBuilder) const;
425
426 /// Return true if \p MI is a load or store of \p NumBytes bytes.
427 bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
428
429 /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
430 /// register zeroed out. In other words, the result of MI has been explicitly
431 /// zero extended.
432 bool isDef32(const MachineInstr &MI) const;
433
434 const AArch64TargetMachine &TM;
435 const AArch64Subtarget &STI;
436 const AArch64InstrInfo &TII;
437 const AArch64RegisterInfo &TRI;
438 const AArch64RegisterBankInfo &RBI;
439
440 bool ProduceNonFlagSettingCondBr = false;
441
442 // Some cached values used during selection.
443 // We use LR as a live-in register, and we keep track of it here as it can be
444 // clobbered by calls.
445 Register MFReturnAddr;
446
447 MachineIRBuilder MIB;
448
449#define GET_GLOBALISEL_PREDICATES_DECL
450#include "AArch64GenGlobalISel.inc"
451#undef GET_GLOBALISEL_PREDICATES_DECL
452
453// We declare the temporaries used by selectImpl() in the class to minimize the
454// cost of constructing placeholder values.
455#define GET_GLOBALISEL_TEMPORARIES_DECL
456#include "AArch64GenGlobalISel.inc"
457#undef GET_GLOBALISEL_TEMPORARIES_DECL
458};
459
460} // end anonymous namespace
461
462#define GET_GLOBALISEL_IMPL
463#include "AArch64GenGlobalISel.inc"
464#undef GET_GLOBALISEL_IMPL
465
466AArch64InstructionSelector::AArch64InstructionSelector(
467 const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
468 const AArch64RegisterBankInfo &RBI)
469 : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
470 TRI(*STI.getRegisterInfo()), RBI(RBI),
471#define GET_GLOBALISEL_PREDICATES_INIT
472#include "AArch64GenGlobalISel.inc"
473#undef GET_GLOBALISEL_PREDICATES_INIT
474#define GET_GLOBALISEL_TEMPORARIES_INIT
475#include "AArch64GenGlobalISel.inc"
476#undef GET_GLOBALISEL_TEMPORARIES_INIT
477{
478}
479
480// FIXME: This should be target-independent, inferred from the types declared
481// for each class in the bank.
482static const TargetRegisterClass *
483getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
484 const RegisterBankInfo &RBI,
485 bool GetAllRegSet = false) {
486 if (RB.getID() == AArch64::GPRRegBankID) {
487 if (Ty.getSizeInBits() <= 32)
488 return GetAllRegSet ? &AArch64::GPR32allRegClass
489 : &AArch64::GPR32RegClass;
490 if (Ty.getSizeInBits() == 64)
491 return GetAllRegSet ? &AArch64::GPR64allRegClass
492 : &AArch64::GPR64RegClass;
493 if (Ty.getSizeInBits() == 128)
494 return &AArch64::XSeqPairsClassRegClass;
495 return nullptr;
496 }
497
498 if (RB.getID() == AArch64::FPRRegBankID) {
499 if (Ty.getSizeInBits() <= 16)
500 return &AArch64::FPR16RegClass;
501 if (Ty.getSizeInBits() == 32)
502 return &AArch64::FPR32RegClass;
503 if (Ty.getSizeInBits() == 64)
504 return &AArch64::FPR64RegClass;
505 if (Ty.getSizeInBits() == 128)
506 return &AArch64::FPR128RegClass;
507 return nullptr;
508 }
509
510 return nullptr;
511}
512
513/// Given a register bank, and size in bits, return the smallest register class
514/// that can represent that combination.
515static const TargetRegisterClass *
516getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
517 bool GetAllRegSet = false) {
518 unsigned RegBankID = RB.getID();
519
520 if (RegBankID == AArch64::GPRRegBankID) {
521 if (SizeInBits <= 32)
522 return GetAllRegSet ? &AArch64::GPR32allRegClass
523 : &AArch64::GPR32RegClass;
524 if (SizeInBits == 64)
525 return GetAllRegSet ? &AArch64::GPR64allRegClass
526 : &AArch64::GPR64RegClass;
527 if (SizeInBits == 128)
528 return &AArch64::XSeqPairsClassRegClass;
529 }
530
531 if (RegBankID == AArch64::FPRRegBankID) {
532 switch (SizeInBits) {
533 default:
534 return nullptr;
535 case 8:
536 return &AArch64::FPR8RegClass;
537 case 16:
538 return &AArch64::FPR16RegClass;
539 case 32:
540 return &AArch64::FPR32RegClass;
541 case 64:
542 return &AArch64::FPR64RegClass;
543 case 128:
544 return &AArch64::FPR128RegClass;
545 }
546 }
547
548 return nullptr;
549}
550
551/// Returns the correct subregister to use for a given register class.
552static bool getSubRegForClass(const TargetRegisterClass *RC,
553 const TargetRegisterInfo &TRI, unsigned &SubReg) {
554 switch (TRI.getRegSizeInBits(*RC)) {
55. Control jumps to the 'default' case at line 570
555 case 8:
556 SubReg = AArch64::bsub;
557 break;
558 case 16:
559 SubReg = AArch64::hsub;
560 break;
561 case 32:
562 if (RC != &AArch64::FPR32RegClass)
563 SubReg = AArch64::sub_32;
564 else
565 SubReg = AArch64::ssub;
566 break;
567 case 64:
568 SubReg = AArch64::dsub;
569 break;
570 default:
571    LLVM_DEBUG(
56. Assuming 'DebugFlag' is false
57. Loop condition is false. Exiting loop
572        dbgs() << "Couldn't find appropriate subregister for register class.");
573 return false;
58. Returning without writing to 'SubReg'
574 }
575
576 return true;
577}
578
579/// Returns the minimum size the given register bank can hold.
580static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
581 switch (RB.getID()) {
582 case AArch64::GPRRegBankID:
583 return 32;
584 case AArch64::FPRRegBankID:
585 return 8;
586 default:
587    llvm_unreachable("Tried to get minimum size for unknown register bank.");
588 }
589}
590
591/// Create a REG_SEQUENCE instruction using the registers in \p Regs.
592/// Helper function for functions like createDTuple and createQTuple.
593///
594/// \p RegClassIDs - The list of register class IDs available for some tuple of
595/// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
596/// expected to contain between 2 and 4 tuple classes.
597///
598/// \p SubRegs - The list of subregister classes associated with each register
599/// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
600/// subregister class. The index of each subregister class is expected to
601/// correspond with the index of each register class.
602///
603/// \returns Either the destination register of REG_SEQUENCE instruction that
604/// was created, or the 0th element of \p Regs if \p Regs contains a single
605/// element.
606static Register createTuple(ArrayRef<Register> Regs,
607 const unsigned RegClassIDs[],
608 const unsigned SubRegs[], MachineIRBuilder &MIB) {
609 unsigned NumRegs = Regs.size();
610 if (NumRegs == 1)
611 return Regs[0];
612  assert(NumRegs >= 2 && NumRegs <= 4 &&
613         "Only support between two and 4 registers in a tuple!");
614 const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();
615 auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
616 auto RegSequence =
617 MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
618 for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
619 RegSequence.addUse(Regs[I]);
620 RegSequence.addImm(SubRegs[I]);
621 }
622 return RegSequence.getReg(0);
623}
624
625/// Create a tuple of D-registers using the registers in \p Regs.
626static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
627 static const unsigned RegClassIDs[] = {
628 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
629 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
630 AArch64::dsub2, AArch64::dsub3};
631 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
632}
633
634/// Create a tuple of Q-registers using the registers in \p Regs.
635static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
636 static const unsigned RegClassIDs[] = {
637 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
638 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
639 AArch64::qsub2, AArch64::qsub3};
640 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
641}
642
643static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
644 auto &MI = *Root.getParent();
645 auto &MBB = *MI.getParent();
646 auto &MF = *MBB.getParent();
647 auto &MRI = MF.getRegInfo();
648 uint64_t Immed;
649 if (Root.isImm())
650 Immed = Root.getImm();
651 else if (Root.isCImm())
652 Immed = Root.getCImm()->getZExtValue();
653 else if (Root.isReg()) {
654 auto ValAndVReg =
655 getConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
656 if (!ValAndVReg)
657 return None;
658 Immed = ValAndVReg->Value.getSExtValue();
659 } else
660 return None;
661 return Immed;
662}
663
664/// Check whether \p I is a currently unsupported binary operation:
665/// - it has an unsized type
666/// - an operand is not a vreg
667/// - all operands are not in the same bank
668/// These are checks that should someday live in the verifier, but right now,
669/// these are mostly limitations of the aarch64 selector.
670static bool unsupportedBinOp(const MachineInstr &I,
671 const AArch64RegisterBankInfo &RBI,
672 const MachineRegisterInfo &MRI,
673 const AArch64RegisterInfo &TRI) {
674 LLT Ty = MRI.getType(I.getOperand(0).getReg());
675 if (!Ty.isValid()) {
676    LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
677 return true;
678 }
679
680 const RegisterBank *PrevOpBank = nullptr;
681 for (auto &MO : I.operands()) {
682 // FIXME: Support non-register operands.
683 if (!MO.isReg()) {
684      LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
685 return true;
686 }
687
688 // FIXME: Can generic operations have physical registers operands? If
689 // so, this will need to be taught about that, and we'll need to get the
690 // bank out of the minimal class for the register.
691 // Either way, this needs to be documented (and possibly verified).
692 if (!Register::isVirtualRegister(MO.getReg())) {
693      LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
694 return true;
695 }
696
697 const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
698 if (!OpBank) {
699      LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
700 return true;
701 }
702
703 if (PrevOpBank && OpBank != PrevOpBank) {
704      LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
705 return true;
706 }
707 PrevOpBank = OpBank;
708 }
709 return false;
710}
711
712/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
713/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
714/// and of size \p OpSize.
715/// \returns \p GenericOpc if the combination is unsupported.
716static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
717 unsigned OpSize) {
718 switch (RegBankID) {
719 case AArch64::GPRRegBankID:
720 if (OpSize == 32) {
721 switch (GenericOpc) {
722 case TargetOpcode::G_SHL:
723 return AArch64::LSLVWr;
724 case TargetOpcode::G_LSHR:
725 return AArch64::LSRVWr;
726 case TargetOpcode::G_ASHR:
727 return AArch64::ASRVWr;
728 default:
729 return GenericOpc;
730 }
731 } else if (OpSize == 64) {
732 switch (GenericOpc) {
733 case TargetOpcode::G_PTR_ADD:
734 return AArch64::ADDXrr;
735 case TargetOpcode::G_SHL:
736 return AArch64::LSLVXr;
737 case TargetOpcode::G_LSHR:
738 return AArch64::LSRVXr;
739 case TargetOpcode::G_ASHR:
740 return AArch64::ASRVXr;
741 default:
742 return GenericOpc;
743 }
744 }
745 break;
746 case AArch64::FPRRegBankID:
747 switch (OpSize) {
748 case 32:
749 switch (GenericOpc) {
750 case TargetOpcode::G_FADD:
751 return AArch64::FADDSrr;
752 case TargetOpcode::G_FSUB:
753 return AArch64::FSUBSrr;
754 case TargetOpcode::G_FMUL:
755 return AArch64::FMULSrr;
756 case TargetOpcode::G_FDIV:
757 return AArch64::FDIVSrr;
758 default:
759 return GenericOpc;
760 }
761 case 64:
762 switch (GenericOpc) {
763 case TargetOpcode::G_FADD:
764 return AArch64::FADDDrr;
765 case TargetOpcode::G_FSUB:
766 return AArch64::FSUBDrr;
767 case TargetOpcode::G_FMUL:
768 return AArch64::FMULDrr;
769 case TargetOpcode::G_FDIV:
770 return AArch64::FDIVDrr;
771 case TargetOpcode::G_OR:
772 return AArch64::ORRv8i8;
773 default:
774 return GenericOpc;
775 }
776 }
777 break;
778 }
779 return GenericOpc;
780}
781
782/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
783/// appropriate for the (value) register bank \p RegBankID and of memory access
784/// size \p OpSize. This returns the variant with the base+unsigned-immediate
785/// addressing mode (e.g., LDRXui).
786/// \returns \p GenericOpc if the combination is unsupported.
787static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
788 unsigned OpSize) {
789 const bool isStore = GenericOpc == TargetOpcode::G_STORE;
790 switch (RegBankID) {
791 case AArch64::GPRRegBankID:
792 switch (OpSize) {
793 case 8:
794 return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
795 case 16:
796 return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
797 case 32:
798 return isStore ? AArch64::STRWui : AArch64::LDRWui;
799 case 64:
800 return isStore ? AArch64::STRXui : AArch64::LDRXui;
801 }
802 break;
803 case AArch64::FPRRegBankID:
804 switch (OpSize) {
805 case 8:
806 return isStore ? AArch64::STRBui : AArch64::LDRBui;
807 case 16:
808 return isStore ? AArch64::STRHui : AArch64::LDRHui;
809 case 32:
810 return isStore ? AArch64::STRSui : AArch64::LDRSui;
811 case 64:
812 return isStore ? AArch64::STRDui : AArch64::LDRDui;
813 }
814 break;
815 }
816 return GenericOpc;
817}
818
819#ifndef NDEBUG
820/// Helper function that verifies that we have a valid copy at the end of
821/// selectCopy. Verifies that the source and dest have the expected sizes and
822/// then returns true.
823static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
824 const MachineRegisterInfo &MRI,
825 const TargetRegisterInfo &TRI,
826 const RegisterBankInfo &RBI) {
827 const Register DstReg = I.getOperand(0).getReg();
828 const Register SrcReg = I.getOperand(1).getReg();
829 const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
830 const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
831
832 // Make sure the size of the source and dest line up.
833  assert(
834      (DstSize == SrcSize ||
835       // Copies are a mean to setup initial types, the number of
836       // bits may not exactly match.
837       (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
838       // Copies are a mean to copy bits around, as long as we are
839       // on the same register class, that's fine. Otherwise, that
840       // means we need some SUBREG_TO_REG or AND & co.
841       (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
842      "Copy with different width?!");
843
844 // Check the size of the destination.
845  assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
846         "GPRs cannot get more than 64-bit width values");
847
848 return true;
849}
850#endif
851
852/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
853/// to \p *To.
854///
855/// E.g "To = COPY SrcReg:SubReg"
856static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
857 const RegisterBankInfo &RBI, Register SrcReg,
858 const TargetRegisterClass *To, unsigned SubReg) {
859  assert(SrcReg.isValid() && "Expected a valid source register?");
860  assert(To && "Destination register class cannot be null");
861  assert(SubReg && "Expected a valid subregister");
862
863 MachineIRBuilder MIB(I);
864 auto SubRegCopy =
865 MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
866 MachineOperand &RegOp = I.getOperand(1);
867 RegOp.setReg(SubRegCopy.getReg(0));
868
869 // It's possible that the destination register won't be constrained. Make
870 // sure that happens.
871 if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))
872 RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
873
874 return true;
875}
876
877/// Helper function to get the source and destination register classes for a
878/// copy. Returns a std::pair containing the source register class for the
879/// copy, and the destination register class for the copy. If a register class
880/// cannot be determined, then it will be nullptr.
881static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
882getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
883 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
884 const RegisterBankInfo &RBI) {
885 Register DstReg = I.getOperand(0).getReg();
886 Register SrcReg = I.getOperand(1).getReg();
887 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
888 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
889 unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
890 unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
891
892 // Special casing for cross-bank copies of s1s. We can technically represent
893 // a 1-bit value with any size of register. The minimum size for a GPR is 32
894 // bits. So, we need to put the FPR on 32 bits as well.
895 //
896 // FIXME: I'm not sure if this case holds true outside of copies. If it does,
897 // then we can pull it into the helpers that get the appropriate class for a
898 // register bank. Or make a new helper that carries along some constraint
899 // information.
900 if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
901 SrcSize = DstSize = 32;
902
903 return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
904 getMinClassForRegBank(DstRegBank, DstSize, true)};
905}
906
907static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
908 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
909 const RegisterBankInfo &RBI) {
910 Register DstReg = I.getOperand(0).getReg();
911 Register SrcReg = I.getOperand(1).getReg();
912 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
913 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
914
915 // Find the correct register classes for the source and destination registers.
916 const TargetRegisterClass *SrcRC;
917 const TargetRegisterClass *DstRC;
918 std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
26. Calling 'tie<const llvm::TargetRegisterClass *, const llvm::TargetRegisterClass *>'
37. Returning from 'tie<const llvm::TargetRegisterClass *, const llvm::TargetRegisterClass *>'
38. Calling 'tuple::operator='
41. Returning from 'tuple::operator='
919
920 if (!DstRC) {
42. Assuming 'DstRC' is non-null
43. Taking false branch
921    LLVM_DEBUG(dbgs() << "Unexpected dest size "
922                      << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
923 return false;
924 }
925
926 // A couple helpers below, for making sure that the copy we produce is valid.
927
928 // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
929 // to verify that the src and dst are the same size, since that's handled by
930 // the SUBREG_TO_REG.
931 bool KnownValid = false;
932
933 // Returns true, or asserts if something we don't expect happens. Instead of
934 // returning true, we return isValidCopy() to ensure that we verify the
935 // result.
936 auto CheckCopy = [&]() {
937 // If we have a bitcast or something, we can't have physical registers.
938    assert((I.isCopy() ||
939            (!Register::isPhysicalRegister(I.getOperand(0).getReg()) &&
940             !Register::isPhysicalRegister(I.getOperand(1).getReg()))) &&
941           "No phys reg on generic operator!");
942 bool ValidCopy = true;
943#ifndef NDEBUG
944 ValidCopy = KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI);
945    assert(ValidCopy && "Invalid copy.");
946#endif
947 (void)KnownValid;
948 return ValidCopy;
949 };
950
951 // Is this a copy? If so, then we may need to insert a subregister copy.
952 if (I.isCopy()) {
44. Calling 'MachineInstr::isCopy'
47. Returning from 'MachineInstr::isCopy'
48. Taking true branch
953 // Yes. Check if there's anything to fix up.
954 if (!SrcRC) {
49. Assuming 'SrcRC' is non-null
50. Taking false branch
955      LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
956 return false;
957 }
958
959 unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
960 unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
961 unsigned SubReg;
51. 'SubReg' declared without an initial value
962
963 // If the source bank doesn't support a subregister copy small enough,
964 // then we first need to copy to the destination bank.
965 if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
52. Assuming the condition is true
53. Taking true branch
966 const TargetRegisterClass *DstTempRC =
967 getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
968 getSubRegForClass(DstRC, TRI, SubReg);
54. Calling 'getSubRegForClass'
59. Returning from 'getSubRegForClass'
969
970 MachineIRBuilder MIB(I);
971 auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
972 copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
60. 6th function call argument is an uninitialized value
973 } else if (SrcSize > DstSize) {
974 // If the source register is bigger than the destination we need to
975 // perform a subregister copy.
976 const TargetRegisterClass *SubRegRC =
977 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
978 getSubRegForClass(SubRegRC, TRI, SubReg);
979 copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
980 } else if (DstSize > SrcSize) {
981 // If the destination register is bigger than the source we need to do
982 // a promotion using SUBREG_TO_REG.
983 const TargetRegisterClass *PromotionRC =
984 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
985 getSubRegForClass(SrcRC, TRI, SubReg);
986
987 Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
988 BuildMI(*I.getParent(), I, I.getDebugLoc(),
989 TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
990 .addImm(0)
991 .addUse(SrcReg)
992 .addImm(SubReg);
993 MachineOperand &RegOp = I.getOperand(1);
994 RegOp.setReg(PromoteReg);
995
996 // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
997 KnownValid = true;
998 }
999
1000 // If the destination is a physical register, then there's nothing to
1001 // change, so we're done.
1002 if (Register::isPhysicalRegister(DstReg))
1003 return CheckCopy();
1004 }
1005
1006 // No need to constrain SrcReg. It will get constrained when we hit another
1007 // of its use or its defs. Copies do not have constraints.
1008 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
1009    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
1010                      << " operand\n");
1011 return false;
1012 }
1013
1014  // If this is a GPR ZEXT that we want to just reduce down into a copy.
1015 // The sizes will be mismatched with the source < 32b but that's ok.
1016 if (I.getOpcode() == TargetOpcode::G_ZEXT) {
1017 I.setDesc(TII.get(AArch64::COPY));
1018    assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
1019 return selectCopy(I, TII, MRI, TRI, RBI);
1020 }
1021
1022 I.setDesc(TII.get(AArch64::COPY));
1023 return CheckCopy();
1024}
1025
1026static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
1027 if (!DstTy.isScalar() || !SrcTy.isScalar())
1028 return GenericOpc;
1029
1030 const unsigned DstSize = DstTy.getSizeInBits();
1031 const unsigned SrcSize = SrcTy.getSizeInBits();
1032
1033 switch (DstSize) {
1034 case 32:
1035 switch (SrcSize) {
1036 case 32:
1037 switch (GenericOpc) {
1038 case TargetOpcode::G_SITOFP:
1039 return AArch64::SCVTFUWSri;
1040 case TargetOpcode::G_UITOFP:
1041 return AArch64::UCVTFUWSri;
1042 case TargetOpcode::G_FPTOSI:
1043 return AArch64::FCVTZSUWSr;
1044 case TargetOpcode::G_FPTOUI:
1045 return AArch64::FCVTZUUWSr;
1046 default:
1047 return GenericOpc;
1048 }
1049 case 64:
1050 switch (GenericOpc) {
1051 case TargetOpcode::G_SITOFP:
1052 return AArch64::SCVTFUXSri;
1053 case TargetOpcode::G_UITOFP:
1054 return AArch64::UCVTFUXSri;
1055 case TargetOpcode::G_FPTOSI:
1056 return AArch64::FCVTZSUWDr;
1057 case TargetOpcode::G_FPTOUI:
1058 return AArch64::FCVTZUUWDr;
1059 default:
1060 return GenericOpc;
1061 }
1062 default:
1063 return GenericOpc;
1064 }
1065 case 64:
1066 switch (SrcSize) {
1067 case 32:
1068 switch (GenericOpc) {
1069 case TargetOpcode::G_SITOFP:
1070 return AArch64::SCVTFUWDri;
1071 case TargetOpcode::G_UITOFP:
1072 return AArch64::UCVTFUWDri;
1073 case TargetOpcode::G_FPTOSI:
1074 return AArch64::FCVTZSUXSr;
1075 case TargetOpcode::G_FPTOUI:
1076 return AArch64::FCVTZUUXSr;
1077 default:
1078 return GenericOpc;
1079 }
1080 case 64:
1081 switch (GenericOpc) {
1082 case TargetOpcode::G_SITOFP:
1083 return AArch64::SCVTFUXDri;
1084 case TargetOpcode::G_UITOFP:
1085 return AArch64::UCVTFUXDri;
1086 case TargetOpcode::G_FPTOSI:
1087 return AArch64::FCVTZSUXDr;
1088 case TargetOpcode::G_FPTOUI:
1089 return AArch64::FCVTZUUXDr;
1090 default:
1091 return GenericOpc;
1092 }
1093 default:
1094 return GenericOpc;
1095 }
1096 default:
1097 return GenericOpc;
1098 };
1099 return GenericOpc;
1100}
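Note (reading directly off the table above): a G_SITOFP from an s64
integer to an s32 float selects SCVTFUXSri (X-register source, S-register
result), while the fully 32-bit case selects SCVTFUWSri.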
1101
1102MachineInstr *
1103AArch64InstructionSelector::emitSelect(Register Dst, Register True,
1104 Register False, AArch64CC::CondCode CC,
1105 MachineIRBuilder &MIB) const {
1106 MachineRegisterInfo &MRI = *MIB.getMRI();
1107  assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
1108             RBI.getRegBank(True, MRI, TRI)->getID() &&
1109         "Expected both select operands to have the same regbank?");
1110 LLT Ty = MRI.getType(True);
1111 if (Ty.isVector())
1112 return nullptr;
1113 const unsigned Size = Ty.getSizeInBits();
1114  assert((Size == 32 || Size == 64) &&
1115         "Expected 32 bit or 64 bit select only?");
1116 const bool Is32Bit = Size == 32;
1117 if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
1118 unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1119 auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1120 constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
1121 return &*FCSel;
1122 }
1123
1124 // By default, we'll try and emit a CSEL.
1125 unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1126 bool Optimized = false;
1127 auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
1128 &Optimized](Register &Reg, Register &OtherReg,
1129 bool Invert) {
1130 if (Optimized)
1131 return false;
1132
1133 // Attempt to fold:
1134 //
1135 // %sub = G_SUB 0, %x
1136 // %select = G_SELECT cc, %reg, %sub
1137 //
1138 // Into:
1139 // %select = CSNEG %reg, %x, cc
1140 Register MatchReg;
1141 if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
1142 Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1143 Reg = MatchReg;
1144 if (Invert) {
1145 CC = AArch64CC::getInvertedCondCode(CC);
1146 std::swap(Reg, OtherReg);
1147 }
1148 return true;
1149 }
1150
1151 // Attempt to fold:
1152 //
1153 // %xor = G_XOR %x, -1
1154 // %select = G_SELECT cc, %reg, %xor
1155 //
1156 // Into:
1157 // %select = CSINV %reg, %x, cc
1158 if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
1159 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1160 Reg = MatchReg;
1161 if (Invert) {
1162 CC = AArch64CC::getInvertedCondCode(CC);
1163 std::swap(Reg, OtherReg);
1164 }
1165 return true;
1166 }
1167
1168 // Attempt to fold:
1169 //
1170 // %add = G_ADD %x, 1
1171 // %select = G_SELECT cc, %reg, %add
1172 //
1173 // Into:
1174 // %select = CSINC %reg, %x, cc
1175 if (mi_match(Reg, MRI,
1176 m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),
1177 m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {
1178 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1179 Reg = MatchReg;
1180 if (Invert) {
1181 CC = AArch64CC::getInvertedCondCode(CC);
1182 std::swap(Reg, OtherReg);
1183 }
1184 return true;
1185 }
1186
1187 return false;
1188 };
1189
1190 // Helper lambda which tries to use CSINC/CSINV for the instruction when its
1191 // true/false values are constants.
1192 // FIXME: All of these patterns already exist in tablegen. We should be
1193 // able to import these.
1194 auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
1195 &Optimized]() {
1196 if (Optimized)
1197 return false;
1198 auto TrueCst = getConstantVRegValWithLookThrough(True, MRI);
1199 auto FalseCst = getConstantVRegValWithLookThrough(False, MRI);
1200 if (!TrueCst && !FalseCst)
1201 return false;
1202
1203 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1204 if (TrueCst && FalseCst) {
1205 int64_t T = TrueCst->Value.getSExtValue();
1206 int64_t F = FalseCst->Value.getSExtValue();
1207
1208 if (T == 0 && F == 1) {
1209 // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
1210 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1211 True = ZReg;
1212 False = ZReg;
1213 return true;
1214 }
1215
1216 if (T == 0 && F == -1) {
1217      // G_SELECT cc, 0, -1 -> CSINV zreg, zreg, cc
1218 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1219 True = ZReg;
1220 False = ZReg;
1221 return true;
1222 }
1223 }
1224
1225 if (TrueCst) {
1226 int64_t T = TrueCst->Value.getSExtValue();
1227 if (T == 1) {
1228 // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
1229 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1230 True = False;
1231 False = ZReg;
1232 CC = AArch64CC::getInvertedCondCode(CC);
1233 return true;
1234 }
1235
1236 if (T == -1) {
1237 // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
1238 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1239 True = False;
1240 False = ZReg;
1241 CC = AArch64CC::getInvertedCondCode(CC);
1242 return true;
1243 }
1244 }
1245
1246 if (FalseCst) {
1247 int64_t F = FalseCst->Value.getSExtValue();
1248 if (F == 1) {
1249 // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
1250 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1251 False = ZReg;
1252 return true;
1253 }
1254
1255 if (F == -1) {
1256      // G_SELECT cc, t, -1 -> CSINV t, zreg, cc
1257 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1258 False = ZReg;
1259 return true;
1260 }
1261 }
1262 return false;
1263 };
1264
1265 Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
1266 Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
1267 Optimized |= TryOptSelectCst();
1268 auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1269 constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
1270 return &*SelectInst;
1271}
1272
1273static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
1274 switch (P) {
1275 default:
1276    llvm_unreachable("Unknown condition code!");
1277 case CmpInst::ICMP_NE:
1278 return AArch64CC::NE;
1279 case CmpInst::ICMP_EQ:
1280 return AArch64CC::EQ;
1281 case CmpInst::ICMP_SGT:
1282 return AArch64CC::GT;
1283 case CmpInst::ICMP_SGE:
1284 return AArch64CC::GE;
1285 case CmpInst::ICMP_SLT:
1286 return AArch64CC::LT;
1287 case CmpInst::ICMP_SLE:
1288 return AArch64CC::LE;
1289 case CmpInst::ICMP_UGT:
1290 return AArch64CC::HI;
1291 case CmpInst::ICMP_UGE:
1292 return AArch64CC::HS;
1293 case CmpInst::ICMP_ULT:
1294 return AArch64CC::LO;
1295 case CmpInst::ICMP_ULE:
1296 return AArch64CC::LS;
1297 }
1298}
1299
1300/// Return a register which can be used as a bit to test in a TB(N)Z.
1301static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1302 MachineRegisterInfo &MRI) {
1303  assert(Reg.isValid() && "Expected valid register!");
1304 while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1305 unsigned Opc = MI->getOpcode();
1306
1307 if (!MI->getOperand(0).isReg() ||
1308 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1309 break;
1310
1311 // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
1312 //
1313 // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1314 // on the truncated x is the same as the bit number on x.
1315 if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1316 Opc == TargetOpcode::G_TRUNC) {
1317 Register NextReg = MI->getOperand(1).getReg();
1318 // Did we find something worth folding?
1319 if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
1320 break;
1321
1322 // NextReg is worth folding. Keep looking.
1323 Reg = NextReg;
1324 continue;
1325 }
1326
1327 // Attempt to find a suitable operation with a constant on one side.
1328 Optional<uint64_t> C;
1329 Register TestReg;
1330 switch (Opc) {
1331 default:
1332 break;
1333 case TargetOpcode::G_AND:
1334 case TargetOpcode::G_XOR: {
1335 TestReg = MI->getOperand(1).getReg();
1336 Register ConstantReg = MI->getOperand(2).getReg();
1337 auto VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI);
1338 if (!VRegAndVal) {
1339 // AND commutes, check the other side for a constant.
1340 // FIXME: Can we canonicalize the constant so that it's always on the
1341 // same side at some point earlier?
1342 std::swap(ConstantReg, TestReg);
1343 VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI);
1344 }
1345 if (VRegAndVal)
1346 C = VRegAndVal->Value.getSExtValue();
1347 break;
1348 }
1349 case TargetOpcode::G_ASHR:
1350 case TargetOpcode::G_LSHR:
1351 case TargetOpcode::G_SHL: {
1352 TestReg = MI->getOperand(1).getReg();
1353 auto VRegAndVal =
1354 getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1355 if (VRegAndVal)
1356 C = VRegAndVal->Value.getSExtValue();
1357 break;
1358 }
1359 }
1360
1361 // Didn't find a constant or viable register. Bail out of the loop.
1362 if (!C || !TestReg.isValid())
1363 break;
1364
1365 // We found a suitable instruction with a constant. Check to see if we can
1366 // walk through the instruction.
1367 Register NextReg;
1368 unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
1369 switch (Opc) {
1370 default:
1371 break;
1372 case TargetOpcode::G_AND:
1373 // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1374 if ((*C >> Bit) & 1)
1375 NextReg = TestReg;
1376 break;
1377 case TargetOpcode::G_SHL:
1378 // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
1379 // the type of the register.
1380 if (*C <= Bit && (Bit - *C) < TestRegSize) {
1381 NextReg = TestReg;
1382 Bit = Bit - *C;
1383 }
1384 break;
1385 case TargetOpcode::G_ASHR:
1386 // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
1387 // in x
1388 NextReg = TestReg;
1389 Bit = Bit + *C;
1390 if (Bit >= TestRegSize)
1391 Bit = TestRegSize - 1;
1392 break;
1393 case TargetOpcode::G_LSHR:
1394 // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1395 if ((Bit + *C) < TestRegSize) {
1396 NextReg = TestReg;
1397 Bit = Bit + *C;
1398 }
1399 break;
1400 case TargetOpcode::G_XOR:
1401 // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1402 // appropriate.
1403 //
1404 // e.g. If x' = xor x, c, and the b-th bit is set in c then
1405 //
1406 // tbz x', b -> tbnz x, b
1407 //
1408 // Because x' only has the b-th bit set if x does not.
1409 if ((*C >> Bit) & 1)
1410 Invert = !Invert;
1411 NextReg = TestReg;
1412 break;
1413 }
1414
1415 // Check if we found anything worth folding.
1416 if (!NextReg.isValid())
1417 return Reg;
1418 Reg = NextReg;
1419 }
1420
1421 return Reg;
1422}
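A worked instance of the walk above (illustrative): testing bit 3 of %t
where %t = G_SHL %x, 2 hits the G_SHL case, which rewrites the test to
bit 3 - 2 = 1 of %x, so a single TB(N)Z on %x suffices.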
1423
1424MachineInstr *AArch64InstructionSelector::emitTestBit(
1425 Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1426 MachineIRBuilder &MIB) const {
1427  assert(TestReg.isValid());
1428  assert(ProduceNonFlagSettingCondBr &&
1429         "Cannot emit TB(N)Z with speculation tracking!");
1430 MachineRegisterInfo &MRI = *MIB.getMRI();
1431
1432 // Attempt to optimize the test bit by walking over instructions.
1433 TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1434 LLT Ty = MRI.getType(TestReg);
1435 unsigned Size = Ty.getSizeInBits();
1436  assert(!Ty.isVector() && "Expected a scalar!");
1437  assert(Bit < 64 && "Bit is too large!");
1438
1439  // TB(N)ZW can only test bits 0-31, so use the W variant when the bit fits
1440  // in 32 bits, and move the value to the matching register class if needed.
1441 bool UseWReg = Bit < 32;
1442 unsigned NecessarySize = UseWReg ? 32 : 64;
1443 if (Size != NecessarySize)
1444 TestReg = moveScalarRegClass(
1445 TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1446 MIB);
1447
1448 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1449 {AArch64::TBZW, AArch64::TBNZW}};
1450 unsigned Opc = OpcTable[UseWReg][IsNegative];
1451 auto TestBitMI =
1452 MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1453 constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1454 return &*TestBitMI;
1455}
1456
1457bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1458 MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1459 MachineIRBuilder &MIB) const {
1460  assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
1461 // Given something like this:
1462 //
1463 // %x = ...Something...
1464 // %one = G_CONSTANT i64 1
1465 // %zero = G_CONSTANT i64 0
1466 // %and = G_AND %x, %one
1467 // %cmp = G_ICMP intpred(ne), %and, %zero
1468 // %cmp_trunc = G_TRUNC %cmp
1469 // G_BRCOND %cmp_trunc, %bb.3
1470 //
1471 // We want to try and fold the AND into the G_BRCOND and produce either a
1472 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1473 //
1474 // In this case, we'd get
1475 //
1476 // TBNZ %x %bb.3
1477 //
1478
1479 // Check if the AND has a constant on its RHS which we can use as a mask.
1480 // If it's a power of 2, then it's the same as checking a specific bit.
1481 // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1482 auto MaybeBit = getConstantVRegValWithLookThrough(
1483 AndInst.getOperand(2).getReg(), *MIB.getMRI());
1484 if (!MaybeBit)
1485 return false;
1486
1487 int32_t Bit = MaybeBit->Value.exactLogBase2();
1488 if (Bit < 0)
1489 return false;
1490
1491 Register TestReg = AndInst.getOperand(1).getReg();
1492
1493 // Emit a TB(N)Z.
1494 emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1495 return true;
1496}
1497
1498MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1499 bool IsNegative,
1500 MachineBasicBlock *DestMBB,
1501 MachineIRBuilder &MIB) const {
1502  assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
1503 MachineRegisterInfo &MRI = *MIB.getMRI();
1504  assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
1505             AArch64::GPRRegBankID &&
1506         "Expected GPRs only?");
1507 auto Ty = MRI.getType(CompareReg);
1508 unsigned Width = Ty.getSizeInBits();
1509  assert(!Ty.isVector() && "Expected scalar only?");
1510  assert(Width <= 64 && "Expected width to be at most 64?");
1511 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1512 {AArch64::CBNZW, AArch64::CBNZX}};
1513 unsigned Opc = OpcTable[IsNegative][Width == 64];
1514 auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
1515 constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
1516 return &*BranchMI;
1517}
1518
1519bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1520 MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1521  assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
1522  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1523 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1524 // totally clean. Some of them require two branches to implement.
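For example, an ordered not-equal predicate has no single AArch64
condition code, so changeFCMPPredToAArch64CC yields two codes and a
second Bcc is emitted below.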
1525 auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
1526 emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
1527 Pred);
1528 AArch64CC::CondCode CC1, CC2;
1529 changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2);
1530 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1531 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
1532 if (CC2 != AArch64CC::AL)
1533 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
1534 I.eraseFromParent();
1535 return true;
1536}
1537
1538bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1539 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1540  assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1541  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1542 // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
1543 //
1544 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1545 // instructions will not be produced, as they are conditional branch
1546 // instructions that do not set flags.
1547 if (!ProduceNonFlagSettingCondBr)
1548 return false;
1549
1550 MachineRegisterInfo &MRI = *MIB.getMRI();
1551 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1552 auto Pred =
1553 static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
1554 Register LHS = ICmp.getOperand(2).getReg();
1555 Register RHS = ICmp.getOperand(3).getReg();
1556
1557 // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
1558 auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
1559 MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1560
1561 // When we can emit a TB(N)Z, prefer that.
1562 //
1563 // Handle non-commutative condition codes first.
1564 // Note that we don't want to do this when we have a G_AND because it can
1565 // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1566 if (VRegAndVal && !AndInst) {
1567 int64_t C = VRegAndVal->Value.getSExtValue();
1568
1569 // When we have a greater-than comparison, we can just test if the msb is
1570 // zero.
1571 if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1572 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1573 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1574 I.eraseFromParent();
1575 return true;
1576 }
1577
1578 // When we have a less than comparison, we can just test if the msb is not
1579 // zero.
1580 if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1581 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1582 emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
1583 I.eraseFromParent();
1584 return true;
1585 }
1586 }
1587
1588 // Attempt to handle commutative condition codes. Right now, that's only
1589 // eq/ne.
1590 if (ICmpInst::isEquality(Pred)) {
1591 if (!VRegAndVal) {
1592 std::swap(RHS, LHS);
1593 VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
1594 AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1595 }
1596
1597 if (VRegAndVal && VRegAndVal->Value == 0) {
1598 // If there's a G_AND feeding into this branch, try to fold it away by
1599 // emitting a TB(N)Z instead.
1600 //
1601 // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
1602 // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
1603 // would be redundant.
1604 if (AndInst &&
1605 tryOptAndIntoCompareBranch(
1606 *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
1607 I.eraseFromParent();
1608 return true;
1609 }
1610
1611 // Otherwise, try to emit a CB(N)Z instead.
1612 auto LHSTy = MRI.getType(LHS);
1613 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1614 emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
1615 I.eraseFromParent();
1616 return true;
1617 }
1618 }
1619 }
1620
1621 return false;
1622}
1623
1624bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1625 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1626  assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1627  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1628 if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
1629 return true;
1630
1631 // Couldn't optimize. Emit a compare + a Bcc.
1632 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1633 auto PredOp = ICmp.getOperand(1);
1634 emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
1635 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
1636 static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
1637 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1638 I.eraseFromParent();
1639 return true;
1640}
1641
1642bool AArch64InstructionSelector::selectCompareBranch(
1643 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
1644 Register CondReg = I.getOperand(0).getReg();
1645 MachineInstr *CCMI = MRI.getVRegDef(CondReg);
1646 if (CCMI->getOpcode() == TargetOpcode::G_TRUNC) {
1647 CondReg = CCMI->getOperand(1).getReg();
1648 CCMI = MRI.getVRegDef(CondReg);
1649 }
1650
1651 // Try to select the G_BRCOND using whatever is feeding the condition if
1652 // possible.
1653 unsigned CCMIOpc = CCMI->getOpcode();
1654 if (CCMIOpc == TargetOpcode::G_FCMP)
1655 return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
1656 if (CCMIOpc == TargetOpcode::G_ICMP)
1657 return selectCompareBranchFedByICmp(I, *CCMI, MIB);
1658
1659 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1660 // instructions will not be produced, as they are conditional branch
1661 // instructions that do not set flags.
1662 if (ProduceNonFlagSettingCondBr) {
1663 emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
1664 I.getOperand(1).getMBB(), MIB);
1665 I.eraseFromParent();
1666 return true;
1667 }
1668
1669 // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
1670 auto TstMI =
1671 MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
1672 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
1673 auto Bcc = MIB.buildInstr(AArch64::Bcc)
1674 .addImm(AArch64CC::EQ)
1675 .addMBB(I.getOperand(1).getMBB());
1676 I.eraseFromParent();
1677 return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
1678}
1679
1680/// Returns the element immediate value of a vector shift operand if found.
1681/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1682static Optional<int64_t> getVectorShiftImm(Register Reg,
1683 MachineRegisterInfo &MRI) {
1684  assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1685 MachineInstr *OpMI = MRI.getVRegDef(Reg);
1686  assert(OpMI && "Expected to find a vreg def for vector shift operand");
1687 return getAArch64VectorSplatScalar(*OpMI, MRI);
1688}
1689
1690/// Matches and returns the shift immediate value for a SHL instruction given
1691/// a shift operand.
1692static Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI) {
1693 Optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1694 if (!ShiftImm)
1695 return None;
1696 // Check the immediate is in range for a SHL.
1697 int64_t Imm = *ShiftImm;
1698 if (Imm < 0)
1699 return None;
1700 switch (SrcTy.getElementType().getSizeInBits()) {
1701 default:
1702    LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
1703 return None;
1704 case 8:
1705 if (Imm > 7)
1706 return None;
1707 break;
1708 case 16:
1709 if (Imm > 15)
1710 return None;
1711 break;
1712 case 32:
1713 if (Imm > 31)
1714 return None;
1715 break;
1716 case 64:
1717 if (Imm > 63)
1718 return None;
1719 break;
1720 }
1721 return Imm;
1722}
1723
1724bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
1725 MachineRegisterInfo &MRI) {
1726  assert(I.getOpcode() == TargetOpcode::G_SHL);
1727 Register DstReg = I.getOperand(0).getReg();
1728 const LLT Ty = MRI.getType(DstReg);
1729 Register Src1Reg = I.getOperand(1).getReg();
1730 Register Src2Reg = I.getOperand(2).getReg();
1731
1732 if (!Ty.isVector())
1733 return false;
1734
1735 // Check if we have a vector of constants on RHS that we can select as the
1736 // immediate form.
1737 Optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1738
1739 unsigned Opc = 0;
1740 if (Ty == LLT::fixed_vector(2, 64)) {
1741 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1742 } else if (Ty == LLT::fixed_vector(4, 32)) {
1743 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1744 } else if (Ty == LLT::fixed_vector(2, 32)) {
1745 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1746 } else if (Ty == LLT::fixed_vector(4, 16)) {
1747 Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1748 } else if (Ty == LLT::fixed_vector(8, 16)) {
1749 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1750 } else if (Ty == LLT::fixed_vector(16, 8)) {
1751 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1752 } else if (Ty == LLT::fixed_vector(8, 8)) {
1753 Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1754 } else {
1755    LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1756 return false;
1757 }
1758
1759 auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1760 if (ImmVal)
1761 Shl.addImm(*ImmVal);
1762 else
1763 Shl.addUse(Src2Reg);
1764 constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
1765 I.eraseFromParent();
1766 return true;
1767}
1768
1769bool AArch64InstructionSelector::selectVectorAshrLshr(
1770 MachineInstr &I, MachineRegisterInfo &MRI) {
1771  assert(I.getOpcode() == TargetOpcode::G_ASHR ||
1772         I.getOpcode() == TargetOpcode::G_LSHR);
1773 Register DstReg = I.getOperand(0).getReg();
1774 const LLT Ty = MRI.getType(DstReg);
1775 Register Src1Reg = I.getOperand(1).getReg();
1776 Register Src2Reg = I.getOperand(2).getReg();
1777
1778 if (!Ty.isVector())
1779 return false;
1780
1781 bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1782
1783  // We expect the immediate case to be lowered in the post-legalizer
1784  // combines to AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1785
1786  // There is no shift-right-by-register instruction, but the shift-left-
1787  // by-register instruction takes a signed value, where negative amounts
1788  // specify a right shift.
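Illustrative example of the scheme below: an s32x4 arithmetic shift right
by %amt becomes NEGv4i32 %neg, %amt followed by SSHLv4i32 %dst, %src,
%neg.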
1789
1790 unsigned Opc = 0;
1791 unsigned NegOpc = 0;
1792 const TargetRegisterClass *RC =
1793 getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID), RBI);
1794 if (Ty == LLT::fixed_vector(2, 64)) {
1795 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1796 NegOpc = AArch64::NEGv2i64;
1797 } else if (Ty == LLT::fixed_vector(4, 32)) {
1798 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1799 NegOpc = AArch64::NEGv4i32;
1800 } else if (Ty == LLT::fixed_vector(2, 32)) {
1801 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1802 NegOpc = AArch64::NEGv2i32;
1803 } else if (Ty == LLT::fixed_vector(4, 16)) {
1804 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1805 NegOpc = AArch64::NEGv4i16;
1806 } else if (Ty == LLT::fixed_vector(8, 16)) {
1807 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1808 NegOpc = AArch64::NEGv8i16;
1809 } else if (Ty == LLT::fixed_vector(16, 8)) {
1810 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1811 NegOpc = AArch64::NEGv16i8;
1812 } else if (Ty == LLT::fixed_vector(8, 8)) {
1813 Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1814 NegOpc = AArch64::NEGv8i8;
1815 } else {
1816    LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1817 return false;
1818 }
1819
1820 auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1821 constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1822 auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1823 constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1824 I.eraseFromParent();
1825 return true;
1826}
1827
1828bool AArch64InstructionSelector::selectVaStartAAPCS(
1829 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1830 return false;
1831}
1832
1833bool AArch64InstructionSelector::selectVaStartDarwin(
1834 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1835 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1836 Register ListReg = I.getOperand(0).getReg();
1837
1838 Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1839
1840 auto MIB =
1841 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
1842 .addDef(ArgsAddrReg)
1843 .addFrameIndex(FuncInfo->getVarArgsStackIndex())
1844 .addImm(0)
1845 .addImm(0);
1846
1847 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1848
1849 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
1850 .addUse(ArgsAddrReg)
1851 .addUse(ListReg)
1852 .addImm(0)
1853 .addMemOperand(*I.memoperands_begin());
1854
1855 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1856 I.eraseFromParent();
1857 return true;
1858}
1859
1860void AArch64InstructionSelector::materializeLargeCMVal(
1861 MachineInstr &I, const Value *V, unsigned OpFlags) {
1862 MachineBasicBlock &MBB = *I.getParent();
1863 MachineFunction &MF = *MBB.getParent();
1864 MachineRegisterInfo &MRI = MF.getRegInfo();
1865
1866 auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
1867 MovZ->addOperand(MF, I.getOperand(1));
1868 MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
1869 AArch64II::MO_NC);
1870 MovZ->addOperand(MF, MachineOperand::CreateImm(0));
1871 constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
1872
1873 auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
1874 Register ForceDstReg) {
1875 Register DstReg = ForceDstReg
1876 ? ForceDstReg
1877 : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1878 auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
1879 if (auto *GV = dyn_cast<GlobalValue>(V)) {
1880 MovI->addOperand(MF, MachineOperand::CreateGA(
1881 GV, MovZ->getOperand(1).getOffset(), Flags));
1882 } else {
1883 MovI->addOperand(
1884 MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
1885 MovZ->getOperand(1).getOffset(), Flags));
1886 }
1887 MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
1888 constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
1889 return DstReg;
1890 };
1891 Register DstReg = BuildMovK(MovZ.getReg(0),
1892 AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
1893 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
1894 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
1895}
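The net effect is the usual four-instruction 64-bit materialization; for
a global G it is roughly (editor's sketch):

    MOVZXi %d0, :abs_g0_nc:G, 0
    MOVKXi %d1, %d0, :abs_g1_nc:G, 16
    MOVKXi %d2, %d1, :abs_g2_nc:G, 32
    MOVKXi %dst, %d2, :abs_g3:G, 48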
1896
1897bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
1898 MachineBasicBlock &MBB = *I.getParent();
1899 MachineFunction &MF = *MBB.getParent();
1900 MachineRegisterInfo &MRI = MF.getRegInfo();
1901
1902 switch (I.getOpcode()) {
1903 case TargetOpcode::G_SHL:
1904 case TargetOpcode::G_ASHR:
1905 case TargetOpcode::G_LSHR: {
1906 // These shifts are legalized to have 64 bit shift amounts because we want
1907 // to take advantage of the existing imported selection patterns that assume
1908 // the immediates are s64s. However, if the shifted type is 32 bits and for
1909 // some reason we receive input GMIR that has an s64 shift amount that's not
1910 // a G_CONSTANT, insert a truncate so that we can still select the s32
1911 // register-register variant.
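Illustrative effect of the rewrite below: a non-constant s64 shift amount
%sh feeding a 32-bit shift gets %t:gpr(s32) = COPY %sh.sub_32 inserted,
and the shift's operand 2 is repointed at %t so the s32 register-register
pattern can match.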
1912 Register SrcReg = I.getOperand(1).getReg();
1913 Register ShiftReg = I.getOperand(2).getReg();
1914 const LLT ShiftTy = MRI.getType(ShiftReg);
1915 const LLT SrcTy = MRI.getType(SrcReg);
1916 if (SrcTy.isVector())
1917 return false;
1918    assert(!ShiftTy.isVector() && "unexpected vector shift ty");
1919 if (SrcTy.getSizeInBits() != 32 || ShiftTy.getSizeInBits() != 64)
1920 return false;
1921 auto *AmtMI = MRI.getVRegDef(ShiftReg);
1922    assert(AmtMI && "could not find a vreg definition for shift amount");
1923 if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) {
1924 // Insert a subregister copy to implement a 64->32 trunc
1925 auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
1926 .addReg(ShiftReg, 0, AArch64::sub_32);
1927 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
1928 I.getOperand(2).setReg(Trunc.getReg(0));
1929 }
1930 return true;
1931 }
1932 case TargetOpcode::G_STORE: {
1933 bool Changed = contractCrossBankCopyIntoStore(I, MRI);
1934 MachineOperand &SrcOp = I.getOperand(0);
1935 if (MRI.getType(SrcOp.getReg()).isPointer()) {
1936 // Allow matching with imported patterns for stores of pointers. Unlike
1937 // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
1938 // and constrain.
1939 auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp);
1940 Register NewSrc = Copy.getReg(0);
1941 SrcOp.setReg(NewSrc);
1942 RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
1943 Changed = true;
1944 }
1945 return Changed;
1946 }
1947 case TargetOpcode::G_PTR_ADD:
1948 return convertPtrAddToAdd(I, MRI);
1949 case TargetOpcode::G_LOAD: {
1950 // For scalar loads of pointers, we try to convert the dest type from p0
1951 // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
1952 // conversion, this should be ok because all users should have been
1953 // selected already, so the type doesn't matter for them.
1954 Register DstReg = I.getOperand(0).getReg();
1955 const LLT DstTy = MRI.getType(DstReg);
1956 if (!DstTy.isPointer())
1957 return false;
1958 MRI.setType(DstReg, LLT::scalar(64));
1959 return true;
1960 }
1961 case AArch64::G_DUP: {
1962 // Convert the type from p0 to s64 to help selection.
1963 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1964 if (!DstTy.getElementType().isPointer())
1965 return false;
1966 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
1967 MRI.setType(I.getOperand(0).getReg(),
1968 DstTy.changeElementType(LLT::scalar(64)));
1969 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
1970 I.getOperand(1).setReg(NewSrc.getReg(0));
1971 return true;
1972 }
1973 case TargetOpcode::G_UITOFP:
1974 case TargetOpcode::G_SITOFP: {
1975 // If both source and destination regbanks are FPR, then convert the opcode
1976    // to G_SITOF/G_UITOF so that the importer can select it to an fpr variant.
1977 // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
1978 // copy.
1979 Register SrcReg = I.getOperand(1).getReg();
1980 LLT SrcTy = MRI.getType(SrcReg);
1981 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1982 if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
1983 return false;
1984
1985 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
1986 if (I.getOpcode() == TargetOpcode::G_SITOFP)
1987 I.setDesc(TII.get(AArch64::G_SITOF));
1988 else
1989 I.setDesc(TII.get(AArch64::G_UITOF));
1990 return true;
1991 }
1992 return false;
1993 }
1994 default:
1995 return false;
1996 }
1997}
1998
1999/// This lowering tries to look for G_PTR_ADD instructions and then converts
2000/// them to a standard G_ADD with a COPY on the source.
2001///
2002/// The motivation behind this is to expose the add semantics to the imported
2003/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
2004/// because the selector works bottom up, uses before defs. By the time we
2005/// end up trying to select a G_PTR_ADD, we should have already attempted to
2006/// fold this into addressing modes and were therefore unsuccessful.
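Illustrative before/after (a sketch of the rewrite below):

    %dst:gpr(p0) = G_PTR_ADD %base(p0), %off(s64)
  -->
    %intbase:gpr(s64) = G_PTRTOINT %base(p0)
    %dst:gpr(s64) = G_ADD %intbase, %off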
2007bool AArch64InstructionSelector::convertPtrAddToAdd(
2008 MachineInstr &I, MachineRegisterInfo &MRI) {
2009  assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
2010 Register DstReg = I.getOperand(0).getReg();
2011 Register AddOp1Reg = I.getOperand(1).getReg();
2012 const LLT PtrTy = MRI.getType(DstReg);
2013 if (PtrTy.getAddressSpace() != 0)
2014 return false;
2015
2016 const LLT CastPtrTy =
2017 PtrTy.isVector() ? LLT::fixed_vector(2, 64) : LLT::scalar(64);
2018 auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
2019 // Set regbanks on the registers.
2020 if (PtrTy.isVector())
2021 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
2022 else
2023 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
2024
2025 // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
2026 // %dst(intty) = G_ADD %intbase, off
2027 I.setDesc(TII.get(TargetOpcode::G_ADD));
2028 MRI.setType(DstReg, CastPtrTy);
2029 I.getOperand(1).setReg(PtrToInt.getReg(0));
2030 if (!select(*PtrToInt)) {
2031    LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
2032 return false;
2033 }
2034
2035 // Also take the opportunity here to try to do some optimization.
2036 // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
2037 Register NegatedReg;
2038 if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
2039 return true;
2040 I.getOperand(2).setReg(NegatedReg);
2041 I.setDesc(TII.get(TargetOpcode::G_SUB));
2042 return true;
2043}
2044
2045bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
2046 MachineRegisterInfo &MRI) {
2047 // We try to match the immediate variant of LSL, which is actually an alias
2048 // for a special case of UBFM. Otherwise, we fall back to the imported
2049 // selector which will match the register variant.
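Illustrative instance, using the architectural alias: for s32 and a
constant shift of 3, LSL Wd, Wn, #3 is UBFMWri Wd, Wn, 29, 28, i.e.
immr = (32 - 3) % 32 and imms = 31 - 3.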
2050  assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
2051 const auto &MO = I.getOperand(2);
2052 auto VRegAndVal = getConstantVRegVal(MO.getReg(), MRI);
2053 if (!VRegAndVal)
2054 return false;
2055
2056 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2057 if (DstTy.isVector())
2058 return false;
2059 bool Is64Bit = DstTy.getSizeInBits() == 64;
2060 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2061 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2062
2063 if (!Imm1Fn || !Imm2Fn)
2064 return false;
2065
2066 auto NewI =
2067 MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2068 {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
2069
2070 for (auto &RenderFn : *Imm1Fn)
2071 RenderFn(NewI);
2072 for (auto &RenderFn : *Imm2Fn)
2073 RenderFn(NewI);
2074
2075 I.eraseFromParent();
2076 return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
2077}
2078
2079bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2080 MachineInstr &I, MachineRegisterInfo &MRI) {
2081  assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
2082 // If we're storing a scalar, it doesn't matter what register bank that
2083 // scalar is on. All that matters is the size.
2084 //
2085 // So, if we see something like this (with a 32-bit scalar as an example):
2086 //
2087 // %x:gpr(s32) = ... something ...
2088 // %y:fpr(s32) = COPY %x:gpr(s32)
2089 // G_STORE %y:fpr(s32)
2090 //
2091 // We can fix this up into something like this:
2092 //
2093 // G_STORE %x:gpr(s32)
2094 //
2095 // And then continue the selection process normally.
2096 Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
2097 if (!DefDstReg.isValid())
2098 return false;
2099 LLT DefDstTy = MRI.getType(DefDstReg);
2100 Register StoreSrcReg = I.getOperand(0).getReg();
2101 LLT StoreSrcTy = MRI.getType(StoreSrcReg);
2102
2103 // If we get something strange like a physical register, then we shouldn't
2104 // go any further.
2105 if (!DefDstTy.isValid())
2106 return false;
2107
2108 // Are the source and dst types the same size?
2109 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2110 return false;
2111
2112 if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
2113 RBI.getRegBank(DefDstReg, MRI, TRI))
2114 return false;
2115
2116 // We have a cross-bank copy, which is entering a store. Let's fold it.
2117 I.getOperand(0).setReg(DefDstReg);
2118 return true;
2119}
2120
2121bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
2122  assert(I.getParent() && "Instruction should be in a basic block!");
2123  assert(I.getParent()->getParent() && "Instruction should be in a function!");
2124
2125 MachineBasicBlock &MBB = *I.getParent();
2126 MachineFunction &MF = *MBB.getParent();
2127 MachineRegisterInfo &MRI = MF.getRegInfo();
2128
2129 switch (I.getOpcode()) {
2130 case AArch64::G_DUP: {
2131 // Before selecting a DUP instruction, check if it is better selected as a
2132 // MOV or load from a constant pool.
2133 Register Src = I.getOperand(1).getReg();
2134 auto ValAndVReg = getConstantVRegValWithLookThrough(Src, MRI);
2135 if (!ValAndVReg)
2136 return false;
2137 LLVMContext &Ctx = MF.getFunction().getContext();
2138 Register Dst = I.getOperand(0).getReg();
2139 auto *CV = ConstantDataVector::getSplat(
2140 MRI.getType(Dst).getNumElements(),
2141 ConstantInt::get(Type::getIntNTy(Ctx, MRI.getType(Src).getSizeInBits()),
2142 ValAndVReg->Value));
2143 if (!emitConstantVector(Dst, CV, MIB, MRI))
2144 return false;
2145 I.eraseFromParent();
2146 return true;
2147 }
2148 case TargetOpcode::G_BR: {
2149 // If the branch jumps to the fallthrough block, don't bother emitting it.
2150 // Only do this for -O0 for a good code size improvement, because when
2151 // optimizations are enabled we want to leave this choice to
2152 // MachineBlockPlacement.
2153 bool EnableOpt = MF.getTarget().getOptLevel() != CodeGenOpt::None;
2154 if (EnableOpt || !MBB.isLayoutSuccessor(I.getOperand(0).getMBB()))
2155 return false;
2156 I.eraseFromParent();
2157 return true;
2158 }
2159 case TargetOpcode::G_SHL:
2160 return earlySelectSHL(I, MRI);
2161 case TargetOpcode::G_CONSTANT: {
2162 bool IsZero = false;
2163 if (I.getOperand(1).isCImm())
2164 IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0;
2165 else if (I.getOperand(1).isImm())
2166 IsZero = I.getOperand(1).getImm() == 0;
2167
2168 if (!IsZero)
2169 return false;
2170
2171 Register DefReg = I.getOperand(0).getReg();
2172 LLT Ty = MRI.getType(DefReg);
2173 if (Ty.getSizeInBits() == 64) {
2174 I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
2175 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
2176 } else if (Ty.getSizeInBits() == 32) {
2177 I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
2178 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
2179 } else
2180 return false;
2181
2182 I.setDesc(TII.get(TargetOpcode::COPY));
2183 return true;
2184 }
2185
2186 case TargetOpcode::G_ADD: {
2187 // Check if this is being fed by a G_ICMP on either side.
2188 //
2189 // (cmp pred, x, y) + z
2190 //
2191 // In the above case, when the cmp is true, we increment z by 1. So, we can
2192 // fold the add into the cset for the cmp by using cinc.
2193 //
2194 // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
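// Illustrative sketch (registers invented): instead of
//   cmp  w8, w9
//   cset w10, eq
//   add  w0, w10, w1
// the cset+add pair folds into
//   cmp  w8, w9
//   cinc w0, w1, eq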
2195 Register X = I.getOperand(1).getReg();
2196
2197 // Only handle scalars. Scalar G_ICMP is only legal for s32, so bail out
2198 // early on anything else.
2199 LLT Ty = MRI.getType(X);
2200 if (Ty.isVector() || Ty.getSizeInBits() != 32)
2201 return false;
2202
2203 Register CmpReg = I.getOperand(2).getReg();
2204 MachineInstr *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, CmpReg, MRI);
2205 if (!Cmp) {
2206 std::swap(X, CmpReg);
2207 Cmp = getOpcodeDef(TargetOpcode::G_ICMP, CmpReg, MRI);
2208 if (!Cmp)
2209 return false;
2210 }
2211 auto Pred =
2212 static_cast<CmpInst::Predicate>(Cmp->getOperand(1).getPredicate());
2213 emitIntegerCompare(Cmp->getOperand(2), Cmp->getOperand(3),
2214 Cmp->getOperand(1), MIB);
2215 emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIB, X);
2216 I.eraseFromParent();
2217 return true;
2218 }
2219 case TargetOpcode::G_OR: {
2220 // Look for operations that take the lower `Width=Size-ShiftImm` bits of
2221 // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
2222 // shifting and masking that we can replace with a BFI (encoded as a BFM).
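// Illustrative sketch: with Size = 32 and ShiftImm = 8, the pattern
//   %dst = G_OR (G_SHL %ShiftSrc, 8), (G_AND %MaskSrc, 0xff)
// selects to BFMWri %MaskSrc, %ShiftSrc, 24, 23, which is the alias
//   bfi %MaskSrc, %ShiftSrc, #8, #24
// (Immr = Size - ShiftImm = 24, Imms = Size - ShiftImm - 1 = 23).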
2223 Register Dst = I.getOperand(0).getReg();
2224 LLT Ty = MRI.getType(Dst);
2225
2226 if (!Ty.isScalar())
2227 return false;
2228
2229 unsigned Size = Ty.getSizeInBits();
2230 if (Size != 32 && Size != 64)
2231 return false;
2232
2233 Register ShiftSrc;
2234 int64_t ShiftImm;
2235 Register MaskSrc;
2236 int64_t MaskImm;
2237 if (!mi_match(
2238 Dst, MRI,
2239 m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))),
2240 m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm))))))
2241 return false;
2242
2243 if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
2244 return false;
2245
2246 int64_t Immr = Size - ShiftImm;
2247 int64_t Imms = Size - ShiftImm - 1;
2248 unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2249 emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
2250 I.eraseFromParent();
2251 return true;
2252 }
2253 default:
2254 return false;
2255 }
2256}
2257
2258bool AArch64InstructionSelector::select(MachineInstr &I) {
2259 assert(I.getParent() && "Instruction should be in a basic block!");
2260 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2261
2262 MachineBasicBlock &MBB = *I.getParent();
2263 MachineFunction &MF = *MBB.getParent();
2264 MachineRegisterInfo &MRI = MF.getRegInfo();
2265
2266 const AArch64Subtarget *Subtarget =
2267 &static_cast<const AArch64Subtarget &>(MF.getSubtarget());
2268 if (Subtarget->requiresStrictAlign()) {
2269 // We don't support this feature yet.
2270 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
2271 return false;
2272 }
2273
2274 MIB.setInstrAndDebugLoc(I);
2275
2276 unsigned Opcode = I.getOpcode();
2277 // G_PHI requires same handling as PHI
2278 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2279 // Certain non-generic instructions also need some special handling.
2280
2281 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2282 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2283
2284 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2285 const Register DefReg = I.getOperand(0).getReg();
2286 const LLT DefTy = MRI.getType(DefReg);
2287
2288 const RegClassOrRegBank &RegClassOrBank =
2289 MRI.getRegClassOrRegBank(DefReg);
2290
2291 const TargetRegisterClass *DefRC
2292 = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
2293 if (!DefRC) {
2294 if (!DefTy.isValid()) {
2295 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
2296 return false;
2297 }
2298 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
2299 DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
2300 if (!DefRC) {
2301 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
2302 return false;
2303 }
2304 }
2305
2306 I.setDesc(TII.get(TargetOpcode::PHI));
2307
2308 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
2309 }
2310
2311 if (I.isCopy())
2312 return selectCopy(I, TII, MRI, TRI, RBI);
2313
2314 return true;
2315 }
2316
2317
2318 if (I.getNumOperands() != I.getNumExplicitOperands()) {
2319 LLVM_DEBUG(
2320 dbgs() << "Generic instruction has unexpected implicit operands\n");
2321 return false;
2322 }
2323
2324 // Try to do some lowering before we start instruction selecting. These
2325 // lowerings are purely transformations on the input G_MIR and so selection
2326 // must continue after any modification of the instruction.
2327 if (preISelLower(I)) {
2328 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2329 }
2330
2331 // There may be patterns that the importer can't handle optimally: it
2332 // selects them to a suboptimal sequence, so our custom C++ selection
2333 // code later never gets a chance to work on them. Therefore, we make an
2334 // early selection attempt here to give priority to certain selection
2335 // routines over the imported ones.
2336 if (earlySelect(I))
2337 return true;
2338
2339 if (selectImpl(I, *CoverageInfo))
2340 return true;
2341
2342 LLT Ty =
2343 I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
2344
2345 switch (Opcode) {
2346 case TargetOpcode::G_SBFX:
2347 case TargetOpcode::G_UBFX: {
2348 static const unsigned OpcTable[2][2] = {
2349 {AArch64::UBFMWri, AArch64::UBFMXri},
2350 {AArch64::SBFMWri, AArch64::SBFMXri}};
2351 bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2352 unsigned Size = Ty.getSizeInBits();
2353 unsigned Opc = OpcTable[IsSigned][Size == 64];
2354 auto Cst1 =
2355 getConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
2356 assert(Cst1 && "Should have gotten a constant for src 1?");
2357 auto Cst2 =
2358 getConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
2359 assert(Cst2 && "Should have gotten a constant for src 2?");
2360 auto LSB = Cst1->Value.getZExtValue();
2361 auto Width = Cst2->Value.getZExtValue();
2362 auto BitfieldInst =
2363 MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
2364 .addImm(LSB)
2365 .addImm(LSB + Width - 1);
2366 I.eraseFromParent();
2367 return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
2368 }
2369 case TargetOpcode::G_BRCOND:
2370 return selectCompareBranch(I, MF, MRI);
2371
2372 case TargetOpcode::G_BRINDIRECT: {
2373 I.setDesc(TII.get(AArch64::BR));
2374 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2375 }
2376
2377 case TargetOpcode::G_BRJT:
2378 return selectBrJT(I, MRI);
2379
2380 case AArch64::G_ADD_LOW: {
2381 // This op may have been separated from its ADRP companion by the localizer
2382 // or some other code motion pass. Given that many CPUs will try to
2383 // macro fuse these operations anyway, select this into a MOVaddr pseudo
2384 // which will later be expanded into an ADRP+ADD pair after scheduling.
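// Illustrative sketch (names invented), assuming the small code model:
//   %base:gpr(p0) = ADRP @g
//   %addr:gpr(p0) = G_ADD_LOW %base, @g
// is reselected below as
//   %addr = MOVaddr @g(page), @g(pageoff)
// so that the eventual adrp+add pair stays adjacent for macro fusion.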
2385 MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
2386 if (BaseMI->getOpcode() != AArch64::ADRP) {
2387 I.setDesc(TII.get(AArch64::ADDXri));
2388 I.addOperand(MachineOperand::CreateImm(0));
2389 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2390 }
2391 assert(TM.getCodeModel() == CodeModel::Small &&
2392 "Expected small code model");
2393 auto Op1 = BaseMI->getOperand(1);
2394 auto Op2 = I.getOperand(2);
2395 auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
2396 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2397 Op1.getTargetFlags())
2398 .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
2399 Op2.getTargetFlags());
2400 I.eraseFromParent();
2401 return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
2402 }
2403
2404 case TargetOpcode::G_BSWAP: {
2405 // Handle vector types for G_BSWAP directly.
2406 Register DstReg = I.getOperand(0).getReg();
2407 LLT DstTy = MRI.getType(DstReg);
2408
2409 // We should only get vector types here; everything else is handled by the
2410 // importer right now.
2411 if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
2412 LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
2413 return false;
2414 }
2415
2416 // Only handle 4 and 2 element vectors for now.
2417 // TODO: 16-bit elements.
2418 unsigned NumElts = DstTy.getNumElements();
2419 if (NumElts != 4 && NumElts != 2) {
2420 LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
2421 return false;
2422 }
2423
2424 // Choose the correct opcode for the supported types. Right now, that's
2425 // v2s32, v4s32, and v2s64.
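// Illustrative sketch: a v4s32 byte swap reverses the bytes within each
// 32-bit lane, which is a REV32 over the byte vector, e.g.
//   %dst:fpr(<4 x s32>) = G_BSWAP %src  ==>  %dst = REV32v16i8 %src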
2426 unsigned Opc = 0;
2427 unsigned EltSize = DstTy.getElementType().getSizeInBits();
2428 if (EltSize == 32)
2429 Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
2430 : AArch64::REV32v16i8;
2431 else if (EltSize == 64)
2432 Opc = AArch64::REV64v16i8;
2433
2434 // We should always get something by the time we get here...
2435 assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
2436
2437 I.setDesc(TII.get(Opc));
2438 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2439 }
2440
2441 case TargetOpcode::G_FCONSTANT:
2442 case TargetOpcode::G_CONSTANT: {
2443 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2444
2445 const LLT s8 = LLT::scalar(8);
2446 const LLT s16 = LLT::scalar(16);
2447 const LLT s32 = LLT::scalar(32);
2448 const LLT s64 = LLT::scalar(64);
2449 const LLT s128 = LLT::scalar(128);
2450 const LLT p0 = LLT::pointer(0, 64);
2451
2452 const Register DefReg = I.getOperand(0).getReg();
2453 const LLT DefTy = MRI.getType(DefReg);
2454 const unsigned DefSize = DefTy.getSizeInBits();
2455 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2456
2457 // FIXME: Redundant check, but even less readable when factored out.
2458 if (isFP) {
2459 if (Ty != s32 && Ty != s64 && Ty != s128) {
2460 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2461 << " constant, expected: " << s32 << " or " << s64
2462 << " or " << s128 << '\n');
2463 return false;
2464 }
2465
2466 if (RB.getID() != AArch64::FPRRegBankID) {
2467 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2468 << " constant on bank: " << RB
2469 << ", expected: FPR\n");
2470 return false;
2471 }
2472
2473 // The case when we have 0.0 is covered by tablegen. Reject it here so we
2474 // can be sure tablegen works correctly and isn't rescued by this code.
2475 // However, 0.0 is not covered by tablegen for FP128, so we handle that
2476 // scenario in the code here.
2477 if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
2478 return false;
2479 } else {
2480 // s32 and s64 are covered by tablegen.
2481 if (Ty != p0 && Ty != s8 && Ty != s16) {
2482 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2483 << " constant, expected: " << s32 << ", " << s64
2484 << ", or " << p0 << '\n');
2485 return false;
2486 }
2487
2488 if (RB.getID() != AArch64::GPRRegBankID) {
2489 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2490 << " constant on bank: " << RB
2491 << ", expected: GPR\n");
2492 return false;
2493 }
2494 }
2495
2496 // We allow G_CONSTANT of types < 32b.
2497 const unsigned MovOpc =
2498 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2499
2500 if (isFP) {
2501 // Either emit a FMOV, or emit a copy to emit a normal mov.
2502 const TargetRegisterClass &GPRRC =
2503 DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
2504 const TargetRegisterClass &FPRRC =
2505 DefSize == 32 ? AArch64::FPR32RegClass
2506 : (DefSize == 64 ? AArch64::FPR64RegClass
2507 : AArch64::FPR128RegClass);
2508
2509 // For 64b values, emit a constant pool load instead.
2510 // For s32, use a cp load if we have optsize/minsize.
2511 if (DefSize == 64 || DefSize == 128 ||
2512 (DefSize == 32 && shouldOptForSize(&MF))) {
2513 auto *FPImm = I.getOperand(1).getFPImm();
2514 auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2515 if (!LoadMI) {
2516 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2517 return false;
2518 }
2519 MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2520 I.eraseFromParent();
2521 return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2522 }
2523
2524 // Nope. Emit a copy and use a normal mov instead.
2525 const Register DefGPRReg = MRI.createVirtualRegister(&GPRRC);
2526 MachineOperand &RegOp = I.getOperand(0);
2527 RegOp.setReg(DefGPRReg);
2528 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2529 MIB.buildCopy({DefReg}, {DefGPRReg});
2530
2531 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2532 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2533 return false;
2534 }
2535
2536 MachineOperand &ImmOp = I.getOperand(1);
2537 // FIXME: Is going through int64_t always correct?
2538 ImmOp.ChangeToImmediate(
2539 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2540 } else if (I.getOperand(1).isCImm()) {
2541 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
2542 I.getOperand(1).ChangeToImmediate(Val);
2543 } else if (I.getOperand(1).isImm()) {
2544 uint64_t Val = I.getOperand(1).getImm();
2545 I.getOperand(1).ChangeToImmediate(Val);
2546 }
2547
2548 I.setDesc(TII.get(MovOpc));
2549 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2550 return true;
2551 }
2552 case TargetOpcode::G_EXTRACT: {
2553 Register DstReg = I.getOperand(0).getReg();
2554 Register SrcReg = I.getOperand(1).getReg();
2555 LLT SrcTy = MRI.getType(SrcReg);
2556 LLT DstTy = MRI.getType(DstReg);
2557 (void)DstTy;
2558 unsigned SrcSize = SrcTy.getSizeInBits();
2559
2560 if (SrcTy.getSizeInBits() > 64) {
2561 // This should be an extract of an s128, which is like a vector extract.
2562 if (SrcTy.getSizeInBits() != 128)
2563 return false;
2564 // Only support extracting 64 bits from an s128 at the moment.
2565 if (DstTy.getSizeInBits() != 64)
2566 return false;
2567
2568 unsigned Offset = I.getOperand(2).getImm();
2569 if (Offset % 64 != 0)
2570 return false;
2571
2572 // Check we have the right regbank always.
2573 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2574 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2575 assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!");
2576
2577 if (SrcRB.getID() == AArch64::GPRRegBankID) {
2578 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
2579 .addUse(SrcReg, 0, Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2580 I.eraseFromParent();
2581 return true;
2582 }
2583
2584 // Emit the same code as a vector extract.
2585 // Offset must be a multiple of 64.
2586 unsigned LaneIdx = Offset / 64;
2587 MachineInstr *Extract = emitExtractVectorElt(
2588 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2589 if (!Extract)
2590 return false;
2591 I.eraseFromParent();
2592 return true;
2593 }
2594
2595 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2596 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2597 Ty.getSizeInBits() - 1);
2598
2599 if (SrcSize < 64) {
2600 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2601 "unexpected G_EXTRACT types");
2602 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2603 }
2604
2605 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2606 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2607 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2608 .addReg(DstReg, 0, AArch64::sub_32);
2609 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2610 AArch64::GPR32RegClass, MRI);
2611 I.getOperand(0).setReg(DstReg);
2612
2613 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2614 }
2615
2616 case TargetOpcode::G_INSERT: {
2617 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2618 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2619 unsigned DstSize = DstTy.getSizeInBits();
2620 // Larger inserts are vectors, same-size ones should be something else by
2621 // now (split up or turned into COPYs).
2622 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2623 return false;
2624
2625 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2626 unsigned LSB = I.getOperand(3).getImm();
2627 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
2628 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2629 MachineInstrBuilder(MF, I).addImm(Width - 1);
2630
2631 if (DstSize < 64) {
2632 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2633 "unexpected G_INSERT types");
2634 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2635 }
2636
2637 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2638 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2639 TII.get(AArch64::SUBREG_TO_REG))
2640 .addDef(SrcReg)
2641 .addImm(0)
2642 .addUse(I.getOperand(2).getReg())
2643 .addImm(AArch64::sub_32);
2644 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2645 AArch64::GPR32RegClass, MRI);
2646 I.getOperand(2).setReg(SrcReg);
2647
2648 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2649 }
2650 case TargetOpcode::G_FRAME_INDEX: {
2651 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2652 if (Ty != LLT::pointer(0, 64)) {
2653 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2654 << ", expected: " << LLT::pointer(0, 64) << '\n');
2655 return false;
2656 }
2657 I.setDesc(TII.get(AArch64::ADDXri));
2658
2659 // MOs for a #0 shifted immediate.
2660 I.addOperand(MachineOperand::CreateImm(0));
2661 I.addOperand(MachineOperand::CreateImm(0));
2662
2663 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2664 }
2665
2666 case TargetOpcode::G_GLOBAL_VALUE: {
2667 auto GV = I.getOperand(1).getGlobal();
2668 if (GV->isThreadLocal())
2669 return selectTLSGlobalValue(I, MRI);
2670
2671 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
2672 if (OpFlags & AArch64II::MO_GOT) {
2673 I.setDesc(TII.get(AArch64::LOADgot));
2674 I.getOperand(1).setTargetFlags(OpFlags);
2675 } else if (TM.getCodeModel() == CodeModel::Large) {
2676 // Materialize the global using movz/movk instructions.
2677 materializeLargeCMVal(I, GV, OpFlags);
2678 I.eraseFromParent();
2679 return true;
2680 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2681 I.setDesc(TII.get(AArch64::ADR));
2682 I.getOperand(1).setTargetFlags(OpFlags);
2683 } else {
2684 I.setDesc(TII.get(AArch64::MOVaddr));
2685 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2686 MachineInstrBuilder MIB(MF, I);
2687 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2688 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2689 }
2690 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2691 }
2692
2693 case TargetOpcode::G_ZEXTLOAD:
2694 case TargetOpcode::G_LOAD:
2695 case TargetOpcode::G_STORE: {
2696 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2697 LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
2698
2699 if (PtrTy != LLT::pointer(0, 64)) {
2700 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
2701 << ", expected: " << LLT::pointer(0, 64) << '\n');
2702 return false;
2703 }
2704
2705 auto &MemOp = **I.memoperands_begin();
2706 uint64_t MemSizeInBytes = MemOp.getSize();
2707 unsigned MemSizeInBits = MemSizeInBytes * 8;
2708 AtomicOrdering Order = MemOp.getSuccessOrdering();
2709
2710 // Need special instructions for atomics that affect ordering.
2711 if (Order != AtomicOrdering::NotAtomic &&
2712 Order != AtomicOrdering::Unordered &&
2713 Order != AtomicOrdering::Monotonic) {
2714 assert(I.getOpcode() != TargetOpcode::G_ZEXTLOAD);
2715 if (MemSizeInBytes > 64)
2716 return false;
2717
2718 if (I.getOpcode() == TargetOpcode::G_LOAD) {
2719 static unsigned Opcodes[] = {AArch64::LDARB, AArch64::LDARH,
2720 AArch64::LDARW, AArch64::LDARX};
2721 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2722 } else {
2723 static unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
2724 AArch64::STLRW, AArch64::STLRX};
2725 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2726 }
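// For example, an acquire G_LOAD of 4 bytes selects
// Opcodes[Log2_32(4)] == Opcodes[2] == LDARW; both tables are indexed by
// the power-of-two access size.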
2727 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2728 return true;
2729 }
2730
2731#ifndef NDEBUG
2732 const Register PtrReg = I.getOperand(1).getReg();
2733 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2734 // Sanity-check the pointer register.
2735 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
2736 "Load/Store pointer operand isn't a GPR");
2737 assert(MRI.getType(PtrReg).isPointer() &&
2738 "Load/Store pointer operand isn't a pointer");
2739#endif
2740
2741 const Register ValReg = I.getOperand(0).getReg();
2742 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
2743
2744 // Helper lambda for partially selecting I. Either returns the original
2745 // instruction with an updated opcode, or a new instruction.
2746 auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
2747 bool IsStore = I.getOpcode() == TargetOpcode::G_STORE;
2748 const unsigned NewOpc =
2749 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
2750 if (NewOpc == I.getOpcode())
2751 return nullptr;
2752 // Check if we can fold anything into the addressing mode.
2753 auto AddrModeFns =
2754 selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
2755 if (!AddrModeFns) {
2756 // Can't fold anything. Use the original instruction.
2757 I.setDesc(TII.get(NewOpc));
2758 I.addOperand(MachineOperand::CreateImm(0));
2759 return &I;
2760 }
2761
2762 // Folded something. Create a new instruction and return it.
2763 auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
2764 IsStore ? NewInst.addUse(ValReg) : NewInst.addDef(ValReg);
2765 NewInst.cloneMemRefs(I);
2766 for (auto &Fn : *AddrModeFns)
2767 Fn(NewInst);
2768 I.eraseFromParent();
2769 return &*NewInst;
2770 };
2771
2772 MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
2773 if (!LoadStore)
2774 return false;
2775
2776 // If we're storing a 0, use WZR/XZR.
2777 if (Opcode == TargetOpcode::G_STORE) {
2778 auto CVal = getConstantVRegValWithLookThrough(
2779 LoadStore->getOperand(0).getReg(), MRI, /*LookThroughInstrs = */ true,
2780 /*HandleFConstants = */ false);
2781 if (CVal && CVal->Value == 0) {
2782 switch (LoadStore->getOpcode()) {
2783 case AArch64::STRWui:
2784 case AArch64::STRHHui:
2785 case AArch64::STRBBui:
2786 LoadStore->getOperand(0).setReg(AArch64::WZR);
2787 break;
2788 case AArch64::STRXui:
2789 LoadStore->getOperand(0).setReg(AArch64::XZR);
2790 break;
2791 }
2792 }
2793 }
2794
2795 if (IsZExtLoad) {
2796 // The zextload from a smaller type to i32 should be handled by the
2797 // importer.
2798 if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
2799 return false;
2800 // If we have a ZEXTLOAD then change the load's type to be a narrower reg
2801 // and zero_extend with SUBREG_TO_REG.
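// Illustrative sketch (vregs invented):
//   %v:gpr(s64) = G_ZEXTLOAD %p(p0) :: (load 4)
// becomes, roughly,
//   %w:gpr32 = LDRWui %p, 0
//   %v:gpr64 = SUBREG_TO_REG 0, %w, %subreg.sub_32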
2802 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2803 Register DstReg = LoadStore->getOperand(0).getReg();
2804 LoadStore->getOperand(0).setReg(LdReg);
2805
2806 MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
2807 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
2808 .addImm(0)
2809 .addUse(LdReg)
2810 .addImm(AArch64::sub_32);
2811 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2812 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
2813 MRI);
2814 }
2815 return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2816 }
2817
2818 case TargetOpcode::G_SMULH:
2819 case TargetOpcode::G_UMULH: {
2820 // Reject the various things we don't support yet.
2821 if (unsupportedBinOp(I, RBI, MRI, TRI))
2822 return false;
2823
2824 const Register DefReg = I.getOperand(0).getReg();
2825 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2826
2827 if (RB.getID() != AArch64::GPRRegBankID) {
2828 LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
2829 return false;
2830 }
2831
2832 if (Ty != LLT::scalar(64)) {
2833 LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
2834 << ", expected: " << LLT::scalar(64) << '\n');
2835 return false;
2836 }
2837
2838 unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
2839 : AArch64::UMULHrr;
2840 I.setDesc(TII.get(NewOpc));
2841
2842 // Now that we selected an opcode, we need to constrain the register
2843 // operands to use appropriate classes.
2844 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2845 }
2846 case TargetOpcode::G_LSHR:
2847 case TargetOpcode::G_ASHR:
2848 if (MRI.getType(I.getOperand(0).getReg()).isVector())
2849 return selectVectorAshrLshr(I, MRI);
2850 LLVM_FALLTHROUGH;
2851 case TargetOpcode::G_SHL:
2852 if (Opcode == TargetOpcode::G_SHL &&
2853 MRI.getType(I.getOperand(0).getReg()).isVector())
2854 return selectVectorSHL(I, MRI);
2855 LLVM_FALLTHROUGH;
2856 case TargetOpcode::G_FADD:
2857 case TargetOpcode::G_FSUB:
2858 case TargetOpcode::G_FMUL:
2859 case TargetOpcode::G_FDIV:
2860 case TargetOpcode::G_OR: {
2861 // Reject the various things we don't support yet.
2862 if (unsupportedBinOp(I, RBI, MRI, TRI))
2863 return false;
2864
2865 const unsigned OpSize = Ty.getSizeInBits();
2866
2867 const Register DefReg = I.getOperand(0).getReg();
2868 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2869
2870 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
2871 if (NewOpc == I.getOpcode())
2872 return false;
2873
2874 I.setDesc(TII.get(NewOpc));
2875 // FIXME: Should the type be always reset in setDesc?
2876
2877 // Now that we selected an opcode, we need to constrain the register
2878 // operands to use appropriate classes.
2879 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2880 }
2881
2882 case TargetOpcode::G_PTR_ADD: {
2883 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
2884 I.eraseFromParent();
2885 return true;
2886 }
2887 case TargetOpcode::G_SADDO:
2888 case TargetOpcode::G_UADDO:
2889 case TargetOpcode::G_SSUBO:
2890 case TargetOpcode::G_USUBO: {
2891 // Emit the operation and get the correct condition code.
2892 auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(),
2893 I.getOperand(2), I.getOperand(3), MIB);
2894
2895 // Now, put the overflow result in the register given by the first operand
2896 // to the overflow op. CSINC increments the result when the predicate is
2897 // false, so to get the increment when it's true, we need to use the
2898 // inverse. In this case, we want to increment when carry is set.
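// Illustrative sketch: for G_UADDO, emitOverflowOp produces an ADDS and
// the condition code HS (carry set); the CSINC below with the inverted
// code is then
//   csinc w_ovf, wzr, wzr, lo
// i.e. the alias of cset w_ovf, hs (w_ovf is an invented name).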
2899 Register ZReg = AArch64::WZR;
2900 auto CsetMI = MIB.buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
2901 {ZReg, ZReg})
2902 .addImm(getInvertedCondCode(OpAndCC.second));
2903 constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
2904 I.eraseFromParent();
2905 return true;
2906 }
2907
2908 case TargetOpcode::G_PTRMASK: {
2909 Register MaskReg = I.getOperand(2).getReg();
2910 Optional<int64_t> MaskVal = getConstantVRegSExtVal(MaskReg, MRI);
2911 // TODO: Implement arbitrary cases
2912 if (!MaskVal || !isShiftedMask_64(*MaskVal))
2913 return false;
2914
2915 uint64_t Mask = *MaskVal;
2916 I.setDesc(TII.get(AArch64::ANDXri));
2917 I.getOperand(2).ChangeToImmediate(
2918 AArch64_AM::encodeLogicalImmediate(Mask, 64));
2919
2920 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2921 }
2922 case TargetOpcode::G_PTRTOINT:
2923 case TargetOpcode::G_TRUNC: {
2924 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2925 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
2926
2927 const Register DstReg = I.getOperand(0).getReg();
2928 const Register SrcReg = I.getOperand(1).getReg();
2929
2930 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2931 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2932
2933 if (DstRB.getID() != SrcRB.getID()) {
2934 LLVM_DEBUG(
2935 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
2936 return false;
2937 }
2938
2939 if (DstRB.getID() == AArch64::GPRRegBankID) {
2940 const TargetRegisterClass *DstRC =
2941 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
2942 if (!DstRC)
2943 return false;
2944
2945 const TargetRegisterClass *SrcRC =
2946 getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
2947 if (!SrcRC)
2948 return false;
2949
2950 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
2951 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
2952 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
2953 return false;
2954 }
2955
2956 if (DstRC == SrcRC) {
2957 // Nothing to be done
2958 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
2959 SrcTy == LLT::scalar(64)) {
2960 llvm_unreachable("TableGen can import this case");
2961 return false;
2962 } else if (DstRC == &AArch64::GPR32RegClass &&
2963 SrcRC == &AArch64::GPR64RegClass) {
2964 I.getOperand(1).setSubReg(AArch64::sub_32);
2965 } else {
2966 LLVM_DEBUG(
2967 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
2968 return false;
2969 }
2970
2971 I.setDesc(TII.get(TargetOpcode::COPY));
2972 return true;
2973 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
2974 if (DstTy == LLT::fixed_vector(4, 16) &&
2975 SrcTy == LLT::fixed_vector(4, 32)) {
2976 I.setDesc(TII.get(AArch64::XTNv4i16));
2977 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2978 return true;
2979 }
2980
2981 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
2982 MachineInstr *Extract = emitExtractVectorElt(
2983 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
2984 if (!Extract)
2985 return false;
2986 I.eraseFromParent();
2987 return true;
2988 }
2989
2990 // We might have a vector G_PTRTOINT, in which case just emit a COPY.
2991 if (Opcode == TargetOpcode::G_PTRTOINT) {
2992 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
2993 I.setDesc(TII.get(TargetOpcode::COPY));
2994 return true;
2995 }
2996 }
2997
2998 return false;
2999 }
3000
3001 case TargetOpcode::G_ANYEXT: {
3002 const Register DstReg = I.getOperand(0).getReg();
3003 const Register SrcReg = I.getOperand(1).getReg();
3004
3005 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
3006 if (RBDst.getID() != AArch64::GPRRegBankID) {
3007 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
3008 << ", expected: GPR\n");
3009 return false;
3010 }
3011
3012 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
3013 if (RBSrc.getID() != AArch64::GPRRegBankID) {
3014 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
3015 << ", expected: GPR\n");
3016 return false;
3017 }
3018
3019 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
3020
3021 if (DstSize == 0) {
3022 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
3023 return false;
3024 }
3025
3026 if (DstSize != 64 && DstSize > 32) {
3027 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
3028 << ", expected: 32 or 64\n");
3029 return false;
3030 }
3031 // At this point G_ANYEXT is just like a plain COPY, but we need
3032 // to explicitly form the 64-bit value when extending to 64 bits.
3033 if (DstSize > 32) {
3034 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
3035 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
3036 .addDef(ExtSrc)
3037 .addImm(0)
3038 .addUse(SrcReg)
3039 .addImm(AArch64::sub_32);
3040 I.getOperand(1).setReg(ExtSrc);
3041 }
3042 return selectCopy(I, TII, MRI, TRI, RBI);
3043 }
3044
3045 case TargetOpcode::G_ZEXT:
3046 case TargetOpcode::G_SEXT_INREG:
3047 case TargetOpcode::G_SEXT: {
3048 unsigned Opcode = I.getOpcode();
3049 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3050 const Register DefReg = I.getOperand(0).getReg();
3051 Register SrcReg = I.getOperand(1).getReg();
3052 const LLT DstTy = MRI.getType(DefReg);
3053 const LLT SrcTy = MRI.getType(SrcReg);
3054 unsigned DstSize = DstTy.getSizeInBits();
3055 unsigned SrcSize = SrcTy.getSizeInBits();
3056
3057 // SEXT_INREG has the same src reg size as dst, the size of the value to be
3058 // extended is encoded in the imm.
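// E.g. %d:gpr(s32) = G_SEXT_INREG %s(s32), 8 sign-extends the low 8 bits
// of %s, so SrcSize becomes 8 here.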
3059 if (Opcode == TargetOpcode::G_SEXT_INREG)
3060 SrcSize = I.getOperand(2).getImm();
3061
3062 if (DstTy.isVector())
3063 return false; // Should be handled by imported patterns.
3064
3065 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
3066 AArch64::GPRRegBankID &&
3067 "Unexpected ext regbank");
3068
3069 MachineInstr *ExtI;
3070
3071 // First check if we're extending the result of a load which has a dest
3072 // type smaller than 32 bits; in that case this zext is redundant. GPR32
3073 // is the smallest GPR register on AArch64, and all smaller loads
3074 // automatically zero-extend the upper bits. E.g.
3075 // %v(s8) = G_LOAD %p, :: (load 1)
3076 // %v2(s32) = G_ZEXT %v(s8)
3077 if (!IsSigned) {
3078 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
3079 bool IsGPR =
3080 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
3081 if (LoadMI && IsGPR) {
3082 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
3083 unsigned BytesLoaded = MemOp->getSize();
3084 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
3085 return selectCopy(I, TII, MRI, TRI, RBI);
3086 }
3087
3088 // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
3089 // + SUBREG_TO_REG.
3090 //
3091 // If we are zero extending from 32 bits to 64 bits, it's possible that
3092 // the instruction implicitly does the zero extend for us. In that case,
3093 // we only need the SUBREG_TO_REG.
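// Illustrative sketch (vregs invented):
//   %ext:gpr(s64) = G_ZEXT %src:gpr(s32)
// selects, roughly, to
//   %z:gpr32 = ORRWrs $wzr, %src, 0    (skipped when %src is a def32)
//   %ext:gpr64 = SUBREG_TO_REG 0, %z, %subreg.sub_32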
3094 if (IsGPR && SrcSize == 32 && DstSize == 64) {
3095 // Unlike with the G_LOAD case, we don't want to look through copies
3096 // here. (See isDef32.)
3097 MachineInstr *Def = MRI.getVRegDef(SrcReg);
3098 Register SubregToRegSrc = SrcReg;
3099
3100 // Does the instruction implicitly zero extend?
3101 if (!Def || !isDef32(*Def)) {
3102 // No. Zero out using an OR.
3103 Register OrDst = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3104 const Register ZReg = AArch64::WZR;
3105 MIB.buildInstr(AArch64::ORRWrs, {OrDst}, {ZReg, SrcReg}).addImm(0);
3106 SubregToRegSrc = OrDst;
3107 }
3108
3109 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
3110 .addImm(0)
3111 .addUse(SubregToRegSrc)
3112 .addImm(AArch64::sub_32);
3113
3114 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
3115 MRI)) {
3116 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
3117 return false;
3118 }
3119
3120 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3121 MRI)) {
3122 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
3123 return false;
3124 }
3125
3126 I.eraseFromParent();
3127 return true;
3128 }
3129 }
3130
3131 if (DstSize == 64) {
3132 if (Opcode != TargetOpcode::G_SEXT_INREG) {
3133 // FIXME: Can we avoid manually doing this?
3134 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3135 MRI)) {
3136 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
3137 << " operand\n");
3138 return false;
3139 }
3140 SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
3141 {&AArch64::GPR64RegClass}, {})
3142 .addImm(0)
3143 .addUse(SrcReg)
3144 .addImm(AArch64::sub_32)
3145 .getReg(0);
3146 }
3147
3148 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3149 {DefReg}, {SrcReg})
3150 .addImm(0)
3151 .addImm(SrcSize - 1);
3152 } else if (DstSize <= 32) {
3153 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3154 {DefReg}, {SrcReg})
3155 .addImm(0)
3156 .addImm(SrcSize - 1);
3157 } else {
3158 return false;
3159 }
3160
3161 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
3162 I.eraseFromParent();
3163 return true;
3164 }
3165
3166 case TargetOpcode::G_SITOFP:
3167 case TargetOpcode::G_UITOFP:
3168 case TargetOpcode::G_FPTOSI:
3169 case TargetOpcode::G_FPTOUI: {
3170 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
3171 SrcTy = MRI.getType(I.getOperand(1).getReg());
3172 const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
3173 if (NewOpc == Opcode)
3174 return false;
3175
3176 I.setDesc(TII.get(NewOpc));
3177 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3178
3179 return true;
3180 }
3181
3182 case TargetOpcode::G_FREEZE:
3183 return selectCopy(I, TII, MRI, TRI, RBI);
3184
3185 case TargetOpcode::G_INTTOPTR:
3186 // The importer is currently unable to import pointer types since they
3187 // didn't exist in SelectionDAG.
3188 return selectCopy(I, TII, MRI, TRI, RBI);
3189
3190 case TargetOpcode::G_BITCAST:
3191 // Imported SelectionDAG rules can handle every bitcast except those that
3192 // bitcast from a type to the same type. Ideally, these shouldn't occur
3193 // but we might not run an optimizer that deletes them. The other exception
3194 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3195 // of them.
3196 return selectCopy(I, TII, MRI, TRI, RBI);
3197
3198 case TargetOpcode::G_SELECT: {
3199 if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
3200 LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
3201 << ", expected: " << LLT::scalar(1) << '\n');
3202 return false;
3203 }
3204
3205 const Register CondReg = I.getOperand(1).getReg();
3206 const Register TReg = I.getOperand(2).getReg();
3207 const Register FReg = I.getOperand(3).getReg();
3208
3209 if (tryOptSelect(I))
3210 return true;
3211
3212 // Make sure to use an unused vreg instead of wzr, so that the peephole
3213 // optimizer will be able to optimize these.
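// Illustrative sketch (vregs invented): the test below emits
//   %dead:gpr32 = ANDSWri %cond, <encoded 0x1>    (sets NZCV)
// and emitSelect then produces, e.g.,
//   %dst = CSELWr %t, %f, ne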
3214 Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3215 auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3216 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
3217 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3218 if (!emitSelect(I.getOperand(0).getReg(), TReg, FReg, AArch64CC::NE, MIB))
3219 return false;
3220 I.eraseFromParent();
3221 return true;
3222 }
3223 case TargetOpcode::G_ICMP: {
3224 if (Ty.isVector())
3225 return selectVectorICmp(I, MRI);
3226
3227 if (Ty != LLT::scalar(32)) {
3228 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
3229 << ", expected: " << LLT::scalar(32) << '\n');
3230 return false;
3231 }
3232
3233 auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3234 emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1),
3235 MIB);
3236 emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIB);
3237 I.eraseFromParent();
3238 return true;
3239 }
3240
3241 case TargetOpcode::G_FCMP: {
3242 CmpInst::Predicate Pred =
3243 static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3244 if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
3245 Pred) ||
3246 !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
3247 return false;
3248 I.eraseFromParent();
3249 return true;
3250 }
3251 case TargetOpcode::G_VASTART:
3252 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3253 : selectVaStartAAPCS(I, MF, MRI);
3254 case TargetOpcode::G_INTRINSIC:
3255 return selectIntrinsic(I, MRI);
3256 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3257 return selectIntrinsicWithSideEffects(I, MRI);
3258 case TargetOpcode::G_IMPLICIT_DEF: {
3259 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
3260 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3261 const Register DstReg = I.getOperand(0).getReg();
3262 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3263 const TargetRegisterClass *DstRC =
3264 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
3265 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
3266 return true;
3267 }
3268 case TargetOpcode::G_BLOCK_ADDR: {
3269 if (TM.getCodeModel() == CodeModel::Large) {
3270 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
3271 I.eraseFromParent();
3272 return true;
3273 } else {
3274 I.setDesc(TII.get(AArch64::MOVaddrBA));
3275 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
3276 I.getOperand(0).getReg())
3277 .addBlockAddress(I.getOperand(1).getBlockAddress(),
3278 /* Offset */ 0, AArch64II::MO_PAGE)
3279 .addBlockAddress(
3280 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
3281 AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3282 I.eraseFromParent();
3283 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3284 }
3285 }
3286 case AArch64::G_DUP: {
3287     // When the scalar of G_DUP is an s8/s16 GPR, it can't be selected by
3288     // the imported patterns, so do it manually here. Avoiding the s16 GPR
3289     // case entirely is difficult: at register bank selection we could end
3290     // up pessimizing the FPR case if we added an anyext to fix this.
3291     // Manual selection is the most robust solution for now.
3292 if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3293 AArch64::GPRRegBankID)
3294 return false; // We expect the fpr regbank case to be imported.
3295 LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3296 if (VecTy == LLT::fixed_vector(8, 8))
3297 I.setDesc(TII.get(AArch64::DUPv8i8gpr));
3298 else if (VecTy == LLT::fixed_vector(16, 8))
3299 I.setDesc(TII.get(AArch64::DUPv16i8gpr));
3300 else if (VecTy == LLT::fixed_vector(4, 16))
3301 I.setDesc(TII.get(AArch64::DUPv4i16gpr));
3302 else if (VecTy == LLT::fixed_vector(8, 16))
3303 I.setDesc(TII.get(AArch64::DUPv8i16gpr));
3304 else
3305 return false;
3306 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3307 }
3308 case TargetOpcode::G_INTRINSIC_TRUNC:
3309 return selectIntrinsicTrunc(I, MRI);
3310 case TargetOpcode::G_INTRINSIC_ROUND:
3311 return selectIntrinsicRound(I, MRI);
3312 case TargetOpcode::G_BUILD_VECTOR:
3313 return selectBuildVector(I, MRI);
3314 case TargetOpcode::G_MERGE_VALUES:
3315 return selectMergeValues(I, MRI);
3316 case TargetOpcode::G_UNMERGE_VALUES:
3317 return selectUnmergeValues(I, MRI);
3318 case TargetOpcode::G_SHUFFLE_VECTOR:
3319 return selectShuffleVector(I, MRI);
3320 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3321 return selectExtractElt(I, MRI);
3322 case TargetOpcode::G_INSERT_VECTOR_ELT:
3323 return selectInsertElt(I, MRI);
3324 case TargetOpcode::G_CONCAT_VECTORS:
3325 return selectConcatVectors(I, MRI);
3326 case TargetOpcode::G_JUMP_TABLE:
3327 return selectJumpTable(I, MRI);
3328 case TargetOpcode::G_VECREDUCE_FADD:
3329 case TargetOpcode::G_VECREDUCE_ADD:
3330 return selectReduction(I, MRI);
3331 }
3332
3333 return false;
3334}
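
Note on the G_SELECT case above: 1 is a valid AArch64 bitmask immediate, so the i1 condition can be tested with a single ANDSWri. A minimal sketch of that test-bit-0 pattern as a free-standing helper (the helper name is invented for this note; every call in it appears elsewhere in this file):

    // Hand-written illustration, not part of the source under analysis.
    static void emitTestBit0(MachineIRBuilder &MIB, MachineRegisterInfo &MRI,
                             Register CondReg) {
      // Use a fresh vreg instead of WZR so later peepholes can still
      // pattern-match the ANDS and delete it when NZCV is already known.
      Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
      auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
                       .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
      (void)TstMI; // NZCV now holds (CondReg & 1); the select uses AArch64CC::NE.
    }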
3335
3336bool AArch64InstructionSelector::selectReduction(MachineInstr &I,
3337 MachineRegisterInfo &MRI) {
3338 Register VecReg = I.getOperand(1).getReg();
3339 LLT VecTy = MRI.getType(VecReg);
3340 if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) {
3341 // For <2 x i32> ADDPv2i32 generates an FPR64 value, so we need to emit
3342 // a subregister copy afterwards.
3343 if (VecTy == LLT::fixed_vector(2, 32)) {
3344 Register DstReg = I.getOperand(0).getReg();
3345 auto AddP = MIB.buildInstr(AArch64::ADDPv2i32, {&AArch64::FPR64RegClass},
3346 {VecReg, VecReg});
3347 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3348 .addReg(AddP.getReg(0), 0, AArch64::ssub)
3349 .getReg(0);
3350 RBI.constrainGenericRegister(Copy, AArch64::FPR32RegClass, MRI);
3351 I.eraseFromParent();
3352 return constrainSelectedInstRegOperands(*AddP, TII, TRI, RBI);
3353 }
3354
3355 unsigned Opc = 0;
3356 if (VecTy == LLT::fixed_vector(16, 8))
3357 Opc = AArch64::ADDVv16i8v;
3358 else if (VecTy == LLT::fixed_vector(8, 16))
3359 Opc = AArch64::ADDVv8i16v;
3360 else if (VecTy == LLT::fixed_vector(4, 32))
3361 Opc = AArch64::ADDVv4i32v;
3362 else if (VecTy == LLT::fixed_vector(2, 64))
3363 Opc = AArch64::ADDPv2i64p;
3364 else {
3365       LLVM_DEBUG(dbgs() << "Unhandled type for add reduction");
3366 return false;
3367 }
3368 I.setDesc(TII.get(Opc));
3369 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3370 }
3371
3372 if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) {
3373 unsigned Opc = 0;
3374 if (VecTy == LLT::fixed_vector(2, 32))
3375 Opc = AArch64::FADDPv2i32p;
3376 else if (VecTy == LLT::fixed_vector(2, 64))
3377 Opc = AArch64::FADDPv2i64p;
3378 else {
3379       LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction");
3380 return false;
3381 }
3382 I.setDesc(TII.get(Opc));
3383 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3384 }
3385 return false;
3386}
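
For the <2 x s32> add reduction above, ADDPv2i32 adds the source lanes pairwise, so with both operands set to the same vector the sum lands in lane 0 of a 64-bit FPR; the s32 result then has to be peeled off with an ssub subregister copy. A sketch of the resulting MIR (hand-written for this note):

    // %sum:fpr64 = ADDPv2i32 %vec, %vec   ; lane0 + lane1 ends up in lane 0
    // %dst:fpr32 = COPY %sum.ssub         ; extract the low 32 bits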
3387
3388bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3389 MachineRegisterInfo &MRI) {
3390   assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
3391 Register JTAddr = I.getOperand(0).getReg();
3392 unsigned JTI = I.getOperand(1).getIndex();
3393 Register Index = I.getOperand(2).getReg();
3394
3395 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3396 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3397
3398 MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
3399 auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
3400 {TargetReg, ScratchReg}, {JTAddr, Index})
3401 .addJumpTableIndex(JTI);
3402 // Build the indirect branch.
3403 MIB.buildInstr(AArch64::BR, {}, {TargetReg});
3404 I.eraseFromParent();
3405 return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
3406}
3407
3408bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
3409 MachineRegisterInfo &MRI) {
3410   assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
3411   assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
3412
3413 Register DstReg = I.getOperand(0).getReg();
3414 unsigned JTI = I.getOperand(1).getIndex();
3415 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
3416 auto MovMI =
3417 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3418 .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
3419 .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3420 I.eraseFromParent();
3421 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3422}
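
MOVaddrJT is a pseudo; the MO_PAGE/MO_PAGEOFF operand pair above becomes the usual page-plus-offset address materialization after pseudo expansion. Roughly (sketch, with a placeholder label):

    // adrp x8, .LJTI0_0              ; MO_PAGE half
    // add  x8, x8, :lo12:.LJTI0_0    ; MO_PAGEOFF | MO_NC half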
3423
3424bool AArch64InstructionSelector::selectTLSGlobalValue(
3425 MachineInstr &I, MachineRegisterInfo &MRI) {
3426 if (!STI.isTargetMachO())
3427 return false;
3428 MachineFunction &MF = *I.getParent()->getParent();
3429 MF.getFrameInfo().setAdjustsStack(true);
3430
3431 const auto &GlobalOp = I.getOperand(1);
3432   assert(GlobalOp.getOffset() == 0 &&
3433          "Shouldn't have an offset on TLS globals!");
3434 const GlobalValue &GV = *GlobalOp.getGlobal();
3435
3436 auto LoadGOT =
3437 MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3438 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
3439
3440 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3441 {LoadGOT.getReg(0)})
3442 .addImm(0);
3443
3444 MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
3445 // TLS calls preserve all registers except those that absolutely must be
3446 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3447 // silly).
3448 MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load})
3449 .addUse(AArch64::X0, RegState::Implicit)
3450 .addDef(AArch64::X0, RegState::Implicit)
3451 .addRegMask(TRI.getTLSCallPreservedMask());
3452
3453 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
3454 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3455 MRI);
3456 I.eraseFromParent();
3457 return true;
3458}
3459
3460bool AArch64InstructionSelector::selectIntrinsicTrunc(
3461 MachineInstr &I, MachineRegisterInfo &MRI) const {
3462 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3463
3464 // Select the correct opcode.
3465 unsigned Opc = 0;
3466 if (!SrcTy.isVector()) {
3467 switch (SrcTy.getSizeInBits()) {
3468 default:
3469 case 16:
3470 Opc = AArch64::FRINTZHr;
3471 break;
3472 case 32:
3473 Opc = AArch64::FRINTZSr;
3474 break;
3475 case 64:
3476 Opc = AArch64::FRINTZDr;
3477 break;
3478 }
3479 } else {
3480 unsigned NumElts = SrcTy.getNumElements();
3481 switch (SrcTy.getElementType().getSizeInBits()) {
3482 default:
3483 break;
3484 case 16:
3485 if (NumElts == 4)
3486 Opc = AArch64::FRINTZv4f16;
3487 else if (NumElts == 8)
3488 Opc = AArch64::FRINTZv8f16;
3489 break;
3490 case 32:
3491 if (NumElts == 2)
3492 Opc = AArch64::FRINTZv2f32;
3493 else if (NumElts == 4)
3494 Opc = AArch64::FRINTZv4f32;
3495 break;
3496 case 64:
3497 if (NumElts == 2)
3498 Opc = AArch64::FRINTZv2f64;
3499 break;
3500 }
3501 }
3502
3503 if (!Opc) {
3504 // Didn't get an opcode above, bail.
3505     LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
3506 return false;
3507 }
3508
3509 // Legalization would have set us up perfectly for this; we just need to
3510 // set the opcode and move on.
3511 I.setDesc(TII.get(Opc));
3512 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3513}
3514
3515bool AArch64InstructionSelector::selectIntrinsicRound(
3516 MachineInstr &I, MachineRegisterInfo &MRI) const {
3517 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3518
3519 // Select the correct opcode.
3520 unsigned Opc = 0;
3521 if (!SrcTy.isVector()) {
3522 switch (SrcTy.getSizeInBits()) {
3523 default:
3524 case 16:
3525 Opc = AArch64::FRINTAHr;
3526 break;
3527 case 32:
3528 Opc = AArch64::FRINTASr;
3529 break;
3530 case 64:
3531 Opc = AArch64::FRINTADr;
3532 break;
3533 }
3534 } else {
3535 unsigned NumElts = SrcTy.getNumElements();
3536 switch (SrcTy.getElementType().getSizeInBits()) {
3537 default:
3538 break;
3539 case 16:
3540 if (NumElts == 4)
3541 Opc = AArch64::FRINTAv4f16;
3542 else if (NumElts == 8)
3543 Opc = AArch64::FRINTAv8f16;
3544 break;
3545 case 32:
3546 if (NumElts == 2)
3547 Opc = AArch64::FRINTAv2f32;
3548 else if (NumElts == 4)
3549 Opc = AArch64::FRINTAv4f32;
3550 break;
3551 case 64:
3552 if (NumElts == 2)
3553 Opc = AArch64::FRINTAv2f64;
3554 break;
3555 }
3556 }
3557
3558 if (!Opc) {
3559 // Didn't get an opcode above, bail.
3560     LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
3561 return false;
3562 }
3563
3564 // Legalization would have set us up perfectly for this; we just need to
3565 // set the opcode and move on.
3566 I.setDesc(TII.get(Opc));
3567 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3568}
3569
3570bool AArch64InstructionSelector::selectVectorICmp(
3571 MachineInstr &I, MachineRegisterInfo &MRI) {
3572 Register DstReg = I.getOperand(0).getReg();
3573 LLT DstTy = MRI.getType(DstReg);
3574 Register SrcReg = I.getOperand(2).getReg();
3575 Register Src2Reg = I.getOperand(3).getReg();
3576 LLT SrcTy = MRI.getType(SrcReg);
3577
3578 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
3579 unsigned NumElts = DstTy.getNumElements();
3580
3581 // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
3582 // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
3583 // Third index is cc opcode:
3584 // 0 == eq
3585 // 1 == ugt
3586 // 2 == uge
3587 // 3 == ult
3588 // 4 == ule
3589 // 5 == sgt
3590 // 6 == sge
3591 // 7 == slt
3592 // 8 == sle
3593 // ne is done by negating 'eq' result.
3594
3595 // This table below assumes that for some comparisons the operands will be
3596 // commuted.
3597 // ult op == commute + ugt op
3598 // ule op == commute + uge op
3599 // slt op == commute + sgt op
3600 // sle op == commute + sge op
3601 unsigned PredIdx = 0;
3602 bool SwapOperands = false;
3603 CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
3604 switch (Pred) {
3605 case CmpInst::ICMP_NE:
3606 case CmpInst::ICMP_EQ:
3607 PredIdx = 0;
3608 break;
3609 case CmpInst::ICMP_UGT:
3610 PredIdx = 1;
3611 break;
3612 case CmpInst::ICMP_UGE:
3613 PredIdx = 2;
3614 break;
3615 case CmpInst::ICMP_ULT:
3616 PredIdx = 3;
3617 SwapOperands = true;
3618 break;
3619 case CmpInst::ICMP_ULE:
3620 PredIdx = 4;
3621 SwapOperands = true;
3622 break;
3623 case CmpInst::ICMP_SGT:
3624 PredIdx = 5;
3625 break;
3626 case CmpInst::ICMP_SGE:
3627 PredIdx = 6;
3628 break;
3629 case CmpInst::ICMP_SLT:
3630 PredIdx = 7;
3631 SwapOperands = true;
3632 break;
3633 case CmpInst::ICMP_SLE:
3634 PredIdx = 8;
3635 SwapOperands = true;
3636 break;
3637 default:
3638     llvm_unreachable("Unhandled icmp predicate");
3639 return false;
3640 }
3641
3642 // This table obviously should be tablegen'd when we have our GISel native
3643 // tablegen selector.
3644
3645 static const unsigned OpcTable[4][4][9] = {
3646 {
3647 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3648 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3649 0 /* invalid */},
3650 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3651 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3652 0 /* invalid */},
3653 {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
3654 AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
3655 AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
3656 {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
3657 AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
3658 AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
3659 },
3660 {
3661 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3662 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3663 0 /* invalid */},
3664 {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
3665 AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
3666 AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
3667 {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
3668 AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
3669 AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
3670 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3671 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3672 0 /* invalid */}
3673 },
3674 {
3675 {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
3676 AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
3677 AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
3678 {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
3679 AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
3680 AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
3681 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3682 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3683 0 /* invalid */},
3684 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3685 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3686 0 /* invalid */}
3687 },
3688 {
3689 {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
3690 AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
3691 AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
3692 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3693 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3694 0 /* invalid */},
3695 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3696 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3697 0 /* invalid */},
3698 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3699 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3700 0 /* invalid */}
3701 },
3702 };
3703 unsigned EltIdx = Log2_32(SrcEltSize / 8);
3704 unsigned NumEltsIdx = Log2_32(NumElts / 2);
3705 unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
3706 if (!Opc) {
3707     LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
3708 return false;
3709 }
3710
3711 const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3712 const TargetRegisterClass *SrcRC =
3713 getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
3714 if (!SrcRC) {
3715     LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3716 return false;
3717 }
3718
3719 unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
3720 if (SrcTy.getSizeInBits() == 128)
3721 NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
3722
3723 if (SwapOperands)
3724 std::swap(SrcReg, Src2Reg);
3725
3726 auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
3727 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3728
3729 // Invert if we had a 'ne' cc.
3730 if (NotOpc) {
3731 Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
3732 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3733 } else {
3734 MIB.buildCopy(DstReg, Cmp.getReg(0));
3735 }
3736 RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
3737 I.eraseFromParent();
3738 return true;
3739}
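
A worked example of the table indexing above (hand-computed for this note):

    // sge on <4 x s32>: EltIdx = Log2_32(32/8) = 2, NumEltsIdx = Log2_32(4/2) = 1,
    //   PredIdx = 6  ->  OpcTable[2][1][6] == AArch64::CMGEv4i32, no swap.
    // slt on <4 x s32>: PredIdx = 7 -> CMGTv4i32 with SwapOperands set,
    //   since a < b is evaluated as b > a with the operands commuted.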
3740
3741MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3742 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3743 MachineIRBuilder &MIRBuilder) const {
3744 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3745
3746 auto BuildFn = [&](unsigned SubregIndex) {
3747 auto Ins =
3748 MIRBuilder
3749 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
3750 .addImm(SubregIndex);
3751 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
3752 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
3753 return &*Ins;
3754 };
3755
3756 switch (EltSize) {
3757 case 16:
3758 return BuildFn(AArch64::hsub);
3759 case 32:
3760 return BuildFn(AArch64::ssub);
3761 case 64:
3762 return BuildFn(AArch64::dsub);
3763 default:
3764 return nullptr;
3765 }
3766}
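
emitScalarToVector is how sub-128-bit FPR values get widened to the 128-bit register class that lane instructions require. A typical call (this exact shape appears later in this file) and the MIR it produces for a 64-bit source:

    MachineInstr *Widened = emitScalarToVector(
        VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
    // %undef:fpr128 = IMPLICIT_DEF
    // %wide:fpr128  = INSERT_SUBREG %undef, %vec, %subreg.dsub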
3767
3768bool AArch64InstructionSelector::selectMergeValues(
3769 MachineInstr &I, MachineRegisterInfo &MRI) {
3770   assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
3771 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3772 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3773   assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
3774 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3775
3776 if (I.getNumOperands() != 3)
3777 return false;
3778
3779 // Merging 2 s64s into an s128.
3780 if (DstTy == LLT::scalar(128)) {
3781 if (SrcTy.getSizeInBits() != 64)
3782 return false;
3783 Register DstReg = I.getOperand(0).getReg();
3784 Register Src1Reg = I.getOperand(1).getReg();
3785 Register Src2Reg = I.getOperand(2).getReg();
3786 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
3787 MachineInstr *InsMI =
3788 emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB);
3789 if (!InsMI)
3790 return false;
3791 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
3792 Src2Reg, /* LaneIdx */ 1, RB, MIB);
3793 if (!Ins2MI)
3794 return false;
3795 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3796 constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
3797 I.eraseFromParent();
3798 return true;
3799 }
3800
3801 if (RB.getID() != AArch64::GPRRegBankID)
3802 return false;
3803
3804 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
3805 return false;
3806
3807 auto *DstRC = &AArch64::GPR64RegClass;
3808 Register SubToRegDef = MRI.createVirtualRegister(DstRC);
3809 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3810 TII.get(TargetOpcode::SUBREG_TO_REG))
3811 .addDef(SubToRegDef)
3812 .addImm(0)
3813 .addUse(I.getOperand(1).getReg())
3814 .addImm(AArch64::sub_32);
3815 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
3816 // Need to anyext the second scalar before we can use bfm
3817 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3818 TII.get(TargetOpcode::SUBREG_TO_REG))
3819 .addDef(SubToRegDef2)
3820 .addImm(0)
3821 .addUse(I.getOperand(2).getReg())
3822 .addImm(AArch64::sub_32);
3823 MachineInstr &BFM =
3824 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
3825 .addDef(I.getOperand(0).getReg())
3826 .addUse(SubToRegDef)
3827 .addUse(SubToRegDef2)
3828 .addImm(32)
3829 .addImm(31);
3830 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
3831 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
3832 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
3833 I.eraseFromParent();
3834 return true;
3835}
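
In the GPR path above, BFMXri with immr = 32 and imms = 31 acts as a bitfield insert of the second operand's low 32 bits at bit position 32, so the merge computes (hi << 32) | lo. Sketch of the emitted sequence (register names invented):

    // %a:gpr64 = SUBREG_TO_REG 0, %lo32, %subreg.sub_32   ; anyext operand 1
    // %b:gpr64 = SUBREG_TO_REG 0, %hi32, %subreg.sub_32   ; anyext operand 2
    // %dst     = BFMXri %a, %b, 32, 31    ; insert %b[31:0] into %dst[63:32]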
3836
3837static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
3838 const unsigned EltSize) {
3839 // Choose a lane copy opcode and subregister based off of the size of the
3840 // vector's elements.
3841 switch (EltSize) {
3842 case 16:
3843 CopyOpc = AArch64::CPYi16;
3844 ExtractSubReg = AArch64::hsub;
3845 break;
3846 case 32:
3847 CopyOpc = AArch64::CPYi32;
3848 ExtractSubReg = AArch64::ssub;
3849 break;
3850 case 64:
3851 CopyOpc = AArch64::CPYi64;
3852 ExtractSubReg = AArch64::dsub;
3853 break;
3854 default:
3855 // Unknown size, bail out.
3856     LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
3857 return false;
3858 }
3859 return true;
3860}
3861
3862MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
3863 Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
3864 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
3865 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3866 unsigned CopyOpc = 0;
3867 unsigned ExtractSubReg = 0;
3868 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
3869     LLVM_DEBUG(
3870         dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
3871 return nullptr;
3872 }
3873
3874 const TargetRegisterClass *DstRC =
3875 getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
3876 if (!DstRC) {
3877     LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
3878 return nullptr;
3879 }
3880
3881 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
3882 const LLT &VecTy = MRI.getType(VecReg);
3883 const TargetRegisterClass *VecRC =
3884 getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
3885 if (!VecRC) {
3886     LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3887 return nullptr;
3888 }
3889
3890 // The register that we're going to copy into.
3891 Register InsertReg = VecReg;
3892 if (!DstReg)
3893 DstReg = MRI.createVirtualRegister(DstRC);
3894 // If the lane index is 0, we just use a subregister COPY.
3895 if (LaneIdx == 0) {
3896 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
3897 .addReg(VecReg, 0, ExtractSubReg);
3898 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3899 return &*Copy;
3900 }
3901
3902 // Lane copies require 128-bit wide registers. If we're dealing with an
3903 // unpacked vector, then we need to move up to that width. Insert an implicit
3904 // def and a subregister insert to get us there.
3905 if (VecTy.getSizeInBits() != 128) {
3906 MachineInstr *ScalarToVector = emitScalarToVector(
3907 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
3908 if (!ScalarToVector)
3909 return nullptr;
3910 InsertReg = ScalarToVector->getOperand(0).getReg();
3911 }
3912
3913 MachineInstr *LaneCopyMI =
3914 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
3915 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
3916
3917 // Make sure that we actually constrain the initial copy.
3918 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3919 return LaneCopyMI;
3920}
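
Concretely (hand-written example): extracting lane 1 of a <2 x s64> already held in an FPR128 uses the CPYi64 lane copy, while lane 0 degenerates to a plain subregister copy:

    // %dst:fpr64 = CPYi64 %vec:fpr128, 1   ; lane 1: real lane copy
    // %dst:fpr64 = COPY %vec.dsub          ; lane 0: subregister copy only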
3921
3922bool AArch64InstructionSelector::selectExtractElt(
3923 MachineInstr &I, MachineRegisterInfo &MRI) {
3924   assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
3925          "unexpected opcode!");
3926 Register DstReg = I.getOperand(0).getReg();
3927 const LLT NarrowTy = MRI.getType(DstReg);
3928 const Register SrcReg = I.getOperand(1).getReg();
3929 const LLT WideTy = MRI.getType(SrcReg);
3930 (void)WideTy;
3931   assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
3932          "source register size too small!");
3933   assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
3934
3935 // Need the lane index to determine the correct copy opcode.
3936 MachineOperand &LaneIdxOp = I.getOperand(2);
3937   assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
3938
3939 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
3940     LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
3941 return false;
3942 }
3943
3944 // Find the index to extract from.
3945 auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
3946 if (!VRegAndVal)
3947 return false;
3948 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
3949
3950
3951 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3952 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
3953 LaneIdx, MIB);
3954 if (!Extract)
3955 return false;
3956
3957 I.eraseFromParent();
3958 return true;
3959}
3960
3961bool AArch64InstructionSelector::selectSplitVectorUnmerge(
3962 MachineInstr &I, MachineRegisterInfo &MRI) {
3963 unsigned NumElts = I.getNumOperands() - 1;
3964 Register SrcReg = I.getOperand(NumElts).getReg();
3965 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
3966 const LLT SrcTy = MRI.getType(SrcReg);
3967
3968   assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
3969 if (SrcTy.getSizeInBits() > 128) {
3970     LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
3971 return false;
3972 }
3973
3974 // We implement a split vector operation by treating the sub-vectors as
3975 // scalars and extracting them.
3976 const RegisterBank &DstRB =
3977 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
3978 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
3979 Register Dst = I.getOperand(OpIdx).getReg();
3980 MachineInstr *Extract =
3981 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
3982 if (!Extract)
3983 return false;
3984 }
3985 I.eraseFromParent();
3986 return true;
3987}
3988
3989bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
3990 MachineRegisterInfo &MRI) {
3991   assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
3992          "unexpected opcode");
3993
3994 // TODO: Handle unmerging into GPRs and from scalars to scalars.
3995 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
3996 AArch64::FPRRegBankID ||
3997 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3998 AArch64::FPRRegBankID) {
3999     LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
4000                          "currently unsupported.\n");
4001 return false;
4002 }
4003
4004 // The last operand is the vector source register, and every other operand is
4005 // a register to unpack into.
4006 unsigned NumElts = I.getNumOperands() - 1;
4007 Register SrcReg = I.getOperand(NumElts).getReg();
4008 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4009 const LLT WideTy = MRI.getType(SrcReg);
4010 (void)WideTy;
4011   assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
4012          "can only unmerge from vector or s128 types!");
4013   assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
4014          "source register size too small!");
4015
4016 if (!NarrowTy.isScalar())
4017 return selectSplitVectorUnmerge(I, MRI);
4018
4019 // Choose a lane copy opcode and subregister based off of the size of the
4020 // vector's elements.
4021 unsigned CopyOpc = 0;
4022 unsigned ExtractSubReg = 0;
4023 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
4024 return false;
4025
4026 // Set up for the lane copies.
4027 MachineBasicBlock &MBB = *I.getParent();
4028
4029 // Stores the registers we'll be copying from.
4030 SmallVector<Register, 4> InsertRegs;
4031
4032 // We'll use the first register twice, so we only need NumElts-1 registers.
4033 unsigned NumInsertRegs = NumElts - 1;
4034
4035 // If our elements fit into exactly 128 bits, then we can copy from the source
4036 // directly. Otherwise, we need to do a bit of setup with some subregister
4037 // inserts.
4038 if (NarrowTy.getSizeInBits() * NumElts == 128) {
4039 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
4040 } else {
4041 // No. We have to perform subregister inserts. For each insert, create an
4042 // implicit def and a subregister insert, and save the register we create.
4043 const TargetRegisterClass *RC =
4044 getMinClassForRegBank(*RBI.getRegBank(SrcReg, MRI, TRI),
4045 WideTy.getScalarSizeInBits() * NumElts);
4046 unsigned SubReg = 0;
4047 bool Found = getSubRegForClass(RC, TRI, SubReg);
4048 (void)Found;
4049     assert(Found && "expected to find last operand's subreg idx");
4050 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
4051 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4052 MachineInstr &ImpDefMI =
4053 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
4054 ImpDefReg);
4055
4056 // Now, create the subregister insert from SrcReg.
4057 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4058 MachineInstr &InsMI =
4059 *BuildMI(MBB, I, I.getDebugLoc(),
4060 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
4061 .addUse(ImpDefReg)
4062 .addUse(SrcReg)
4063 .addImm(SubReg);
4064
4065 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
4066 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
4067
4068 // Save the register so that we can copy from it after.
4069 InsertRegs.push_back(InsertReg);
4070 }
4071 }
4072
4073 // Now that we've created any necessary subregister inserts, we can
4074 // create the copies.
4075 //
4076 // Perform the first copy separately as a subregister copy.
4077 Register CopyTo = I.getOperand(0).getReg();
4078 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
4079 .addReg(InsertRegs[0], 0, ExtractSubReg);
4080 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
4081
4082 // Now, perform the remaining copies as vector lane copies.
4083 unsigned LaneIdx = 1;
4084 for (Register InsReg : InsertRegs) {
4085 Register CopyTo = I.getOperand(LaneIdx).getReg();
4086 MachineInstr &CopyInst =
4087 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
4088 .addUse(InsReg)
4089 .addImm(LaneIdx);
4090 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
4091 ++LaneIdx;
4092 }
4093
4094 // Separately constrain the first copy's destination. Because of the
4095 // limitation in constrainOperandRegClass, we can't guarantee that this will
4096 // actually be constrained. So, do it ourselves using the second operand.
4097 const TargetRegisterClass *RC =
4098 MRI.getRegClassOrNull(I.getOperand(1).getReg());
4099 if (!RC) {
4100     LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
4101 return false;
4102 }
4103
4104 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
4105 I.eraseFromParent();
4106 return true;
4107}
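
As an example (sketch): unmerging a <4 x s32> held in an FPR128 into four s32 results becomes one ssub copy for element 0 plus CPYi32 lane copies for elements 1..3; the IMPLICIT_DEF/INSERT_SUBREG setup above is only needed when the source is narrower than 128 bits:

    // %d0:fpr32 = COPY %vec.ssub
    // %d1:fpr32 = CPYi32 %vec, 1
    // %d2:fpr32 = CPYi32 %vec, 2
    // %d3:fpr32 = CPYi32 %vec, 3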
4108
4109bool AArch64InstructionSelector::selectConcatVectors(
4110 MachineInstr &I, MachineRegisterInfo &MRI) {
4111   assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4112          "Unexpected opcode");
4113 Register Dst = I.getOperand(0).getReg();
4114 Register Op1 = I.getOperand(1).getReg();
4115 Register Op2 = I.getOperand(2).getReg();
4116 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
4117 if (!ConcatMI)
4118 return false;
4119 I.eraseFromParent();
4120 return true;
4121}
4122
4123unsigned
4124AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
4125 MachineFunction &MF) const {
4126 Type *CPTy = CPVal->getType();
4127 Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
4128
4129 MachineConstantPool *MCP = MF.getConstantPool();
4130 return MCP->getConstantPoolIndex(CPVal, Alignment);
4131}
4132
4133MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4134 const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
4135 auto &MF = MIRBuilder.getMF();
4136 unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
4137
4138 auto Adrp =
4139 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
4140 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
4141
4142 MachineInstr *LoadMI = nullptr;
4143 MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
4144 unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
4145 switch (Size) {
4146 case 16:
4147 LoadMI =
4148 &*MIRBuilder
4149 .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
4150 .addConstantPoolIndex(CPIdx, 0,
4151 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4152 break;
4153 case 8:
4154 LoadMI =
4155 &*MIRBuilder
4156 .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
4157 .addConstantPoolIndex(CPIdx, 0,
4158 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4159 break;
4160 case 4:
4161 LoadMI =
4162 &*MIRBuilder
4163 .buildInstr(AArch64::LDRSui, {&AArch64::FPR32RegClass}, {Adrp})
4164 .addConstantPoolIndex(CPIdx, 0,
4165 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4166 break;
4167 default:
4168     LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
4169                       << *CPVal->getType());
4170 return nullptr;
4171 }
4172 LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo,
4173 MachineMemOperand::MOLoad,
4174 Size, Align(Size)));
4175 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
4176 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
4177 return LoadMI;
4178}
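
The ADRP plus literal load emitted here is the standard small-code-model constant-pool access. For a 16-byte constant (sketch, placeholder label):

    // adrp x8, .LCPI0_0
    // ldr  q0, [x8, :lo12:.LCPI0_0]   ; the LDRQui case above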
4179
4180 /// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
4181 /// size and RB.
4182static std::pair<unsigned, unsigned>
4183getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
4184 unsigned Opc, SubregIdx;
4185 if (RB.getID() == AArch64::GPRRegBankID) {
4186 if (EltSize == 16) {
4187 Opc = AArch64::INSvi16gpr;
4188 SubregIdx = AArch64::ssub;
4189 } else if (EltSize == 32) {
4190 Opc = AArch64::INSvi32gpr;
4191 SubregIdx = AArch64::ssub;
4192 } else if (EltSize == 64) {
4193 Opc = AArch64::INSvi64gpr;
4194 SubregIdx = AArch64::dsub;
4195 } else {
4196       llvm_unreachable("invalid elt size!");
4197 }
4198 } else {
4199 if (EltSize == 8) {
4200 Opc = AArch64::INSvi8lane;
4201 SubregIdx = AArch64::bsub;
4202 } else if (EltSize == 16) {
4203 Opc = AArch64::INSvi16lane;
4204 SubregIdx = AArch64::hsub;
4205 } else if (EltSize == 32) {
4206 Opc = AArch64::INSvi32lane;
4207 SubregIdx = AArch64::ssub;
4208 } else if (EltSize == 64) {
4209 Opc = AArch64::INSvi64lane;
4210 SubregIdx = AArch64::dsub;
4211 } else {
4212       llvm_unreachable("invalid elt size!");
4214 }
4215 return std::make_pair(Opc, SubregIdx);
4216}
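
A typical use of the returned pair (sketch; variable names invented):

    unsigned InsOpc, SubregIdx;
    std::tie(InsOpc, SubregIdx) = getInsertVecEltOpInfo(RB, EltSize);
    // GPR bank, 32-bit elements -> {AArch64::INSvi32gpr, AArch64::ssub}
    // FPR bank,  8-bit elements -> {AArch64::INSvi8lane, AArch64::bsub}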
4217
4218MachineInstr *AArch64InstructionSelector::emitInstr(
4219 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4220 std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4221 const ComplexRendererFns &RenderFns) const {
4222   assert(Opcode && "Expected an opcode?");
4223   assert(!isPreISelGenericOpcode(Opcode) &&
4224          "Function should only be used to produce selected instructions!");
4225 auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
4226 if (RenderFns)
4227 for (auto &Fn : *RenderFns)
4228 Fn(MI);
4229 constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
4230 return &*MI;
4231}
4232
4233MachineInstr *AArch64InstructionSelector::emitAddSub(
4234 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4235 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
4236 MachineIRBuilder &MIRBuilder) const {
4237 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4238   assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
2
'?' condition is true
4239 auto Ty = MRI.getType(LHS.getReg());
4240   assert(!Ty.isVector() && "Expected a scalar or pointer?");
3
'?' condition is true
4241 unsigned Size = Ty.getSizeInBits();
4242   assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
4
Assuming 'Size' is not equal to 32
5
Assuming 'Size' is equal to 64
6
'?' condition is true
4243 bool Is32Bit = Size == 32;
4244
4245 // INSTRri form with positive arithmetic immediate.
4246 if (auto Fns = selectArithImmed(RHS))
7
Taking false branch
4247 return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
4248 MIRBuilder, Fns);
4249
4250 // INSTRri form with negative arithmetic immediate.
4251 if (auto Fns = selectNegArithImmed(RHS))
8
Taking false branch
4252 return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
4253 MIRBuilder, Fns);
4254
4255 // INSTRrx form.
4256 if (auto Fns = selectArithExtendedRegister(RHS))
9
Calling 'AArch64InstructionSelector::selectArithExtendedRegister'
4257 return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
4258 MIRBuilder, Fns);
4259
4260 // INSTRrs form.
4261 if (auto Fns = selectShiftedRegister(RHS))
4262 return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
4263 MIRBuilder, Fns);
4264 return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
4265 MIRBuilder);
4266}
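
The forms are tried most-specific first. For an s64 add of x and rhs (illustration, hand-written):

    // rhs == 4            -> ADDXri  (OpcTable[0], positive arith immediate)
    // rhs == -4           -> SUBXri  (OpcTable[3], negated immediate)
    // rhs == sext(w) << 2 -> ADDXrx  (OpcTable[4], extended register)
    // rhs == y << 3       -> ADDXrs  (OpcTable[1], shifted register)
    // otherwise           -> ADDXrr  (OpcTable[2], register-register)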
4267
4268MachineInstr *
4269AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4270 MachineOperand &RHS,
4271 MachineIRBuilder &MIRBuilder) const {
4272 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4273 {{AArch64::ADDXri, AArch64::ADDWri},
4274 {AArch64::ADDXrs, AArch64::ADDWrs},
4275 {AArch64::ADDXrr, AArch64::ADDWrr},
4276 {AArch64::SUBXri, AArch64::SUBWri},
4277 {AArch64::ADDXrx, AArch64::ADDWrx}}};
4278 return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
1
Calling 'AArch64InstructionSelector::emitAddSub'
4279}
4280
4281MachineInstr *
4282AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4283 MachineOperand &RHS,
4284 MachineIRBuilder &MIRBuilder) const {
4285 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4286 {{AArch64::ADDSXri, AArch64::ADDSWri},
4287 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4288 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4289 {AArch64::SUBSXri, AArch64::SUBSWri},
4290 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4291 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4292}
4293
4294MachineInstr *
4295AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4296 MachineOperand &RHS,
4297 MachineIRBuilder &MIRBuilder) const {
4298 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4299 {{AArch64::SUBSXri, AArch64::SUBSWri},
4300 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4301 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4302 {AArch64::ADDSXri, AArch64::ADDSWri},
4303 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4304 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4305}
4306
4307MachineInstr *
4308AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4309 MachineIRBuilder &MIRBuilder) const {
4310 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4311 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
4312 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4313 return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4314}
4315
4316MachineInstr *
4317AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4318 MachineIRBuilder &MIRBuilder) const {
4319   assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4320 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4321 LLT Ty = MRI.getType(LHS.getReg());
4322 unsigned RegSize = Ty.getSizeInBits();
4323 bool Is32Bit = (RegSize == 32);
4324 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4325 {AArch64::ANDSXrs, AArch64::ANDSWrs},
4326 {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4327 // ANDS needs a logical immediate for its immediate form. Check if we can
4328 // fold one in.
4329 if (auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
4330 int64_t Imm = ValAndVReg->Value.getSExtValue();
4331
4332 if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
4333 auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
4334 TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
4335 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
4336 return &*TstMI;
4337 }
4338 }
4339
4340 if (auto Fns = selectLogicalShiftedRegister(RHS))
4341 return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
4342 return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
4343}
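
For example (illustration): testing against 0xff folds into the immediate form, while a mask that is not a valid bitmask immediate falls back to the register forms:

    // tst w0, #0xff        -> ANDSWri, since isLogicalImmediate(0xff, 32)
    // tst w0, #0x12345678  -> not a bitmask immediate; ANDSWrr (or ANDSWrs
    //                         if the RHS is itself a shifted register)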
4344
4345MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4346 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4347 MachineIRBuilder &MIRBuilder) const {
4348   assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4349   assert(Predicate.isPredicate() && "Expected predicate?");
4350 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4351 LLT CmpTy = MRI.getType(LHS.getReg());
4352   assert(!CmpTy.isVector() && "Expected scalar or pointer");
4353 unsigned Size = CmpTy.getSizeInBits();
4354 (void)Size;
4355   assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
4356 // Fold the compare into a cmn or tst if possible.
4357 if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4358 return FoldCmp;
4359 auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
4360 return emitSUBS(Dst, LHS, RHS, MIRBuilder);
4361}
4362
4363MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4364 Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
4365 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4366#ifndef NDEBUG
4367 LLT Ty = MRI.getType(Dst);
4368   assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
4369          "Expected a 32-bit scalar register?");
4370#endif
4371 const Register ZeroReg = AArch64::WZR;
4372 auto EmitCSet = [&](Register CsetDst, AArch64CC::CondCode CC) {
4373 auto CSet =
4374 MIRBuilder.buildInstr(AArch64::CSINCWr, {CsetDst}, {ZeroReg, ZeroReg})
4375 .addImm(getInvertedCondCode(CC));
4376 constrainSelectedInstRegOperands(*CSet, TII, TRI, RBI);
4377 return &*CSet;
4378 };
4379
4380 AArch64CC::CondCode CC1, CC2;
4381 changeFCMPPredToAArch64CC(Pred, CC1, CC2);
4382 if (CC2 == AArch64CC::AL)
4383 return EmitCSet(Dst, CC1);
4384
4385 const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
4386 Register Def1Reg = MRI.createVirtualRegister(RC);
4387 Register Def2Reg = MRI.createVirtualRegister(RC);
4388 EmitCSet(Def1Reg, CC1);
4389 EmitCSet(Def2Reg, CC2);
4390 auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4391 constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
4392 return &*OrMI;
4393}
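// For illustration (assumed mapping, per changeFCMPPredToAArch64CC): a
// predicate like FCMP_ONE decomposes into MI and GT, so the CSet lambda runs
// twice with inverted condition codes and the results are ORed together:
//
//   csinc w8, wzr, wzr, pl   // cset w8, mi
//   csinc w9, wzr, wzr, le   // cset w9, gt
//   orr   w0, w8, w9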
4394
4395MachineInstr *
4396AArch64InstructionSelector::emitFPCompare(Register LHS, Register RHS,
4397 MachineIRBuilder &MIRBuilder,
4398 Optional<CmpInst::Predicate> Pred) const {
4399 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4400 LLT Ty = MRI.getType(LHS);
4401 if (Ty.isVector())
4402 return nullptr;
4403 unsigned OpSize = Ty.getSizeInBits();
4404 if (OpSize != 32 && OpSize != 64)
4405 return nullptr;
4406
4407 // If this is a compare against +0.0, then we don't have
4408 // to explicitly materialize a constant.
4409 const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
4410 bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4411
4412 auto IsEqualityPred = [](CmpInst::Predicate P) {
4413 return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
4414 P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
4415 };
4416 if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4417 // Try commutating the operands.
4418 const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
4419 if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
4420 ShouldUseImm = true;
4421 std::swap(LHS, RHS);
4422 }
4423 }
4424 unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
4425 {AArch64::FCMPSri, AArch64::FCMPDri}};
4426 unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64];
4427
4428 // Partially build the compare. Decide if we need to add a use for the
4429 // third operand based on whether or not we're comparing against 0.0.
4430 auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
4431 if (!ShouldUseImm)
4432 CmpMI.addUse(RHS);
4433 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4434 return &*CmpMI;
4435}
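// E.g. comparing a float against +0.0 drops the second register use and
// selects the immediate form (a sketch, assuming the standard FCMP aliases):
//
//   %c = G_FCMP oeq, %x, float 0.0   ==>   FCMPSri %x      // fcmp s0, #0.0
//   %c = G_FCMP oeq, %x, %y          ==>   FCMPSrr %x, %y  // fcmp s0, s1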
4436
4437MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4438 Optional<Register> Dst, Register Op1, Register Op2,
4439 MachineIRBuilder &MIRBuilder) const {
4440 // We implement a vector concat by:
4441 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
4442 // 2. Insert the upper vector into the destination's upper element
4443 // TODO: some of this code is common with G_BUILD_VECTOR handling.
4444 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4445
4446 const LLT Op1Ty = MRI.getType(Op1);
4447 const LLT Op2Ty = MRI.getType(Op2);
4448
4449 if (Op1Ty != Op2Ty) {
4450 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4451 return nullptr;
4452 }
4453 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4454
4455 if (Op1Ty.getSizeInBits() >= 128) {
4456 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4457 return nullptr;
4458 }
4459
4460 // At the moment we just support 64 bit vector concats.
4461 if (Op1Ty.getSizeInBits() != 64) {
4462 LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
4463 return nullptr;
4464 }
4465
4466 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
4467 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
4468 const TargetRegisterClass *DstRC =
4469 getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);
4470
4471 MachineInstr *WidenedOp1 =
4472 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
4473 MachineInstr *WidenedOp2 =
4474 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
4475 if (!WidenedOp1 || !WidenedOp2) {
4476 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4477 return nullptr;
4478 }
4479
4480 // Now do the insert of the upper element.
4481 unsigned InsertOpc, InsSubRegIdx;
4482 std::tie(InsertOpc, InsSubRegIdx) =
4483 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
4484
4485 if (!Dst)
4486 Dst = MRI.createVirtualRegister(DstRC);
4487 auto InsElt =
4488 MIRBuilder
4489 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
4490 .addImm(1) /* Lane index */
4491 .addUse(WidenedOp2->getOperand(0).getReg())
4492 .addImm(0);
4493 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4494 return &*InsElt;
4495}
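// Roughly, concatenating two 64-bit vectors (say <2 x s32>) proceeds as:
//
//   %w1:fpr128 = scalar_to_vector of Op1     // Op1 in the low 64 bits
//   %w2:fpr128 = scalar_to_vector of Op2
//   %dst       = INSvi64lane %w1, 1, %w2, 0  // Op2's low half into lane 1
//
// (A sketch only; the exact opcode comes from getInsertVecEltOpInfo.)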
4496
4497MachineInstr *
4498AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
4499 MachineIRBuilder &MIRBuilder,
4500 Register SrcReg) const {
4501 // CSINC increments the result when the predicate is false. Invert it.
4502 const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
4503 CmpInst::getInversePredicate((CmpInst::Predicate)Pred));
4504 auto I = MIRBuilder.buildInstr(AArch64::CSINCWr, {DefReg}, {SrcReg, SrcReg})
4505 .addImm(InvCC);
4506 constrainSelectedInstRegOperands(*I, TII, TRI, RBI);
4507 return &*I;
4508}
4509
4510std::pair<MachineInstr *, AArch64CC::CondCode>
4511AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4512 MachineOperand &LHS,
4513 MachineOperand &RHS,
4514 MachineIRBuilder &MIRBuilder) const {
4515 switch (Opcode) {
4516 default:
4517 llvm_unreachable("Unexpected opcode!");
4518 case TargetOpcode::G_SADDO:
4519 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4520 case TargetOpcode::G_UADDO:
4521 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4522 case TargetOpcode::G_SSUBO:
4523 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4524 case TargetOpcode::G_USUBO:
4525 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4526 }
4527}
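// The caller materializes the overflow bit from the returned condition code,
// so e.g. G_UADDO is expected to end up as something like:
//
//   adds w0, w1, w2    // emitADDS, sets NZCV
//   cset w3, hs        // carry set == unsigned overflow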
4528
4529bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) {
4530 MachineRegisterInfo &MRI = *MIB.getMRI();
4531 // We want to recognize this pattern:
4532 //
4533 // $z = G_FCMP pred, $x, $y
4534 // ...
4535 // $w = G_SELECT $z, $a, $b
4536 //
4537 // Where the value of $z is *only* ever used by the G_SELECT (possibly with
4538 // some copies/truncs in between.)
4539 //
4540 // If we see this, then we can emit something like this:
4541 //
4542 // fcmp $x, $y
4543 // fcsel $w, $a, $b, pred
4544 //
4545 // Rather than emitting both of the rather long sequences in the standard
4546 // G_FCMP/G_SELECT select methods.
4547
4548 // First, check if the condition is defined by a compare.
4549 MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
4550 while (CondDef) {
4551 // We can only fold if all of the defs have one use.
4552 Register CondDefReg = CondDef->getOperand(0).getReg();
4553 if (!MRI.hasOneNonDBGUse(CondDefReg)) {
4554 // Unless it's another select.
4555 for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
4556 if (CondDef == &UI)
4557 continue;
4558 if (UI.getOpcode() != TargetOpcode::G_SELECT)
4559 return false;
4560 }
4561 }
4562
4563 // We can skip over G_TRUNC since the condition is 1-bit.
4564 // Truncating/extending can have no impact on the value.
4565 unsigned Opc = CondDef->getOpcode();
4566 if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
4567 break;
4568
4569 // Can't see past copies from physregs.
4570 if (Opc == TargetOpcode::COPY &&
4571 Register::isPhysicalRegister(CondDef->getOperand(1).getReg()))
4572 return false;
4573
4574 CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
4575 }
4576
4577 // Is the condition defined by a compare?
4578 if (!CondDef)
4579 return false;
4580
4581 unsigned CondOpc = CondDef->getOpcode();
4582 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP)
4583 return false;
4584
4585 AArch64CC::CondCode CondCode;
4586 if (CondOpc == TargetOpcode::G_ICMP) {
4587 auto Pred =
4588 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
4589 CondCode = changeICMPPredToAArch64CC(Pred);
4590 emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
4591 CondDef->getOperand(1), MIB);
4592 } else {
4593 // Get the condition code for the select.
4594 auto Pred =
4595 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
4596 AArch64CC::CondCode CondCode2;
4597 changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);
4598
4599 // changeFCMPPredToAArch64CC leaves CondCode2 as AL only when a single
4600 // instruction can emit the comparison; bail out when two would be needed.
4601 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
4602 // unnecessary.
4603 if (CondCode2 != AArch64CC::AL)
4604 return false;
4605
4606 if (!emitFPCompare(CondDef->getOperand(2).getReg(),
4607 CondDef->getOperand(3).getReg(), MIB)) {
4608 LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
4609 return false;
4610 }
4611 }
4612
4613 // Emit the select.
4614 emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
4615 I.getOperand(3).getReg(), CondCode, MIB);
4616 I.eraseFromParent();
4617 return true;
4618}
4619
4620MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
4621 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4622 MachineIRBuilder &MIRBuilder) const {
4623 assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
4624        "Unexpected MachineOperand");
4625 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4626 // We want to find this sort of thing:
4627 // x = G_SUB 0, y
4628 // G_ICMP z, x
4629 //
4630 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
4631 // e.g.:
4632 //
4633 // cmn z, y
4634
4635 // Check if the RHS or LHS of the G_ICMP is defined by a SUB
4636 MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
4637 MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
4638 auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());
4639 // Given this:
4640 //
4641 // x = G_SUB 0, y
4642 // G_ICMP x, z
4643 //
4644 // Produce this:
4645 //
4646 // cmn y, z
4647 if (isCMN(LHSDef, P, MRI))
4648 return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
4649
4650 // Same idea here, but with the RHS of the compare instead:
4651 //
4652 // Given this:
4653 //
4654 // x = G_SUB 0, y
4655 // G_ICMP z, x
4656 //
4657 // Produce this:
4658 //
4659 // cmn z, y
4660 if (isCMN(RHSDef, P, MRI))
4661 return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
4662
4663 // Given this:
4664 //
4665 // z = G_AND x, y
4666 // G_ICMP z, 0
4667 //
4668 // Produce this if the compare is signed:
4669 //
4670 // tst x, y
4671 if (!CmpInst::isUnsigned(P) && LHSDef &&
4672 LHSDef->getOpcode() == TargetOpcode::G_AND) {
4673 // Make sure that the RHS is 0.
4674 auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI);
4675 if (!ValAndVReg || ValAndVReg->Value != 0)
4676 return nullptr;
4677
4678 return emitTST(LHSDef->getOperand(1),
4679 LHSDef->getOperand(2), MIRBuilder);
4680 }
4681
4682 return nullptr;
4683}
4684
4685bool AArch64InstructionSelector::selectShuffleVector(
4686 MachineInstr &I, MachineRegisterInfo &MRI) {
4687 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
4688 Register Src1Reg = I.getOperand(1).getReg();
4689 const LLT Src1Ty = MRI.getType(Src1Reg);
4690 Register Src2Reg = I.getOperand(2).getReg();
4691 const LLT Src2Ty = MRI.getType(Src2Reg);
4692 ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
4693
4694 MachineBasicBlock &MBB = *I.getParent();
4695 MachineFunction &MF = *MBB.getParent();
4696 LLVMContext &Ctx = MF.getFunction().getContext();
4697
4698 // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
4699 // it originated from a <1 x T> type. Those should have been lowered into
4700 // G_BUILD_VECTOR earlier.
4701 if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
4702 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
4703 return false;
4704 }
4705
4706 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
4707
4708 SmallVector<Constant *, 64> CstIdxs;
4709 for (int Val : Mask) {
4710 // For now, we'll just assume any undef indexes are 0. This should be
4711 // optimized in the future, e.g. to select DUP etc.
4712 Val = Val < 0 ? 0 : Val;
4713 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
4714 unsigned Offset = Byte + Val * BytesPerElt;
4715 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
4716 }
4717 }
4718
4719 // Use a constant pool to load the index vector for TBL.
4720 Constant *CPVal = ConstantVector::get(CstIdxs);
4721 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIB);
4722 if (!IndexLoad) {
4723 LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
4724 return false;
4725 }
4726
4727 if (DstTy.getSizeInBits() != 128) {
4728 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
4729 // This case can be done with TBL1.
4730 MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIB);
4731 if (!Concat) {
4732 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
4733 return false;
4734 }
4735
4736 // The constant pool load will be 64 bits, so we need to convert it to an FPR128 reg.
4737 IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass,
4738 IndexLoad->getOperand(0).getReg(), MIB);
4739
4740 auto TBL1 = MIB.buildInstr(
4741 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
4742 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
4743 constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
4744
4745 auto Copy =
4746 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
4747 .addReg(TBL1.getReg(0), 0, AArch64::dsub);
4748 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
4749 I.eraseFromParent();
4750 return true;
4751 }
4752
4753 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
4754 // Q registers for regalloc.
4755 SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};
4756 auto RegSeq = createQTuple(Regs, MIB);
4757 auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
4758 {RegSeq, IndexLoad->getOperand(0)});
4759 constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
4760 I.eraseFromParent();
4761 return true;
4762}
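// As a worked example: shuffling two <4 x s32> sources with mask <0,4,1,5>
// would build the byte-index vector {0..3, 16..19, 4..7, 20..23} in the
// constant pool, load it, and feed TBLv16i8Two with the REG_SEQUENCE of the
// two Q registers (the second source's bytes start at offset 16).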
4763
4764MachineInstr *AArch64InstructionSelector::emitLaneInsert(
4765 Optional<Register> DstReg, Register SrcReg, Register EltReg,
4766 unsigned LaneIdx, const RegisterBank &RB,
4767 MachineIRBuilder &MIRBuilder) const {
4768 MachineInstr *InsElt = nullptr;
4769 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
4770 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4771
4772 // Create a register to define with the insert if one wasn't passed in.
4773 if (!DstReg)
4774 DstReg = MRI.createVirtualRegister(DstRC);
4775
4776 unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
4777 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
4778
4779 if (RB.getID() == AArch64::FPRRegBankID) {
4780 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
4781 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
4782 .addImm(LaneIdx)
4783 .addUse(InsSub->getOperand(0).getReg())
4784 .addImm(0);
4785 } else {
4786 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
4787 .addImm(LaneIdx)
4788 .addUse(EltReg);
4789 }
4790
4791 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4792 return InsElt;
4793}
4794
4795bool AArch64InstructionSelector::selectInsertElt(MachineInstr &I,
4796 MachineRegisterInfo &MRI) {
4797 assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
4798
4799 // Get information on the destination.
4800 Register DstReg = I.getOperand(0).getReg();
4801 const LLT DstTy = MRI.getType(DstReg);
4802 unsigned VecSize = DstTy.getSizeInBits();
4803
4804 // Get information on the element we want to insert into the destination.
4805 Register EltReg = I.getOperand(2).getReg();
4806 const LLT EltTy = MRI.getType(EltReg);
4807 unsigned EltSize = EltTy.getSizeInBits();
4808 if (EltSize < 16 || EltSize > 64)
4809 return false; // Don't support all element types yet.
4810
4811 // Find the definition of the index. Bail out if it's not defined by a
4812 // G_CONSTANT.
4813 Register IdxReg = I.getOperand(3).getReg();
4814 auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI);
4815 if (!VRegAndVal)
4816 return false;
4817 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
4818
4819 // Perform the lane insert.
4820 Register SrcReg = I.getOperand(1).getReg();
4821 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
4822
4823 if (VecSize < 128) {
4824 // If the vector we're inserting into is smaller than 128 bits, widen it
4825 // to 128 to do the insert.
4826 MachineInstr *ScalarToVec =
4827 emitScalarToVector(VecSize, &AArch64::FPR128RegClass, SrcReg, MIB);
4828 if (!ScalarToVec)
4829 return false;
4830 SrcReg = ScalarToVec->getOperand(0).getReg();
4831 }
4832
4833 // Create an insert into a new FPR128 register.
4834 // Note that if our vector is already 128 bits, we end up emitting an extra
4835 // register.
4836 MachineInstr *InsMI =
4837 emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIB);
4838
4839 if (VecSize < 128) {
4840 // If we had to widen to perform the insert, then we have to demote back to
4841 // the original size to get the result we want.
4842 Register DemoteVec = InsMI->getOperand(0).getReg();
4843 const TargetRegisterClass *RC =
4844 getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
4845 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
4846 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
4847 return false;
4848 }
4849 unsigned SubReg = 0;
4850 if (!getSubRegForClass(RC, TRI, SubReg))
4851 return false;
4852 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
4853 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
4854            << ")\n");
4855 return false;
4856 }
4857 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
4858 .addReg(DemoteVec, 0, SubReg);
4859 RBI.constrainGenericRegister(DstReg, *RC, MRI);
4860 } else {
4861 // No widening needed.
4862 InsMI->getOperand(0).setReg(DstReg);
4863 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
4864 }
4865
4866 I.eraseFromParent();
4867 return true;
4868}
4869
4870MachineInstr *
4871AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
4872 MachineIRBuilder &MIRBuilder,
4873 MachineRegisterInfo &MRI) {
4874 LLT DstTy = MRI.getType(Dst);
4875 unsigned DstSize = DstTy.getSizeInBits();
4876 if (CV->isNullValue()) {
4877 if (DstSize == 128) {
4878 auto Mov =
4879 MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
4880 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
4881 return &*Mov;
4882 }
4883
4884 if (DstSize == 64) {
4885 auto Mov =
4886 MIRBuilder
4887 .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
4888 .addImm(0);
4889 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {})
4890 .addReg(Mov.getReg(0), 0, AArch64::dsub);
4891 RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);
4892 return &*Copy;
4893 }
4894 }
4895
4896 auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);
4897 if (!CPLoad) {
4898 LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");
4899 return nullptr;
4900 }
4901
4902 auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0));
4903 RBI.constrainGenericRegister(
4904 Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI);
4905 return &*Copy;
4906}
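// For instance, an all-zero 128-bit vector becomes "movi v0.2d, #0"; an
// all-zero 64-bit vector is built in an FPR128 and copied out through the
// dsub subregister; any other constant vector falls back to a constant-pool
// load.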
4907
4908bool AArch64InstructionSelector::tryOptConstantBuildVec(
4909 MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) {
4910 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
;
4911 unsigned DstSize = DstTy.getSizeInBits();
4912 assert(DstSize <= 128 && "Unexpected build_vec type!");
4913 if (DstSize < 32)
4914 return false;
4915 // Check if we're building a constant vector, in which case we want to
4916 // generate a constant pool load instead of a vector insert sequence.
4917 SmallVector<Constant *, 16> Csts;
4918 for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
4919 // Try to find G_CONSTANT or G_FCONSTANT
4920 auto *OpMI =
4921 getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI);
4922 if (OpMI)
4923 Csts.emplace_back(
4924 const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm()));
4925 else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT,
4926 I.getOperand(Idx).getReg(), MRI)))
4927 Csts.emplace_back(
4928 const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm()));
4929 else
4930 return false;
4931 }
4932 Constant *CV = ConstantVector::get(Csts);
4933 if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI))
4934 return false;
4935 I.eraseFromParent();
4936 return true;
4937}
4938
4939bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
4940 MachineRegisterInfo &MRI) {
4941 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4942 // Until we port more of the optimized selections, for now just use a vector
4943 // insert sequence.
4944 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
4945 const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
4946 unsigned EltSize = EltTy.getSizeInBits();
4947
4948 if (tryOptConstantBuildVec(I, DstTy, MRI))
4949 return true;
4950 if (EltSize < 16 || EltSize > 64)
4951 return false; // Don't support all element types yet.
4952 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
4953
4954 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
4955 MachineInstr *ScalarToVec =
4956 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
4957 I.getOperand(1).getReg(), MIB);
4958 if (!ScalarToVec)
4959 return false;
4960
4961 Register DstVec = ScalarToVec->getOperand(0).getReg();
4962 unsigned DstSize = DstTy.getSizeInBits();
4963
4964 // Keep track of the last MI we inserted. Later on, we might be able to save
4965 // a copy using it.
4966 MachineInstr *PrevMI = nullptr;
4967 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
4968 // Note that if we don't do a subregister copy, we can end up making an
4969 // extra register.
4970 PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
4971 MIB);
4972 DstVec = PrevMI->getOperand(0).getReg();
4973 }
4974
4975 // If DstTy's size in bits is less than 128, then emit a subregister copy
4976 // from DstVec to the last register we've defined.
4977 if (DstSize < 128) {
4978 // Force this to be FPR using the destination vector.
4979 const TargetRegisterClass *RC =
4980 getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
4981 if (!RC)
4982 return false;
4983 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
4984 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
4985 return false;
4986 }
4987
4988 unsigned SubReg = 0;
4989 if (!getSubRegForClass(RC, TRI, SubReg))
4990 return false;
4991 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
4992 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
4993            << ")\n");
4994 return false;
4995 }
4996
4997 Register Reg = MRI.createVirtualRegister(RC);
4998 Register DstReg = I.getOperand(0).getReg();
4999
5000 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, 0, SubReg);
5001 MachineOperand &RegOp = I.getOperand(1);
5002 RegOp.setReg(Reg);
5003 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5004 } else {
5005 // We don't need a subregister copy. Save a copy by re-using the
5006 // destination register on the final insert.
5007 assert(PrevMI && "PrevMI was null?");
5008 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
5009 constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
5010 }
5011
5012 I.eraseFromParent();
5013 return true;
5014}
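// Roughly, a <4 x s32> G_BUILD_VECTOR %a, %b, %c, %d becomes a
// scalar-to-vector for %a followed by one lane insert per remaining element
// (lanes 1..3), with the final insert reusing the destination register when
// no subregister copy is needed.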
5015
5016/// Helper function to find an intrinsic ID on a MachineInstr. Returns the
5017/// ID if it exists, and 0 otherwise.
5018static unsigned findIntrinsicID(MachineInstr &I) {
5019 auto IntrinOp = find_if(I.operands(), [&](const MachineOperand &Op) {
5020 return Op.isIntrinsicID();
5021 });
5022 if (IntrinOp == I.operands_end())
5023 return 0;
5024 return IntrinOp->getIntrinsicID();
5025}
5026
5027bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
5028 MachineInstr &I, MachineRegisterInfo &MRI) {
5029 // Find the intrinsic ID.
5030 unsigned IntrinID = findIntrinsicID(I);
5031 if (!IntrinID)
5032 return false;
5033
5034 // Select the instruction.
5035 switch (IntrinID) {
5036 default:
5037 return false;
5038 case Intrinsic::aarch64_ldxp:
5039 case Intrinsic::aarch64_ldaxp: {
5040 auto NewI = MIB.buildInstr(
5041 IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,
5042 {I.getOperand(0).getReg(), I.getOperand(1).getReg()},
5043 {I.getOperand(3)});
5044 NewI.cloneMemRefs(I);
5045 break;
5046 }
5047 case Intrinsic::trap:
5048 MIB.buildInstr(AArch64::BRK, {}, {}).addImm(1);
5049 break;
5050 case Intrinsic::debugtrap:
5051 MIB.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
5052 break;
5053 case Intrinsic::ubsantrap:
5054 MIB.buildInstr(AArch64::BRK, {}, {})
5055 .addImm(I.getOperand(1).getImm() | ('U' << 8));
5056 break;
5057 case Intrinsic::aarch64_neon_st2: {
5058 Register Src1 = I.getOperand(1).getReg();
5059 Register Src2 = I.getOperand(2).getReg();
5060 Register Ptr = I.getOperand(3).getReg();
5061 LLT Ty = MRI.getType(Src1);
5062 const LLT S8 = LLT::scalar(8);
5063 const LLT S16 = LLT::scalar(16);
5064 const LLT S32 = LLT::scalar(32);
5065 const LLT S64 = LLT::scalar(64);
5066 const LLT P0 = LLT::pointer(0, 64);
5067 unsigned Opc;
5068 if (Ty == LLT::fixed_vector(8, S8))
5069 Opc = AArch64::ST2Twov8b;
5070 else if (Ty == LLT::fixed_vector(16, S8))
5071 Opc = AArch64::ST2Twov16b;
5072 else if (Ty == LLT::fixed_vector(4, S16))
5073 Opc = AArch64::ST2Twov4h;
5074 else if (Ty == LLT::fixed_vector(8, S16))
5075 Opc = AArch64::ST2Twov8h;
5076 else if (Ty == LLT::fixed_vector(2, S32))
5077 Opc = AArch64::ST2Twov2s;
5078 else if (Ty == LLT::fixed_vector(4, S32))
5079 Opc = AArch64::ST2Twov4s;
5080 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
5081 Opc = AArch64::ST2Twov2d;
5082 else if (Ty == S64 || Ty == P0)
5083 Opc = AArch64::ST1Twov1d;
5084 else
5085 llvm_unreachable("Unexpected type for st2!");
5086 SmallVector<Register, 2> Regs = {Src1, Src2};
5087 Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB)
5088 : createDTuple(Regs, MIB);
5089 auto Store = MIB.buildInstr(Opc, {}, {Tuple, Ptr});
5090 Store.cloneMemRefs(I);
5091 constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
5092 break;
5093 }
5094 }
5095
5096 I.eraseFromParent();
5097 return true;
5098}
5099
5100bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
5101 MachineRegisterInfo &MRI) {
5102 unsigned IntrinID = findIntrinsicID(I);
5103 if (!IntrinID)
5104 return false;
5105
5106 switch (IntrinID) {
5107 default:
5108 break;
5109 case Intrinsic::aarch64_crypto_sha1h: {
5110 Register DstReg = I.getOperand(0).getReg();
5111 Register SrcReg = I.getOperand(2).getReg();
5112
5113 // FIXME: Should this be an assert?
5114 if (MRI.getType(DstReg).getSizeInBits() != 32 ||
5115 MRI.getType(SrcReg).getSizeInBits() != 32)
5116 return false;
5117
5118 // The operation has to happen on FPRs. Set up some new FPR registers for
5119 // the source and destination if they are on GPRs.
5120 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
5121 SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
5122 MIB.buildCopy({SrcReg}, {I.getOperand(2)});
5123
5124 // Make sure the copy ends up getting constrained properly.
5125 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
5126 AArch64::GPR32RegClass, MRI);
5127 }
5128
5129 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
5130 DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
5131
5132 // Actually insert the instruction.
5133 auto SHA1Inst = MIB.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
5134 constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
5135
5136 // Did we create a new register for the destination?
5137 if (DstReg != I.getOperand(0).getReg()) {
5138 // Yep. Copy the result of the instruction back into the original
5139 // destination.
5140 MIB.buildCopy({I.getOperand(0)}, {DstReg});
5141 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
5142 AArch64::GPR32RegClass, MRI);
5143 }
5144
5145 I.eraseFromParent();
5146 return true;
5147 }
5148 case Intrinsic::frameaddress:
5149 case Intrinsic::returnaddress: {
5150 MachineFunction &MF = *I.getParent()->getParent();
5151 MachineFrameInfo &MFI = MF.getFrameInfo();
5152
5153 unsigned Depth = I.getOperand(2).getImm();
5154 Register DstReg = I.getOperand(0).getReg();
5155 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
5156
5157 if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
5158 if (!MFReturnAddr) {
5159 // Insert the copy from LR/X30 into the entry block, before it can be
5160 // clobbered by anything.
5161 MFI.setReturnAddressIsTaken(true);
5162 MFReturnAddr = getFunctionLiveInPhysReg(MF, TII, AArch64::LR,
5163 AArch64::GPR64RegClass);
5164 }
5165
5166 if (STI.hasPAuth()) {
5167 MIB.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
5168 } else {
5169 MIB.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
5170 MIB.buildInstr(AArch64::XPACLRI);
5171 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
5172 }
5173
5174 I.eraseFromParent();
5175 return true;
5176 }
5177
5178 MFI.setFrameAddressIsTaken(true);
5179 Register FrameAddr(AArch64::FP);
5180 while (Depth--) {
5181 Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
5182 auto Ldr =
5183 MIB.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}).addImm(0);
5184 constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI);
5185 FrameAddr = NextFrame;
5186 }
5187
5188 if (IntrinID == Intrinsic::frameaddress)
5189 MIB.buildCopy({DstReg}, {FrameAddr});
5190 else {
5191 MFI.setReturnAddressIsTaken(true);
5192
5193 if (STI.hasPAuth()) {
5194 Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
5195 MIB.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
5196 MIB.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
5197 } else {
5198 MIB.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr})
5199 .addImm(1);
5200 MIB.buildInstr(AArch64::XPACLRI);
5201 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
5202 }
5203 }
5204
5205 I.eraseFromParent();
5206 return true;
5207 }
5208 case Intrinsic::swift_async_context_addr:
5209 auto Sub = MIB.buildInstr(AArch64::SUBXri, {I.getOperand(0).getReg()},
5210 {Register(AArch64::FP)})
5211 .addImm(8)
5212 .addImm(0);
5213 constrainSelectedInstRegOperands(*Sub, TII, TRI, RBI);
5214
5215 MF->getFrameInfo().setFrameAddressIsTaken(true);
5216 MF->getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
5217 I.eraseFromParent();
5218 return true;
5219 }
5220 return false;
5221}
5222
5223InstructionSelector::ComplexRendererFns
5224AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
5225 auto MaybeImmed = getImmedFromMO(Root);
5226 if (MaybeImmed == None || *MaybeImmed > 31)
5227 return None;
5228 uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
5229 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
5230}
5231
5232InstructionSelector::ComplexRendererFns
5233AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
5234 auto MaybeImmed = getImmedFromMO(Root);
5235 if (MaybeImmed == None || *MaybeImmed > 31)
5236 return None;
5237 uint64_t Enc = 31 - *MaybeImmed;
5238 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
5239}
5240
5241InstructionSelector::ComplexRendererFns
5242AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
5243 auto MaybeImmed = getImmedFromMO(Root);
5244 if (MaybeImmed == None || *MaybeImmed > 63)
5245 return None;
5246 uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
5247 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
5248}
5249
5250InstructionSelector::ComplexRendererFns
5251AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
5252 auto MaybeImmed = getImmedFromMO(Root);
5253 if (MaybeImmed == None || *MaybeImmed > 63)
5254 return None;
5255 uint64_t Enc = 63 - *MaybeImmed;
5256 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
5257}
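// Worked example (using the standard UBFM alias for LSL): a 32-bit left
// shift by 5 renders ShiftA = (32 - 5) & 0x1f = 27 and ShiftB = 31 - 5 = 26,
// i.e. "lsl w0, w1, #5" encoded as "ubfm w0, w1, #27, #26".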
5258
5259/// Helper to select an immediate value that can be represented as a 12-bit
5260/// value shifted left by either 0 or 12. If it is possible to do so, return
5261/// the immediate and shift value. If not, return None.
5262///
5263/// Used by selectArithImmed and selectNegArithImmed.
5264InstructionSelector::ComplexRendererFns
5265AArch64InstructionSelector::select12BitValueWithLeftShift(
5266 uint64_t Immed) const {
5267 unsigned ShiftAmt;
5268 if (Immed >> 12 == 0) {
5269 ShiftAmt = 0;
5270 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
5271 ShiftAmt = 12;
5272 Immed = Immed >> 12;
5273 } else
5274 return None;
5275
5276 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
5277 return {{
5278 [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
5279 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
5280 }};
5281}
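// Worked example: Immed = 0xabc fits directly (LSL #0); Immed = 0xabc000
// selects (0xabc, LSL #12); Immed = 0xabc001 has low bits set and is too
// wide, so it returns None.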
5282
5283/// SelectArithImmed - Select an immediate value that can be represented as
5284/// a 12-bit value shifted left by either 0 or 12. If so, return true with
5285/// Val set to the 12-bit value and Shift set to the shifter operand.
5286InstructionSelector::ComplexRendererFns
5287AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
5288 // This function is called from the addsub_shifted_imm ComplexPattern,
5289 // which lists [imm] as the list of opcode it's interested in, however
5290 // we still need to check whether the operand is actually an immediate
5291 // here because the ComplexPattern opcode list is only used in
5292 // root-level opcode matching.
5293 auto MaybeImmed = getImmedFromMO(Root);
5294 if (MaybeImmed == None)
5295 return None;
5296 return select12BitValueWithLeftShift(*MaybeImmed);
5297}
5298
5299/// SelectNegArithImmed - As above, but negates the value before trying to
5300/// select it.
5301InstructionSelector::ComplexRendererFns
5302AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
5303 // We need a register here, because we need to know if we have a 64 or 32
5304 // bit immediate.
5305 if (!Root.isReg())
5306 return None;
5307 auto MaybeImmed = getImmedFromMO(Root);
5308 if (MaybeImmed == None)
5309 return None;
5310 uint64_t Immed = *MaybeImmed;
5311
5312 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
5313 // have the opposite effect on the C flag, so this pattern mustn't match under
5314 // those circumstances.
5315 if (Immed == 0)
5316 return None;
5317
5318 // Check if we're dealing with a 32-bit type on the root or a 64-bit type on
5319 // the root.
5320 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5321 if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
5322 Immed = ~((uint32_t)Immed) + 1;
5323 else
5324 Immed = ~Immed + 1ULL;
5325
5326 if (Immed & 0xFFFFFFFFFF000000ULL)
5327 return None;
5328
5329 Immed &= 0xFFFFFFULL;
5330 return select12BitValueWithLeftShift(Immed);
5331}
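// E.g. for a 32-bit "cmp w0, #-5", negation gives ~0xfffffffb + 1 = 5,
// which fits the 12-bit form, so the comparison can be selected as
// "cmn w0, #5" instead of materializing the negative constant.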
5332
5333/// Return true if it is worth folding MI into an extended register. That is,
5334/// if it's safe to pull it into the addressing mode of a load or store as a
5335/// shift.
5336bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
5337 MachineInstr &MI, const MachineRegisterInfo &MRI) const {
5338 // Always fold if there is one use, or if we're optimizing for size.
5339 Register DefReg = MI.getOperand(0).getReg();
5340 if (MRI.hasOneNonDBGUse(DefReg) ||
5341 MI.getParent()->getParent()->getFunction().hasOptSize())
5342 return true;
5343
5344 // It's better to avoid folding and recomputing shifts when we don't have a
5345 // fastpath.
5346 if (!STI.hasLSLFast())
5347 return false;
5348
5349 // We have a fastpath, so folding a shift in and potentially computing it
5350 // many times may be beneficial. Check if this is only used in memory ops.
5351 // If it is, then we should fold.
5352 return all_of(MRI.use_nodbg_instructions(DefReg),
5353 [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
5354}
5355
5356static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) {
5357 switch (Type) {
5358 case AArch64_AM::SXTB:
5359 case AArch64_AM::SXTH:
5360 case AArch64_AM::SXTW:
5361 return true;
5362 default:
5363 return false;
5364 }
5365}
5366
5367InstructionSelector::ComplexRendererFns
5368AArch64InstructionSelector::selectExtendedSHL(
5369 MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
5370 unsigned SizeInBytes, bool WantsExt) const {
5371 assert(Base.isReg() && "Expected base to be a register operand");
5372 assert(Offset.isReg() && "Expected offset to be a register operand");
5373
5374 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5375 MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
5376 if (!OffsetInst)
5377 return None;
5378
5379 unsigned OffsetOpc = OffsetInst->getOpcode();
5380 bool LookedThroughZExt = false;
5381 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
5382 // Try to look through a ZEXT.
5383 if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
5384 return None;
5385
5386 OffsetInst = MRI.getVRegDef(OffsetInst->getOperand(1).getReg());
5387 OffsetOpc = OffsetInst->getOpcode();
5388 LookedThroughZExt = true;
5389
5390 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
5391 return None;
5392 }
5393 // Make sure that the memory op is a valid size.
5394 int64_t LegalShiftVal = Log2_32(SizeInBytes);
5395 if (LegalShiftVal == 0)
5396 return None;
5397 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
5398 return None;
5399
5400 // Now, try to find the specific G_CONSTANT. Start by assuming that the
5401 // register we will offset is the LHS, and the register containing the
5402 // constant is the RHS.
5403 Register OffsetReg = OffsetInst->getOperand(1).getReg();
5404 Register ConstantReg = OffsetInst->getOperand(2).getReg();
5405 auto ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
5406 if (!ValAndVReg) {
5407 // We didn't get a constant on the RHS. If the opcode is a shift, then
5408 // we're done.
5409 if (OffsetOpc == TargetOpcode::G_SHL)
5410 return None;
5411
5412 // If we have a G_MUL, we can use either register. Try looking at the RHS.
5413 std::swap(OffsetReg, ConstantReg);
5414 ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
5415 if (!ValAndVReg)
5416 return None;
5417 }
5418
5419 // The value must fit into 3 bits, and must be positive. Make sure that is
5420 // true.
5421 int64_t ImmVal = ValAndVReg->Value.getSExtValue();
5422
5423 // Since we're going to pull this into a shift, the constant value must be
5424 // a power of 2. If we got a multiply, then we need to check this.
5425 if (OffsetOpc == TargetOpcode::G_MUL) {
5426 if (!isPowerOf2_32(ImmVal))
5427 return None;
5428
5429 // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
5430 ImmVal = Log2_32(ImmVal);
5431 }
5432
5433 if ((ImmVal & 0x7) != ImmVal)
5434 return None;
5435
5436 // We are only allowed to shift by LegalShiftVal. This shift value is built
5437 // into the instruction, so we can't just use whatever we want.
5438 if (ImmVal != LegalShiftVal)
5439 return None;
5440
5441 unsigned SignExtend = 0;
5442 if (WantsExt) {
5443 // Check if the offset is defined by an extend, unless we looked through a
5444 // G_ZEXT earlier.
5445 if (!LookedThroughZExt) {
5446 MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
5447 auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
5448 if (Ext == AArch64_AM::InvalidShiftExtend)
5449 return None;
5450
5451 SignExtend = isSignExtendShiftType(Ext) ? 1 : 0;
5452 // We only support SXTW for signed extension here.
5453 if (SignExtend && Ext != AArch64_AM::SXTW)
5454 return None;
5455 OffsetReg = ExtInst->getOperand(1).getReg();
5456 }
5457
5458 // Need a 32-bit wide register here.
5459 MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
5460 OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
5461 }
5462
5463 // We can use the LHS of the GEP as the base, and the LHS of the shift as an
5464 // offset. Signify that we are shifting by setting the shift flag to 1.
5465 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
5466 [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
5467 [=](MachineInstrBuilder &MIB) {
5468 // Need to add both immediates here to make sure that they are both
5469 // added to the instruction.
5470 MIB.addImm(SignExtend);
5471 MIB.addImm(1);
5472 }}};
5473}
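// For example, with 8-byte accesses (LegalShiftVal = 3) this matches either
//   %off = G_SHL %idx, 3    or    %off = G_MUL %idx, 8
// and renders (Base, %idx, SignExtend, /*Shifted=*/1), i.e. an addressing
// mode like "ldr x0, [base, idx, lsl #3]".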
5474
5475/// This is used for computing addresses like this:
5476///
5477/// ldr x1, [x2, x3, lsl #3]
5478///
5479/// Where x2 is the base register, and x3 is an offset register. The shift-left
5480/// is a constant value specific to this load instruction. That is, we'll never
5481/// see anything other than a 3 here (which corresponds to the size of the
5482/// element being loaded.)
5483InstructionSelector::ComplexRendererFns
5484AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
5485 MachineOperand &Root, unsigned SizeInBytes) const {
5486 if (!Root.isReg())
5487 return None;
5488 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5489
5490 // We want to find something like this:
5491 //
5492 // val = G_CONSTANT LegalShiftVal
5493 // shift = G_SHL off_reg val
5494 // ptr = G_PTR_ADD base_reg shift
5495 // x = G_LOAD ptr
5496 //
5497 // And fold it into this addressing mode:
5498 //
5499 // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
5500
5501 // Check if we can find the G_PTR_ADD.
5502 MachineInstr *PtrAdd =
5503 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
5504 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
5505 return None;
5506
5507 // Now, try to find an opcode which handles our specific offset.
5508 // We want a G_SHL or a G_MUL.
5509 MachineInstr *OffsetInst =
5510 getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI);
5511 return selectExtendedSHL(Root, PtrAdd->getOperand(1),
5512 OffsetInst->getOperand(0), SizeInBytes,
5513 /*WantsExt=*/false);
5514}
5515
5516/// This is used for computing addresses like this:
5517///
5518/// ldr x1, [x2, x3]
5519///
5520/// Where x2 is the base register, and x3 is an offset register.
5521///
5522/// When it is possible (or profitable) to fold a G_PTR_ADD into the address
5523/// calculation, this will do so. Otherwise, it will return None.
5524InstructionSelector::ComplexRendererFns
5525AArch64InstructionSelector::selectAddrModeRegisterOffset(
5526 MachineOperand &Root) const {
5527 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5528
5529 // We need a GEP.
5530 MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
5531 if (!Gep || Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
5532 return None;
5533
5534 // If this is used more than once, let's not bother folding.
5535 // TODO: Check if they are memory ops. If they are, then we can still fold
5536 // without having to recompute anything.
5537 if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg()))
5538 return None;
5539
5540 // Base is the GEP's LHS, offset is its RHS.
5541 return {{[=](MachineInstrBuilder &MIB) {
5542 MIB.addUse(Gep->getOperand(1).getReg());
5543 },
5544 [=](MachineInstrBuilder &MIB) {
5545 MIB.addUse(Gep->getOperand(2).getReg());
5546 },
5547 [=](MachineInstrBuilder &MIB) {
5548 // Need to add both immediates here to make sure that they are both
5549 // added to the instruction.
5550 MIB.addImm(0);
5551 MIB.addImm(0);
5552 }}};
5553}
5554
5555/// This is intended to be equivalent to selectAddrModeXRO in
5556/// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
5557InstructionSelector::ComplexRendererFns
5558AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
5559 unsigned SizeInBytes) const {
5560 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5561 if (!Root.isReg())
5562 return None;
5563 MachineInstr *PtrAdd =
5564 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
5565 if (!PtrAdd)
5566 return None;
5567
5568 // Check for immediates which cannot be encoded in the [base + imm]
5569 // addressing mode, and can't be encoded in an add/sub. If this happens, we'll
5570 // end up with code like:
5571 //
5572 // mov x0, wide
5573 // add x1 base, x0
5574 // ldr x2, [x1, x0]
5575 //
5576 // In this situation, we can use the [base, xreg] addressing mode to save an
5577 // add/sub:
5578 //
5579 // mov x0, wide
5580 // ldr x2, [base, x0]
5581 auto ValAndVReg =
5582 getConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI);
5583 if (ValAndVReg) {
5584 unsigned Scale = Log2_32(SizeInBytes);
5585 int64_t ImmOff = ValAndVReg->Value.getSExtValue();