Bug Summary

File: llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
Warning: line 893, column 7
6th function call argument is an uninitialized value

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name AArch64InstructionSelector.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -fno-split-dwarf-inlining -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-12/lib/clang/12.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/build-llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/build-llvm/include -I /build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward 
-internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-12/lib/clang/12.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/build-llvm/lib/Target/AArch64 -fdebug-prefix-map=/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf=. -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2021-01-20-132452-38772-1 -x c++ /build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

1//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the InstructionSelector class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
14#include "AArch64InstrInfo.h"
15#include "AArch64MachineFunctionInfo.h"
16#include "AArch64RegisterBankInfo.h"
17#include "AArch64RegisterInfo.h"
18#include "AArch64Subtarget.h"
19#include "AArch64TargetMachine.h"
20#include "MCTargetDesc/AArch64AddressingModes.h"
21#include "MCTargetDesc/AArch64MCTargetDesc.h"
22#include "llvm/ADT/Optional.h"
23#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
24#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
25#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
26#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
27#include "llvm/CodeGen/GlobalISel/Utils.h"
28#include "llvm/CodeGen/MachineBasicBlock.h"
29#include "llvm/CodeGen/MachineConstantPool.h"
30#include "llvm/CodeGen/MachineFunction.h"
31#include "llvm/CodeGen/MachineInstr.h"
32#include "llvm/CodeGen/MachineInstrBuilder.h"
33#include "llvm/CodeGen/MachineOperand.h"
34#include "llvm/CodeGen/MachineRegisterInfo.h"
35#include "llvm/CodeGen/TargetOpcodes.h"
36#include "llvm/IR/Constants.h"
37#include "llvm/IR/Instructions.h"
38#include "llvm/IR/PatternMatch.h"
39#include "llvm/IR/Type.h"
40#include "llvm/IR/IntrinsicsAArch64.h"
41#include "llvm/Pass.h"
42#include "llvm/Support/Debug.h"
43#include "llvm/Support/raw_ostream.h"
44
45#define DEBUG_TYPE"aarch64-isel" "aarch64-isel"
46
47using namespace llvm;
48using namespace MIPatternMatch;
49
50namespace {
51
52#define GET_GLOBALISEL_PREDICATE_BITSET
53#include "AArch64GenGlobalISel.inc"
54#undef GET_GLOBALISEL_PREDICATE_BITSET
55
56class AArch64InstructionSelector : public InstructionSelector {
57public:
58 AArch64InstructionSelector(const AArch64TargetMachine &TM,
59 const AArch64Subtarget &STI,
60 const AArch64RegisterBankInfo &RBI);
61
62 bool select(MachineInstr &I) override;
63 static const char *getName() { return DEBUG_TYPE"aarch64-isel"; }
64
65 void setupMF(MachineFunction &MF, GISelKnownBits &KB,
66 CodeGenCoverage &CoverageInfo) override {
67 InstructionSelector::setupMF(MF, KB, CoverageInfo);
68
69 // hasFnAttribute() is expensive to call on every BRCOND selection, so
70 // cache it here for each run of the selector.
71 ProduceNonFlagSettingCondBr =
72 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
73 MFReturnAddr = Register();
74
75 processPHIs(MF);
76 }
77
78private:
79 /// tblgen-erated 'select' implementation, used as the initial selector for
80 /// the patterns that don't require complex C++.
81 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
82
83 // A lowering phase that runs before any selection attempts.
84 // Returns true if the instruction was modified.
85 bool preISelLower(MachineInstr &I);
86
87 // An early selection function that runs before the selectImpl() call.
88 bool earlySelect(MachineInstr &I) const;
89
90 // Do some preprocessing of G_PHIs before we begin selection.
91 void processPHIs(MachineFunction &MF);
92
93 bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
94
95 /// Eliminate same-sized cross-bank copies into stores before selectImpl().
96 bool contractCrossBankCopyIntoStore(MachineInstr &I,
97 MachineRegisterInfo &MRI);
98
99 bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
100
101 bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
102 MachineRegisterInfo &MRI) const;
103 bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
104 MachineRegisterInfo &MRI) const;
105
106 ///@{
107 /// Helper functions for selectCompareBranch.
108 bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
109 MachineIRBuilder &MIB) const;
110 bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
111 MachineIRBuilder &MIB) const;
112 bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
113 MachineIRBuilder &MIB) const;
114 bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
115 MachineBasicBlock *DstMBB,
116 MachineIRBuilder &MIB) const;
117 ///@}
118
119 bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
120 MachineRegisterInfo &MRI) const;
121
122 bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI) const;
123 bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
124
125 // Helper to generate an equivalent of scalar_to_vector into a new register,
126 // returned via 'Dst'.
127 MachineInstr *emitScalarToVector(unsigned EltSize,
128 const TargetRegisterClass *DstRC,
129 Register Scalar,
130 MachineIRBuilder &MIRBuilder) const;
131
132 /// Emit a lane insert into \p DstReg, or a new vector register if None is
133 /// provided.
134 ///
135 /// The lane inserted into is defined by \p LaneIdx. The vector source
136 /// register is given by \p SrcReg. The register containing the element is
137 /// given by \p EltReg.
138 MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
139 Register EltReg, unsigned LaneIdx,
140 const RegisterBank &RB,
141 MachineIRBuilder &MIRBuilder) const;
142 bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
143 bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
144 MachineRegisterInfo &MRI) const;
145 bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
146 bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
147 bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
148
149 bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
150 bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
151 bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
152 bool selectSplitVectorUnmerge(MachineInstr &I,
153 MachineRegisterInfo &MRI) const;
154 bool selectIntrinsicWithSideEffects(MachineInstr &I,
155 MachineRegisterInfo &MRI) const;
156 bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
157 bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const;
158 bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
159 bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
160 bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const;
161 bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const;
162 bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI) const;
163 bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI) const;
164
165 unsigned emitConstantPoolEntry(const Constant *CPVal,
166 MachineFunction &MF) const;
167 MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
168 MachineIRBuilder &MIRBuilder) const;
169
170 // Emit a vector concat operation.
171 MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
172 Register Op2,
173 MachineIRBuilder &MIRBuilder) const;
174
175 // Emit an integer compare between LHS and RHS, which checks for Predicate.
176 MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
177 MachineOperand &Predicate,
178 MachineIRBuilder &MIRBuilder) const;
179
180 /// Emit a floating point comparison between \p LHS and \p RHS.
181 /// \p Pred if given is the intended predicate to use.
182 MachineInstr *emitFPCompare(Register LHS, Register RHS,
183 MachineIRBuilder &MIRBuilder,
184 Optional<CmpInst::Predicate> = None) const;
185
186 MachineInstr *emitInstr(unsigned Opcode,
187 std::initializer_list<llvm::DstOp> DstOps,
188 std::initializer_list<llvm::SrcOp> SrcOps,
189 MachineIRBuilder &MIRBuilder,
190 const ComplexRendererFns &RenderFns = None) const;
191 /// Helper function to emit an add or sub instruction.
192 ///
193 /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
194 /// in a specific order.
195 ///
196 /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
197 ///
198 /// \code
199 /// const std::array<std::array<unsigned, 2>, 4> Table {
200 /// {{AArch64::ADDXri, AArch64::ADDWri},
201 /// {AArch64::ADDXrs, AArch64::ADDWrs},
202 /// {AArch64::ADDXrr, AArch64::ADDWrr},
203 /// {AArch64::SUBXri, AArch64::SUBWri},
204 /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
205 /// \endcode
206 ///
207 /// Each row in the table corresponds to a different addressing mode. Each
208 /// column corresponds to a different register size.
209 ///
210 /// \attention Rows must be structured as follows:
211 /// - Row 0: The ri opcode variants
212 /// - Row 1: The rs opcode variants
213 /// - Row 2: The rr opcode variants
214 /// - Row 3: The ri opcode variants for negative immediates
215 /// - Row 4: The rx opcode variants
216 ///
217 /// \attention Columns must be structured as follows:
218 /// - Column 0: The 64-bit opcode variants
219 /// - Column 1: The 32-bit opcode variants
220 ///
221 /// \p Dst is the destination register of the binop to emit.
222 /// \p LHS is the left-hand operand of the binop to emit.
223 /// \p RHS is the right-hand operand of the binop to emit.
224 MachineInstr *emitAddSub(
225 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
226 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
227 MachineIRBuilder &MIRBuilder) const;
228 MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
229 MachineOperand &RHS,
230 MachineIRBuilder &MIRBuilder) const;
231 MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
232 MachineIRBuilder &MIRBuilder) const;
233 MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
234 MachineIRBuilder &MIRBuilder) const;
235 MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
236 MachineIRBuilder &MIRBuilder) const;
237 MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
238 MachineIRBuilder &MIRBuilder) const;
239 MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
240 AArch64CC::CondCode CC,
241 MachineIRBuilder &MIRBuilder) const;
242 MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
243 const RegisterBank &DstRB, LLT ScalarTy,
244 Register VecReg, unsigned LaneIdx,
245 MachineIRBuilder &MIRBuilder) const;
246
247 /// Helper function for selecting G_FCONSTANT. If the G_FCONSTANT can be
248 /// materialized using a FMOV instruction, then update MI and return it.
249 /// Otherwise, do nothing and return a nullptr.
250 MachineInstr *emitFMovForFConstant(MachineInstr &MI,
251 MachineRegisterInfo &MRI) const;
252
253 /// Emit a CSet for an integer compare.
254 ///
255 /// \p DefReg is expected to be a 32-bit scalar register.
256 MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
257 MachineIRBuilder &MIRBuilder) const;
258 /// Emit a CSet for a FP compare.
259 ///
260 /// \p Dst is expected to be a 32-bit scalar register.
261 MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
262 MachineIRBuilder &MIRBuilder) const;
263
264 /// Emit the overflow op for \p Opcode.
265 ///
266 /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
267 /// G_USUBO, etc.
268 std::pair<MachineInstr *, AArch64CC::CondCode>
269 emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
270 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
271
272 /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
273 /// \p IsNegative is true if the test should be "not zero".
274 /// This will also optimize the test bit instruction when possible.
275 MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
276 MachineBasicBlock *DstMBB,
277 MachineIRBuilder &MIB) const;
278
279 /// Emit a CB(N)Z instruction which branches to \p DestMBB.
280 MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
281 MachineBasicBlock *DestMBB,
282 MachineIRBuilder &MIB) const;
283
284 // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
285 // We use these manually instead of using the importer since it doesn't
286 // support SDNodeXForm.
287 ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
288 ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
289 ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
290 ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
291
292 ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
293 ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
294 ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
295
296 ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
297 unsigned Size) const;
298
299 ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
300 return selectAddrModeUnscaled(Root, 1);
301 }
302 ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
303 return selectAddrModeUnscaled(Root, 2);
304 }
305 ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
306 return selectAddrModeUnscaled(Root, 4);
307 }
308 ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
309 return selectAddrModeUnscaled(Root, 8);
310 }
311 ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
312 return selectAddrModeUnscaled(Root, 16);
313 }
314
315 /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
316 /// from complex pattern matchers like selectAddrModeIndexed().
317 ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
318 MachineRegisterInfo &MRI) const;
319
320 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
321 unsigned Size) const;
322 template <int Width>
323 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
324 return selectAddrModeIndexed(Root, Width / 8);
325 }
326
327 bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
328 const MachineRegisterInfo &MRI) const;
329 ComplexRendererFns
330 selectAddrModeShiftedExtendXReg(MachineOperand &Root,
331 unsigned SizeInBytes) const;
332
333 /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
334 /// or not a shift + extend should be folded into an addressing mode. Returns
335 /// None when this is not profitable or possible.
336 ComplexRendererFns
337 selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
338 MachineOperand &Offset, unsigned SizeInBytes,
339 bool WantsExt) const;
340 ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
341 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
342 unsigned SizeInBytes) const;
343 template <int Width>
344 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
345 return selectAddrModeXRO(Root, Width / 8);
346 }
347
348 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
349 unsigned SizeInBytes) const;
350 template <int Width>
351 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
352 return selectAddrModeWRO(Root, Width / 8);
353 }
354
355 ComplexRendererFns selectShiftedRegister(MachineOperand &Root) const;
356
357 ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
358 return selectShiftedRegister(Root);
359 }
360
361 ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
362 // TODO: selectShiftedRegister should allow for rotates on logical shifts.
363 // For now, make them the same. The only difference between the two is that
364 // logical shifts are allowed to fold in rotates. Otherwise, these are
365 // functionally the same.
366 return selectShiftedRegister(Root);
367 }
368
369 /// Given an extend instruction, determine the correct shift-extend type for
370 /// that instruction.
371 ///
372 /// If the instruction is going to be used in a load or store, pass
373 /// \p IsLoadStore = true.
374 AArch64_AM::ShiftExtendType
375 getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
376 bool IsLoadStore = false) const;
377
378 /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
379 ///
380 /// \returns Either \p Reg if no change was necessary, or the new register
381 /// created by moving \p Reg.
382 ///
383 /// Note: This uses emitCopy right now.
384 Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
385 MachineIRBuilder &MIB) const;
386
387 ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
388
389 void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
390 int OpIdx = -1) const;
391 void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
392 int OpIdx = -1) const;
393 void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
394 int OpIdx = -1) const;
395
396 // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
397 void materializeLargeCMVal(MachineInstr &I, const Value *V,
398 unsigned OpFlags) const;
399
400 // Optimization methods.
401 bool tryOptSelect(MachineInstr &MI) const;
402 MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
403 MachineOperand &Predicate,
404 MachineIRBuilder &MIRBuilder) const;
405
406 /// Return true if \p MI is a load or store of \p NumBytes bytes.
407 bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
408
409 /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
410 /// register zeroed out. In other words, the result of MI has been explicitly
411 /// zero extended.
412 bool isDef32(const MachineInstr &MI) const;
413
414 const AArch64TargetMachine &TM;
415 const AArch64Subtarget &STI;
416 const AArch64InstrInfo &TII;
417 const AArch64RegisterInfo &TRI;
418 const AArch64RegisterBankInfo &RBI;
419
420 bool ProduceNonFlagSettingCondBr = false;
421
422 // Some cached values used during selection.
423 // We use LR as a live-in register, and we keep track of it here as it can be
424 // clobbered by calls.
425 Register MFReturnAddr;
426
427#define GET_GLOBALISEL_PREDICATES_DECL
428#include "AArch64GenGlobalISel.inc"
429#undef GET_GLOBALISEL_PREDICATES_DECL
430
431// We declare the temporaries used by selectImpl() in the class to minimize the
432// cost of constructing placeholder values.
433#define GET_GLOBALISEL_TEMPORARIES_DECL
434#include "AArch64GenGlobalISel.inc"
435#undef GET_GLOBALISEL_TEMPORARIES_DECL
436};
437
438} // end anonymous namespace
439
440#define GET_GLOBALISEL_IMPL
441#include "AArch64GenGlobalISel.inc"
442#undef GET_GLOBALISEL_IMPL
443
444AArch64InstructionSelector::AArch64InstructionSelector(
445 const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
446 const AArch64RegisterBankInfo &RBI)
447 : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
448 TRI(*STI.getRegisterInfo()), RBI(RBI),
449#define GET_GLOBALISEL_PREDICATES_INIT
450#include "AArch64GenGlobalISel.inc"
451#undef GET_GLOBALISEL_PREDICATES_INIT
452#define GET_GLOBALISEL_TEMPORARIES_INIT
453#include "AArch64GenGlobalISel.inc"
454#undef GET_GLOBALISEL_TEMPORARIES_INIT
455{
456}
457
458// FIXME: This should be target-independent, inferred from the types declared
459// for each class in the bank.
460static const TargetRegisterClass *
461getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
462 const RegisterBankInfo &RBI,
463 bool GetAllRegSet = false) {
464 if (RB.getID() == AArch64::GPRRegBankID) {
465 if (Ty.getSizeInBits() <= 32)
466 return GetAllRegSet ? &AArch64::GPR32allRegClass
467 : &AArch64::GPR32RegClass;
468 if (Ty.getSizeInBits() == 64)
469 return GetAllRegSet ? &AArch64::GPR64allRegClass
470 : &AArch64::GPR64RegClass;
471 return nullptr;
472 }
473
474 if (RB.getID() == AArch64::FPRRegBankID) {
475 if (Ty.getSizeInBits() <= 16)
476 return &AArch64::FPR16RegClass;
477 if (Ty.getSizeInBits() == 32)
478 return &AArch64::FPR32RegClass;
479 if (Ty.getSizeInBits() == 64)
480 return &AArch64::FPR64RegClass;
481 if (Ty.getSizeInBits() == 128)
482 return &AArch64::FPR128RegClass;
483 return nullptr;
484 }
485
486 return nullptr;
487}
488
489/// Given a register bank, and size in bits, return the smallest register class
490/// that can represent that combination.
491static const TargetRegisterClass *
492getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
493 bool GetAllRegSet = false) {
494 unsigned RegBankID = RB.getID();
495
496 if (RegBankID == AArch64::GPRRegBankID) {
497 if (SizeInBits <= 32)
498 return GetAllRegSet ? &AArch64::GPR32allRegClass
499 : &AArch64::GPR32RegClass;
500 if (SizeInBits == 64)
501 return GetAllRegSet ? &AArch64::GPR64allRegClass
502 : &AArch64::GPR64RegClass;
503 }
504
505 if (RegBankID == AArch64::FPRRegBankID) {
506 switch (SizeInBits) {
507 default:
508 return nullptr;
509 case 8:
510 return &AArch64::FPR8RegClass;
511 case 16:
512 return &AArch64::FPR16RegClass;
513 case 32:
514 return &AArch64::FPR32RegClass;
515 case 64:
516 return &AArch64::FPR64RegClass;
517 case 128:
518 return &AArch64::FPR128RegClass;
519 }
520 }
521
522 return nullptr;
523}
524
525/// Returns the correct subregister to use for a given register class.
526static bool getSubRegForClass(const TargetRegisterClass *RC,
527 const TargetRegisterInfo &TRI, unsigned &SubReg) {
528 switch (TRI.getRegSizeInBits(*RC)) {
53
Control jumps to the 'default' case at line 544
529 case 8:
530 SubReg = AArch64::bsub;
531 break;
532 case 16:
533 SubReg = AArch64::hsub;
534 break;
535 case 32:
536 if (RC != &AArch64::FPR32RegClass)
537 SubReg = AArch64::sub_32;
538 else
539 SubReg = AArch64::ssub;
540 break;
541 case 64:
542 SubReg = AArch64::dsub;
543 break;
544 default:
545 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Couldn't find appropriate subregister for register class."
; } } while (false)
54
Assuming 'DebugFlag' is false
55
Loop condition is false. Exiting loop
546 dbgs() << "Couldn't find appropriate subregister for register class.")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Couldn't find appropriate subregister for register class."
; } } while (false)
;
547 return false;
56
Returning without writing to 'SubReg'
548 }
549
550 return true;
551}
552
553/// Returns the minimum size the given register bank can hold.
554static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
555 switch (RB.getID()) {
556 case AArch64::GPRRegBankID:
557 return 32;
558 case AArch64::FPRRegBankID:
559 return 8;
560 default:
561 llvm_unreachable("Tried to get minimum size for unknown register bank.")::llvm::llvm_unreachable_internal("Tried to get minimum size for unknown register bank."
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 561)
;
562 }
563}
564
565static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
566 auto &MI = *Root.getParent();
567 auto &MBB = *MI.getParent();
568 auto &MF = *MBB.getParent();
569 auto &MRI = MF.getRegInfo();
570 uint64_t Immed;
571 if (Root.isImm())
572 Immed = Root.getImm();
573 else if (Root.isCImm())
574 Immed = Root.getCImm()->getZExtValue();
575 else if (Root.isReg()) {
576 auto ValAndVReg =
577 getConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
578 if (!ValAndVReg)
579 return None;
580 Immed = ValAndVReg->Value.getSExtValue();
581 } else
582 return None;
583 return Immed;
584}
585
586/// Check whether \p I is a currently unsupported binary operation:
587/// - it has an unsized type
588/// - an operand is not a vreg
589/// - all operands are not in the same bank
590/// These are checks that should someday live in the verifier, but right now,
591/// these are mostly limitations of the aarch64 selector.
592static bool unsupportedBinOp(const MachineInstr &I,
593 const AArch64RegisterBankInfo &RBI,
594 const MachineRegisterInfo &MRI,
595 const AArch64RegisterInfo &TRI) {
596 LLT Ty = MRI.getType(I.getOperand(0).getReg());
597 if (!Ty.isValid()) {
598 LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Generic binop register should be typed\n"
; } } while (false)
;
599 return true;
600 }
601
602 const RegisterBank *PrevOpBank = nullptr;
603 for (auto &MO : I.operands()) {
604 // FIXME: Support non-register operands.
605 if (!MO.isReg()) {
606 LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Generic inst non-reg operands are unsupported\n"
; } } while (false)
;
607 return true;
608 }
609
610 // FIXME: Can generic operations have physical registers operands? If
611 // so, this will need to be taught about that, and we'll need to get the
612 // bank out of the minimal class for the register.
613 // Either way, this needs to be documented (and possibly verified).
614 if (!Register::isVirtualRegister(MO.getReg())) {
615 LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Generic inst has physical register operand\n"
; } } while (false)
;
616 return true;
617 }
618
619 const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
620 if (!OpBank) {
621 LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Generic register has no bank or class\n"
; } } while (false)
;
622 return true;
623 }
624
625 if (PrevOpBank && OpBank != PrevOpBank) {
626 LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Generic inst operands have different banks\n"
; } } while (false)
;
627 return true;
628 }
629 PrevOpBank = OpBank;
630 }
631 return false;
632}
633
634/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
635/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
636/// and of size \p OpSize.
637/// \returns \p GenericOpc if the combination is unsupported.
638static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
639 unsigned OpSize) {
640 switch (RegBankID) {
641 case AArch64::GPRRegBankID:
642 if (OpSize == 32) {
643 switch (GenericOpc) {
644 case TargetOpcode::G_SHL:
645 return AArch64::LSLVWr;
646 case TargetOpcode::G_LSHR:
647 return AArch64::LSRVWr;
648 case TargetOpcode::G_ASHR:
649 return AArch64::ASRVWr;
650 default:
651 return GenericOpc;
652 }
653 } else if (OpSize == 64) {
654 switch (GenericOpc) {
655 case TargetOpcode::G_PTR_ADD:
656 return AArch64::ADDXrr;
657 case TargetOpcode::G_SHL:
658 return AArch64::LSLVXr;
659 case TargetOpcode::G_LSHR:
660 return AArch64::LSRVXr;
661 case TargetOpcode::G_ASHR:
662 return AArch64::ASRVXr;
663 default:
664 return GenericOpc;
665 }
666 }
667 break;
668 case AArch64::FPRRegBankID:
669 switch (OpSize) {
670 case 32:
671 switch (GenericOpc) {
672 case TargetOpcode::G_FADD:
673 return AArch64::FADDSrr;
674 case TargetOpcode::G_FSUB:
675 return AArch64::FSUBSrr;
676 case TargetOpcode::G_FMUL:
677 return AArch64::FMULSrr;
678 case TargetOpcode::G_FDIV:
679 return AArch64::FDIVSrr;
680 default:
681 return GenericOpc;
682 }
683 case 64:
684 switch (GenericOpc) {
685 case TargetOpcode::G_FADD:
686 return AArch64::FADDDrr;
687 case TargetOpcode::G_FSUB:
688 return AArch64::FSUBDrr;
689 case TargetOpcode::G_FMUL:
690 return AArch64::FMULDrr;
691 case TargetOpcode::G_FDIV:
692 return AArch64::FDIVDrr;
693 case TargetOpcode::G_OR:
694 return AArch64::ORRv8i8;
695 default:
696 return GenericOpc;
697 }
698 }
699 break;
700 }
701 return GenericOpc;
702}
703
704/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
705/// appropriate for the (value) register bank \p RegBankID and of memory access
706/// size \p OpSize. This returns the variant with the base+unsigned-immediate
707/// addressing mode (e.g., LDRXui).
708/// \returns \p GenericOpc if the combination is unsupported.
709static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
710 unsigned OpSize) {
711 const bool isStore = GenericOpc == TargetOpcode::G_STORE;
712 switch (RegBankID) {
713 case AArch64::GPRRegBankID:
714 switch (OpSize) {
715 case 8:
716 return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
717 case 16:
718 return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
719 case 32:
720 return isStore ? AArch64::STRWui : AArch64::LDRWui;
721 case 64:
722 return isStore ? AArch64::STRXui : AArch64::LDRXui;
723 }
724 break;
725 case AArch64::FPRRegBankID:
726 switch (OpSize) {
727 case 8:
728 return isStore ? AArch64::STRBui : AArch64::LDRBui;
729 case 16:
730 return isStore ? AArch64::STRHui : AArch64::LDRHui;
731 case 32:
732 return isStore ? AArch64::STRSui : AArch64::LDRSui;
733 case 64:
734 return isStore ? AArch64::STRDui : AArch64::LDRDui;
735 }
736 break;
737 }
738 return GenericOpc;
739}
740
741#ifndef NDEBUG
742/// Helper function that verifies that we have a valid copy at the end of
743/// selectCopy. Verifies that the source and dest have the expected sizes and
744/// then returns true.
745static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
746 const MachineRegisterInfo &MRI,
747 const TargetRegisterInfo &TRI,
748 const RegisterBankInfo &RBI) {
749 const Register DstReg = I.getOperand(0).getReg();
750 const Register SrcReg = I.getOperand(1).getReg();
751 const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
752 const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
753
754 // Make sure the size of the source and dest line up.
755 assert((((DstSize == SrcSize || (Register::isPhysicalRegister(SrcReg
) && DstSize <= SrcSize) || (((DstSize + 31) / 32 ==
(SrcSize + 31) / 32) && DstSize > SrcSize)) &&
"Copy with different width?!") ? static_cast<void> (0)
: __assert_fail ("(DstSize == SrcSize || (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && \"Copy with different width?!\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 764, __PRETTY_FUNCTION__))
756 (DstSize == SrcSize ||(((DstSize == SrcSize || (Register::isPhysicalRegister(SrcReg
) && DstSize <= SrcSize) || (((DstSize + 31) / 32 ==
(SrcSize + 31) / 32) && DstSize > SrcSize)) &&
"Copy with different width?!") ? static_cast<void> (0)
: __assert_fail ("(DstSize == SrcSize || (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && \"Copy with different width?!\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 764, __PRETTY_FUNCTION__))
757 // Copies are a mean to setup initial types, the number of(((DstSize == SrcSize || (Register::isPhysicalRegister(SrcReg
) && DstSize <= SrcSize) || (((DstSize + 31) / 32 ==
(SrcSize + 31) / 32) && DstSize > SrcSize)) &&
"Copy with different width?!") ? static_cast<void> (0)
: __assert_fail ("(DstSize == SrcSize || (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && \"Copy with different width?!\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 764, __PRETTY_FUNCTION__))
758 // bits may not exactly match.(((DstSize == SrcSize || (Register::isPhysicalRegister(SrcReg
) && DstSize <= SrcSize) || (((DstSize + 31) / 32 ==
(SrcSize + 31) / 32) && DstSize > SrcSize)) &&
"Copy with different width?!") ? static_cast<void> (0)
: __assert_fail ("(DstSize == SrcSize || (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && \"Copy with different width?!\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 764, __PRETTY_FUNCTION__))
759 (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||(((DstSize == SrcSize || (Register::isPhysicalRegister(SrcReg
) && DstSize <= SrcSize) || (((DstSize + 31) / 32 ==
(SrcSize + 31) / 32) && DstSize > SrcSize)) &&
"Copy with different width?!") ? static_cast<void> (0)
: __assert_fail ("(DstSize == SrcSize || (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && \"Copy with different width?!\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 764, __PRETTY_FUNCTION__))
760 // Copies are a mean to copy bits around, as long as we are(((DstSize == SrcSize || (Register::isPhysicalRegister(SrcReg
) && DstSize <= SrcSize) || (((DstSize + 31) / 32 ==
(SrcSize + 31) / 32) && DstSize > SrcSize)) &&
"Copy with different width?!") ? static_cast<void> (0)
: __assert_fail ("(DstSize == SrcSize || (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && \"Copy with different width?!\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 764, __PRETTY_FUNCTION__))
761 // on the same register class, that's fine. Otherwise, that(((DstSize == SrcSize || (Register::isPhysicalRegister(SrcReg
) && DstSize <= SrcSize) || (((DstSize + 31) / 32 ==
(SrcSize + 31) / 32) && DstSize > SrcSize)) &&
"Copy with different width?!") ? static_cast<void> (0)
: __assert_fail ("(DstSize == SrcSize || (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && \"Copy with different width?!\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 764, __PRETTY_FUNCTION__))
762 // means we need some SUBREG_TO_REG or AND & co.(((DstSize == SrcSize || (Register::isPhysicalRegister(SrcReg
) && DstSize <= SrcSize) || (((DstSize + 31) / 32 ==
(SrcSize + 31) / 32) && DstSize > SrcSize)) &&
"Copy with different width?!") ? static_cast<void> (0)
: __assert_fail ("(DstSize == SrcSize || (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && \"Copy with different width?!\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 764, __PRETTY_FUNCTION__))
763 (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&(((DstSize == SrcSize || (Register::isPhysicalRegister(SrcReg
) && DstSize <= SrcSize) || (((DstSize + 31) / 32 ==
(SrcSize + 31) / 32) && DstSize > SrcSize)) &&
"Copy with different width?!") ? static_cast<void> (0)
: __assert_fail ("(DstSize == SrcSize || (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && \"Copy with different width?!\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 764, __PRETTY_FUNCTION__))
764 "Copy with different width?!")(((DstSize == SrcSize || (Register::isPhysicalRegister(SrcReg
) && DstSize <= SrcSize) || (((DstSize + 31) / 32 ==
(SrcSize + 31) / 32) && DstSize > SrcSize)) &&
"Copy with different width?!") ? static_cast<void> (0)
: __assert_fail ("(DstSize == SrcSize || (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && \"Copy with different width?!\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 764, __PRETTY_FUNCTION__))
;
765
766 // Check the size of the destination.
767 assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&(((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID
) && "GPRs cannot get more than 64-bit width values")
? static_cast<void> (0) : __assert_fail ("(DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) && \"GPRs cannot get more than 64-bit width values\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 768, __PRETTY_FUNCTION__))
768 "GPRs cannot get more than 64-bit width values")(((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID
) && "GPRs cannot get more than 64-bit width values")
? static_cast<void> (0) : __assert_fail ("(DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) && \"GPRs cannot get more than 64-bit width values\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 768, __PRETTY_FUNCTION__))
;
769
770 return true;
771}
772#endif
773
774/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
775/// to \p *To.
776///
777/// E.g "To = COPY SrcReg:SubReg"
778static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
779 const RegisterBankInfo &RBI, Register SrcReg,
780 const TargetRegisterClass *To, unsigned SubReg) {
781 assert(SrcReg.isValid() && "Expected a valid source register?")((SrcReg.isValid() && "Expected a valid source register?"
) ? static_cast<void> (0) : __assert_fail ("SrcReg.isValid() && \"Expected a valid source register?\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 781, __PRETTY_FUNCTION__))
;
782 assert(To && "Destination register class cannot be null")((To && "Destination register class cannot be null") ?
static_cast<void> (0) : __assert_fail ("To && \"Destination register class cannot be null\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 782, __PRETTY_FUNCTION__))
;
783 assert(SubReg && "Expected a valid subregister")((SubReg && "Expected a valid subregister") ? static_cast
<void> (0) : __assert_fail ("SubReg && \"Expected a valid subregister\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 783, __PRETTY_FUNCTION__))
;
784
785 MachineIRBuilder MIB(I);
786 auto SubRegCopy =
787 MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
788 MachineOperand &RegOp = I.getOperand(1);
789 RegOp.setReg(SubRegCopy.getReg(0));
790
791 // It's possible that the destination register won't be constrained. Make
792 // sure that happens.
793 if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))
794 RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
795
796 return true;
797}
798
799/// Helper function to get the source and destination register classes for a
800/// copy. Returns a std::pair containing the source register class for the
801/// copy, and the destination register class for the copy. If a register class
802/// cannot be determined, then it will be nullptr.
803static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
804getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
805 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
806 const RegisterBankInfo &RBI) {
807 Register DstReg = I.getOperand(0).getReg();
808 Register SrcReg = I.getOperand(1).getReg();
809 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
810 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
811 unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
812 unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
813
814 // Special casing for cross-bank copies of s1s. We can technically represent
815 // a 1-bit value with any size of register. The minimum size for a GPR is 32
816 // bits. So, we need to put the FPR on 32 bits as well.
817 //
818 // FIXME: I'm not sure if this case holds true outside of copies. If it does,
819 // then we can pull it into the helpers that get the appropriate class for a
820 // register bank. Or make a new helper that carries along some constraint
821 // information.
822 if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
823 SrcSize = DstSize = 32;
824
825 return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
826 getMinClassForRegBank(DstRegBank, DstSize, true)};
827}
828
829static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
830 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
831 const RegisterBankInfo &RBI) {
832 Register DstReg = I.getOperand(0).getReg();
833 Register SrcReg = I.getOperand(1).getReg();
834 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
835 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
836
837 // Find the correct register classes for the source and destination registers.
838 const TargetRegisterClass *SrcRC;
839 const TargetRegisterClass *DstRC;
840 std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
24
Calling 'tie<const llvm::TargetRegisterClass *, const llvm::TargetRegisterClass *>'
35
Returning from 'tie<const llvm::TargetRegisterClass *, const llvm::TargetRegisterClass *>'
36
Calling 'tuple::operator='
39
Returning from 'tuple::operator='
841
842 if (!DstRC) {
40
Assuming 'DstRC' is non-null
41
Taking false branch
843 LLVM_DEBUG(dbgs() << "Unexpected dest size "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unexpected dest size " <<
RBI.getSizeInBits(DstReg, MRI, TRI) << '\n'; } } while
(false)
844 << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unexpected dest size " <<
RBI.getSizeInBits(DstReg, MRI, TRI) << '\n'; } } while
(false)
;
845 return false;
846 }
847
848 // A couple helpers below, for making sure that the copy we produce is valid.
849
850 // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
851 // to verify that the src and dst are the same size, since that's handled by
852 // the SUBREG_TO_REG.
853 bool KnownValid = false;
854
855 // Returns true, or asserts if something we don't expect happens. Instead of
856 // returning true, we return isValidCopy() to ensure that we verify the
857 // result.
858 auto CheckCopy = [&]() {
859 // If we have a bitcast or something, we can't have physical registers.
860 assert((I.isCopy() ||(((I.isCopy() || (!Register::isPhysicalRegister(I.getOperand(
0).getReg()) && !Register::isPhysicalRegister(I.getOperand
(1).getReg()))) && "No phys reg on generic operator!"
) ? static_cast<void> (0) : __assert_fail ("(I.isCopy() || (!Register::isPhysicalRegister(I.getOperand(0).getReg()) && !Register::isPhysicalRegister(I.getOperand(1).getReg()))) && \"No phys reg on generic operator!\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 863, __PRETTY_FUNCTION__))
861 (!Register::isPhysicalRegister(I.getOperand(0).getReg()) &&(((I.isCopy() || (!Register::isPhysicalRegister(I.getOperand(
0).getReg()) && !Register::isPhysicalRegister(I.getOperand
(1).getReg()))) && "No phys reg on generic operator!"
) ? static_cast<void> (0) : __assert_fail ("(I.isCopy() || (!Register::isPhysicalRegister(I.getOperand(0).getReg()) && !Register::isPhysicalRegister(I.getOperand(1).getReg()))) && \"No phys reg on generic operator!\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 863, __PRETTY_FUNCTION__))
862 !Register::isPhysicalRegister(I.getOperand(1).getReg()))) &&(((I.isCopy() || (!Register::isPhysicalRegister(I.getOperand(
0).getReg()) && !Register::isPhysicalRegister(I.getOperand
(1).getReg()))) && "No phys reg on generic operator!"
) ? static_cast<void> (0) : __assert_fail ("(I.isCopy() || (!Register::isPhysicalRegister(I.getOperand(0).getReg()) && !Register::isPhysicalRegister(I.getOperand(1).getReg()))) && \"No phys reg on generic operator!\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 863, __PRETTY_FUNCTION__))
863 "No phys reg on generic operator!")(((I.isCopy() || (!Register::isPhysicalRegister(I.getOperand(
0).getReg()) && !Register::isPhysicalRegister(I.getOperand
(1).getReg()))) && "No phys reg on generic operator!"
) ? static_cast<void> (0) : __assert_fail ("(I.isCopy() || (!Register::isPhysicalRegister(I.getOperand(0).getReg()) && !Register::isPhysicalRegister(I.getOperand(1).getReg()))) && \"No phys reg on generic operator!\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 863, __PRETTY_FUNCTION__))
;
864 bool ValidCopy = true;
865#ifndef NDEBUG
866 ValidCopy = KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI);
867 assert(ValidCopy && "Invalid copy.")((ValidCopy && "Invalid copy.") ? static_cast<void
> (0) : __assert_fail ("ValidCopy && \"Invalid copy.\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 867, __PRETTY_FUNCTION__))
;
868#endif
869 return ValidCopy;
870 };
871
872 // Is this a copy? If so, then we may need to insert a subregister copy.
873 if (I.isCopy()) {
42
Calling 'MachineInstr::isCopy'
45
Returning from 'MachineInstr::isCopy'
46
Taking true branch
874 // Yes. Check if there's anything to fix up.
875 if (!SrcRC) {
47
Assuming 'SrcRC' is non-null
48
Taking false branch
876 LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Couldn't determine source register class\n"
; } } while (false)
;
877 return false;
878 }
879
880 unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
881 unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
882 unsigned SubReg;
49
'SubReg' declared without an initial value
883
884 // If the source bank doesn't support a subregister copy small enough,
885 // then we first need to copy to the destination bank.
886 if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
50
Assuming the condition is true
51
Taking true branch
887 const TargetRegisterClass *DstTempRC =
888 getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
889 getSubRegForClass(DstRC, TRI, SubReg);
52
Calling 'getSubRegForClass'
57
Returning from 'getSubRegForClass'
890
891 MachineIRBuilder MIB(I);
892 auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
893 copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
58
6th function call argument is an uninitialized value
894 } else if (SrcSize > DstSize) {
895 // If the source register is bigger than the destination we need to
896 // perform a subregister copy.
897 const TargetRegisterClass *SubRegRC =
898 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
899 getSubRegForClass(SubRegRC, TRI, SubReg);
900 copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
901 } else if (DstSize > SrcSize) {
902 // If the destination register is bigger than the source we need to do
903 // a promotion using SUBREG_TO_REG.
904 const TargetRegisterClass *PromotionRC =
905 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
906 getSubRegForClass(SrcRC, TRI, SubReg);
907
908 Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
909 BuildMI(*I.getParent(), I, I.getDebugLoc(),
910 TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
911 .addImm(0)
912 .addUse(SrcReg)
913 .addImm(SubReg);
914 MachineOperand &RegOp = I.getOperand(1);
915 RegOp.setReg(PromoteReg);
916
917 // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
918 KnownValid = true;
919 }
920
921 // If the destination is a physical register, then there's nothing to
922 // change, so we're done.
923 if (Register::isPhysicalRegister(DstReg))
924 return CheckCopy();
925 }
926
927 // No need to constrain SrcReg. It will get constrained when we hit another
928 // of its use or its defs. Copies do not have constraints.
929 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
930 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Failed to constrain " <<
TII.getName(I.getOpcode()) << " operand\n"; } } while (
false)
931 << " operand\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Failed to constrain " <<
TII.getName(I.getOpcode()) << " operand\n"; } } while (
false)
;
932 return false;
933 }
934 I.setDesc(TII.get(AArch64::COPY));
935 return CheckCopy();
936}
937
938static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
939 if (!DstTy.isScalar() || !SrcTy.isScalar())
940 return GenericOpc;
941
942 const unsigned DstSize = DstTy.getSizeInBits();
943 const unsigned SrcSize = SrcTy.getSizeInBits();
944
945 switch (DstSize) {
946 case 32:
947 switch (SrcSize) {
948 case 32:
949 switch (GenericOpc) {
950 case TargetOpcode::G_SITOFP:
951 return AArch64::SCVTFUWSri;
952 case TargetOpcode::G_UITOFP:
953 return AArch64::UCVTFUWSri;
954 case TargetOpcode::G_FPTOSI:
955 return AArch64::FCVTZSUWSr;
956 case TargetOpcode::G_FPTOUI:
957 return AArch64::FCVTZUUWSr;
958 default:
959 return GenericOpc;
960 }
961 case 64:
962 switch (GenericOpc) {
963 case TargetOpcode::G_SITOFP:
964 return AArch64::SCVTFUXSri;
965 case TargetOpcode::G_UITOFP:
966 return AArch64::UCVTFUXSri;
967 case TargetOpcode::G_FPTOSI:
968 return AArch64::FCVTZSUWDr;
969 case TargetOpcode::G_FPTOUI:
970 return AArch64::FCVTZUUWDr;
971 default:
972 return GenericOpc;
973 }
974 default:
975 return GenericOpc;
976 }
977 case 64:
978 switch (SrcSize) {
979 case 32:
980 switch (GenericOpc) {
981 case TargetOpcode::G_SITOFP:
982 return AArch64::SCVTFUWDri;
983 case TargetOpcode::G_UITOFP:
984 return AArch64::UCVTFUWDri;
985 case TargetOpcode::G_FPTOSI:
986 return AArch64::FCVTZSUXSr;
987 case TargetOpcode::G_FPTOUI:
988 return AArch64::FCVTZUUXSr;
989 default:
990 return GenericOpc;
991 }
992 case 64:
993 switch (GenericOpc) {
994 case TargetOpcode::G_SITOFP:
995 return AArch64::SCVTFUXDri;
996 case TargetOpcode::G_UITOFP:
997 return AArch64::UCVTFUXDri;
998 case TargetOpcode::G_FPTOSI:
999 return AArch64::FCVTZSUXDr;
1000 case TargetOpcode::G_FPTOUI:
1001 return AArch64::FCVTZUUXDr;
1002 default:
1003 return GenericOpc;
1004 }
1005 default:
1006 return GenericOpc;
1007 }
1008 default:
1009 return GenericOpc;
1010 };
1011 return GenericOpc;
1012}
1013
1014MachineInstr *
1015AArch64InstructionSelector::emitSelect(Register Dst, Register True,
1016 Register False, AArch64CC::CondCode CC,
1017 MachineIRBuilder &MIB) const {
1018 MachineRegisterInfo &MRI = *MIB.getMRI();
1019 assert(RBI.getRegBank(False, MRI, TRI)->getID() ==((RBI.getRegBank(False, MRI, TRI)->getID() == RBI.getRegBank
(True, MRI, TRI)->getID() && "Expected both select operands to have the same regbank?"
) ? static_cast<void> (0) : __assert_fail ("RBI.getRegBank(False, MRI, TRI)->getID() == RBI.getRegBank(True, MRI, TRI)->getID() && \"Expected both select operands to have the same regbank?\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1021, __PRETTY_FUNCTION__))
1020 RBI.getRegBank(True, MRI, TRI)->getID() &&((RBI.getRegBank(False, MRI, TRI)->getID() == RBI.getRegBank
(True, MRI, TRI)->getID() && "Expected both select operands to have the same regbank?"
) ? static_cast<void> (0) : __assert_fail ("RBI.getRegBank(False, MRI, TRI)->getID() == RBI.getRegBank(True, MRI, TRI)->getID() && \"Expected both select operands to have the same regbank?\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1021, __PRETTY_FUNCTION__))
1021 "Expected both select operands to have the same regbank?")((RBI.getRegBank(False, MRI, TRI)->getID() == RBI.getRegBank
(True, MRI, TRI)->getID() && "Expected both select operands to have the same regbank?"
) ? static_cast<void> (0) : __assert_fail ("RBI.getRegBank(False, MRI, TRI)->getID() == RBI.getRegBank(True, MRI, TRI)->getID() && \"Expected both select operands to have the same regbank?\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1021, __PRETTY_FUNCTION__))
;
1022 LLT Ty = MRI.getType(True);
1023 if (Ty.isVector())
1024 return nullptr;
1025 const unsigned Size = Ty.getSizeInBits();
1026 assert((Size == 32 || Size == 64) &&(((Size == 32 || Size == 64) && "Expected 32 bit or 64 bit select only?"
) ? static_cast<void> (0) : __assert_fail ("(Size == 32 || Size == 64) && \"Expected 32 bit or 64 bit select only?\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1027, __PRETTY_FUNCTION__))
1027 "Expected 32 bit or 64 bit select only?")(((Size == 32 || Size == 64) && "Expected 32 bit or 64 bit select only?"
) ? static_cast<void> (0) : __assert_fail ("(Size == 32 || Size == 64) && \"Expected 32 bit or 64 bit select only?\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1027, __PRETTY_FUNCTION__))
;
1028 const bool Is32Bit = Size == 32;
1029 if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
1030 unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1031 auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1032 constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
1033 return &*FCSel;
1034 }
1035
1036 // By default, we'll try and emit a CSEL.
1037 unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1038 bool Optimized = false;
1039 auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
1040 &Optimized](Register &Reg, Register &OtherReg,
1041 bool Invert) {
1042 if (Optimized)
1043 return false;
1044
1045 // Attempt to fold:
1046 //
1047 // %sub = G_SUB 0, %x
1048 // %select = G_SELECT cc, %reg, %sub
1049 //
1050 // Into:
1051 // %select = CSNEG %reg, %x, cc
1052 Register MatchReg;
1053 if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
1054 Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1055 Reg = MatchReg;
1056 if (Invert) {
1057 CC = AArch64CC::getInvertedCondCode(CC);
1058 std::swap(Reg, OtherReg);
1059 }
1060 return true;
1061 }
1062
1063 // Attempt to fold:
1064 //
1065 // %xor = G_XOR %x, -1
1066 // %select = G_SELECT cc, %reg, %xor
1067 //
1068 // Into:
1069 // %select = CSINV %reg, %x, cc
1070 if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
1071 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1072 Reg = MatchReg;
1073 if (Invert) {
1074 CC = AArch64CC::getInvertedCondCode(CC);
1075 std::swap(Reg, OtherReg);
1076 }
1077 return true;
1078 }
1079
1080 // Attempt to fold:
1081 //
1082 // %add = G_ADD %x, 1
1083 // %select = G_SELECT cc, %reg, %add
1084 //
1085 // Into:
1086 // %select = CSINC %reg, %x, cc
1087 if (mi_match(Reg, MRI, m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)))) {
1088 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1089 Reg = MatchReg;
1090 if (Invert) {
1091 CC = AArch64CC::getInvertedCondCode(CC);
1092 std::swap(Reg, OtherReg);
1093 }
1094 return true;
1095 }
1096
1097 return false;
1098 };
1099
1100 // Helper lambda which tries to use CSINC/CSINV for the instruction when its
1101 // true/false values are constants.
1102 // FIXME: All of these patterns already exist in tablegen. We should be
1103 // able to import these.
1104 auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
1105 &Optimized]() {
1106 if (Optimized)
1107 return false;
1108 auto TrueCst = getConstantVRegValWithLookThrough(True, MRI);
1109 auto FalseCst = getConstantVRegValWithLookThrough(False, MRI);
1110 if (!TrueCst && !FalseCst)
1111 return false;
1112
1113 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1114 if (TrueCst && FalseCst) {
1115 int64_t T = TrueCst->Value.getSExtValue();
1116 int64_t F = FalseCst->Value.getSExtValue();
1117
1118 if (T == 0 && F == 1) {
1119 // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
1120 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1121 True = ZReg;
1122 False = ZReg;
1123 return true;
1124 }
1125
1126 if (T == 0 && F == -1) {
1127 // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
1128 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1129 True = ZReg;
1130 False = ZReg;
1131 return true;
1132 }
1133 }
1134
1135 if (TrueCst) {
1136 int64_t T = TrueCst->Value.getSExtValue();
1137 if (T == 1) {
1138 // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
1139 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1140 True = False;
1141 False = ZReg;
1142 CC = AArch64CC::getInvertedCondCode(CC);
1143 return true;
1144 }
1145
1146 if (T == -1) {
1147 // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
1148 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1149 True = False;
1150 False = ZReg;
1151 CC = AArch64CC::getInvertedCondCode(CC);
1152 return true;
1153 }
1154 }
1155
1156 if (FalseCst) {
1157 int64_t F = FalseCst->Value.getSExtValue();
1158 if (F == 1) {
1159 // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
1160 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1161 False = ZReg;
1162 return true;
1163 }
1164
1165 if (F == -1) {
1166 // G_SELECT cc, t, -1 -> CSINC t, zreg, cc
1167 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1168 False = ZReg;
1169 return true;
1170 }
1171 }
1172 return false;
1173 };
1174
1175 Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
1176 Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
1177 Optimized |= TryOptSelectCst();
1178 auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1179 constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
1180 return &*SelectInst;
1181}
1182
1183static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
1184 switch (P) {
1185 default:
1186 llvm_unreachable("Unknown condition code!")::llvm::llvm_unreachable_internal("Unknown condition code!", "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1186)
;
1187 case CmpInst::ICMP_NE:
1188 return AArch64CC::NE;
1189 case CmpInst::ICMP_EQ:
1190 return AArch64CC::EQ;
1191 case CmpInst::ICMP_SGT:
1192 return AArch64CC::GT;
1193 case CmpInst::ICMP_SGE:
1194 return AArch64CC::GE;
1195 case CmpInst::ICMP_SLT:
1196 return AArch64CC::LT;
1197 case CmpInst::ICMP_SLE:
1198 return AArch64CC::LE;
1199 case CmpInst::ICMP_UGT:
1200 return AArch64CC::HI;
1201 case CmpInst::ICMP_UGE:
1202 return AArch64CC::HS;
1203 case CmpInst::ICMP_ULT:
1204 return AArch64CC::LO;
1205 case CmpInst::ICMP_ULE:
1206 return AArch64CC::LS;
1207 }
1208}
1209
1210static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
1211 AArch64CC::CondCode &CondCode,
1212 AArch64CC::CondCode &CondCode2) {
1213 CondCode2 = AArch64CC::AL;
1214 switch (P) {
1215 default:
1216 llvm_unreachable("Unknown FP condition!")::llvm::llvm_unreachable_internal("Unknown FP condition!", "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1216)
;
1217 case CmpInst::FCMP_OEQ:
1218 CondCode = AArch64CC::EQ;
1219 break;
1220 case CmpInst::FCMP_OGT:
1221 CondCode = AArch64CC::GT;
1222 break;
1223 case CmpInst::FCMP_OGE:
1224 CondCode = AArch64CC::GE;
1225 break;
1226 case CmpInst::FCMP_OLT:
1227 CondCode = AArch64CC::MI;
1228 break;
1229 case CmpInst::FCMP_OLE:
1230 CondCode = AArch64CC::LS;
1231 break;
1232 case CmpInst::FCMP_ONE:
1233 CondCode = AArch64CC::MI;
1234 CondCode2 = AArch64CC::GT;
1235 break;
1236 case CmpInst::FCMP_ORD:
1237 CondCode = AArch64CC::VC;
1238 break;
1239 case CmpInst::FCMP_UNO:
1240 CondCode = AArch64CC::VS;
1241 break;
1242 case CmpInst::FCMP_UEQ:
1243 CondCode = AArch64CC::EQ;
1244 CondCode2 = AArch64CC::VS;
1245 break;
1246 case CmpInst::FCMP_UGT:
1247 CondCode = AArch64CC::HI;
1248 break;
1249 case CmpInst::FCMP_UGE:
1250 CondCode = AArch64CC::PL;
1251 break;
1252 case CmpInst::FCMP_ULT:
1253 CondCode = AArch64CC::LT;
1254 break;
1255 case CmpInst::FCMP_ULE:
1256 CondCode = AArch64CC::LE;
1257 break;
1258 case CmpInst::FCMP_UNE:
1259 CondCode = AArch64CC::NE;
1260 break;
1261 }
1262}
1263
1264/// Return a register which can be used as a bit to test in a TB(N)Z.
1265static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1266 MachineRegisterInfo &MRI) {
1267 assert(Reg.isValid() && "Expected valid register!")((Reg.isValid() && "Expected valid register!") ? static_cast
<void> (0) : __assert_fail ("Reg.isValid() && \"Expected valid register!\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1267, __PRETTY_FUNCTION__))
;
1268 while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1269 unsigned Opc = MI->getOpcode();
1270
1271 if (!MI->getOperand(0).isReg() ||
1272 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1273 break;
1274
1275 // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
1276 //
1277 // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1278 // on the truncated x is the same as the bit number on x.
1279 if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1280 Opc == TargetOpcode::G_TRUNC) {
1281 Register NextReg = MI->getOperand(1).getReg();
1282 // Did we find something worth folding?
1283 if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
1284 break;
1285
1286 // NextReg is worth folding. Keep looking.
1287 Reg = NextReg;
1288 continue;
1289 }
1290
1291 // Attempt to find a suitable operation with a constant on one side.
1292 Optional<uint64_t> C;
1293 Register TestReg;
1294 switch (Opc) {
1295 default:
1296 break;
1297 case TargetOpcode::G_AND:
1298 case TargetOpcode::G_XOR: {
1299 TestReg = MI->getOperand(1).getReg();
1300 Register ConstantReg = MI->getOperand(2).getReg();
1301 auto VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI);
1302 if (!VRegAndVal) {
1303 // AND commutes, check the other side for a constant.
1304 // FIXME: Can we canonicalize the constant so that it's always on the
1305 // same side at some point earlier?
1306 std::swap(ConstantReg, TestReg);
1307 VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI);
1308 }
1309 if (VRegAndVal)
1310 C = VRegAndVal->Value.getSExtValue();
1311 break;
1312 }
1313 case TargetOpcode::G_ASHR:
1314 case TargetOpcode::G_LSHR:
1315 case TargetOpcode::G_SHL: {
1316 TestReg = MI->getOperand(1).getReg();
1317 auto VRegAndVal =
1318 getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1319 if (VRegAndVal)
1320 C = VRegAndVal->Value.getSExtValue();
1321 break;
1322 }
1323 }
1324
1325 // Didn't find a constant or viable register. Bail out of the loop.
1326 if (!C || !TestReg.isValid())
1327 break;
1328
1329 // We found a suitable instruction with a constant. Check to see if we can
1330 // walk through the instruction.
1331 Register NextReg;
1332 unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
1333 switch (Opc) {
1334 default:
1335 break;
1336 case TargetOpcode::G_AND:
1337 // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1338 if ((*C >> Bit) & 1)
1339 NextReg = TestReg;
1340 break;
1341 case TargetOpcode::G_SHL:
1342 // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
1343 // the type of the register.
1344 if (*C <= Bit && (Bit - *C) < TestRegSize) {
1345 NextReg = TestReg;
1346 Bit = Bit - *C;
1347 }
1348 break;
1349 case TargetOpcode::G_ASHR:
1350 // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
1351 // in x
1352 NextReg = TestReg;
1353 Bit = Bit + *C;
1354 if (Bit >= TestRegSize)
1355 Bit = TestRegSize - 1;
1356 break;
1357 case TargetOpcode::G_LSHR:
1358 // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1359 if ((Bit + *C) < TestRegSize) {
1360 NextReg = TestReg;
1361 Bit = Bit + *C;
1362 }
1363 break;
1364 case TargetOpcode::G_XOR:
1365 // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1366 // appropriate.
1367 //
1368 // e.g. If x' = xor x, c, and the b-th bit is set in c then
1369 //
1370 // tbz x', b -> tbnz x, b
1371 //
1372 // Because x' only has the b-th bit set if x does not.
1373 if ((*C >> Bit) & 1)
1374 Invert = !Invert;
1375 NextReg = TestReg;
1376 break;
1377 }
1378
1379 // Check if we found anything worth folding.
1380 if (!NextReg.isValid())
1381 return Reg;
1382 Reg = NextReg;
1383 }
1384
1385 return Reg;
1386}
1387
1388MachineInstr *AArch64InstructionSelector::emitTestBit(
1389 Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1390 MachineIRBuilder &MIB) const {
1391 assert(TestReg.isValid())((TestReg.isValid()) ? static_cast<void> (0) : __assert_fail
("TestReg.isValid()", "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1391, __PRETTY_FUNCTION__))
;
1392 assert(ProduceNonFlagSettingCondBr &&((ProduceNonFlagSettingCondBr && "Cannot emit TB(N)Z with speculation tracking!"
) ? static_cast<void> (0) : __assert_fail ("ProduceNonFlagSettingCondBr && \"Cannot emit TB(N)Z with speculation tracking!\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1393, __PRETTY_FUNCTION__))
1393 "Cannot emit TB(N)Z with speculation tracking!")((ProduceNonFlagSettingCondBr && "Cannot emit TB(N)Z with speculation tracking!"
) ? static_cast<void> (0) : __assert_fail ("ProduceNonFlagSettingCondBr && \"Cannot emit TB(N)Z with speculation tracking!\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1393, __PRETTY_FUNCTION__))
;
1394 MachineRegisterInfo &MRI = *MIB.getMRI();
1395
1396 // Attempt to optimize the test bit by walking over instructions.
1397 TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1398 LLT Ty = MRI.getType(TestReg);
1399 unsigned Size = Ty.getSizeInBits();
1400 assert(!Ty.isVector() && "Expected a scalar!")((!Ty.isVector() && "Expected a scalar!") ? static_cast
<void> (0) : __assert_fail ("!Ty.isVector() && \"Expected a scalar!\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1400, __PRETTY_FUNCTION__))
;
1401 assert(Bit < 64 && "Bit is too large!")((Bit < 64 && "Bit is too large!") ? static_cast<
void> (0) : __assert_fail ("Bit < 64 && \"Bit is too large!\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1401, __PRETTY_FUNCTION__))
;
1402
1403 // When the test register is a 64-bit register, we have to narrow to make
1404 // TBNZW work.
1405 bool UseWReg = Bit < 32;
1406 unsigned NecessarySize = UseWReg ? 32 : 64;
1407 if (Size != NecessarySize)
1408 TestReg = moveScalarRegClass(
1409 TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1410 MIB);
1411
1412 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1413 {AArch64::TBZW, AArch64::TBNZW}};
1414 unsigned Opc = OpcTable[UseWReg][IsNegative];
1415 auto TestBitMI =
1416 MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1417 constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1418 return &*TestBitMI;
1419}
1420
1421bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1422 MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1423 MachineIRBuilder &MIB) const {
1424 assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?")((AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?"
) ? static_cast<void> (0) : __assert_fail ("AndInst.getOpcode() == TargetOpcode::G_AND && \"Expected G_AND only?\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1424, __PRETTY_FUNCTION__))
;
1425 // Given something like this:
1426 //
1427 // %x = ...Something...
1428 // %one = G_CONSTANT i64 1
1429 // %zero = G_CONSTANT i64 0
1430 // %and = G_AND %x, %one
1431 // %cmp = G_ICMP intpred(ne), %and, %zero
1432 // %cmp_trunc = G_TRUNC %cmp
1433 // G_BRCOND %cmp_trunc, %bb.3
1434 //
1435 // We want to try and fold the AND into the G_BRCOND and produce either a
1436 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1437 //
1438 // In this case, we'd get
1439 //
1440 // TBNZ %x %bb.3
1441 //
1442
1443 // Check if the AND has a constant on its RHS which we can use as a mask.
1444 // If it's a power of 2, then it's the same as checking a specific bit.
1445 // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1446 auto MaybeBit = getConstantVRegValWithLookThrough(
1447 AndInst.getOperand(2).getReg(), *MIB.getMRI());
1448 if (!MaybeBit)
1449 return false;
1450
1451 int32_t Bit = MaybeBit->Value.exactLogBase2();
1452 if (Bit < 0)
1453 return false;
1454
1455 Register TestReg = AndInst.getOperand(1).getReg();
1456
1457 // Emit a TB(N)Z.
1458 emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1459 return true;
1460}
1461
1462MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1463 bool IsNegative,
1464 MachineBasicBlock *DestMBB,
1465 MachineIRBuilder &MIB) const {
1466 assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!")((ProduceNonFlagSettingCondBr && "CBZ does not set flags!"
) ? static_cast<void> (0) : __assert_fail ("ProduceNonFlagSettingCondBr && \"CBZ does not set flags!\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1466, __PRETTY_FUNCTION__))
;
1467 MachineRegisterInfo &MRI = *MIB.getMRI();
1468 assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==((RBI.getRegBank(CompareReg, MRI, TRI)->getID() == AArch64
::GPRRegBankID && "Expected GPRs only?") ? static_cast
<void> (0) : __assert_fail ("RBI.getRegBank(CompareReg, MRI, TRI)->getID() == AArch64::GPRRegBankID && \"Expected GPRs only?\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1470, __PRETTY_FUNCTION__))
1469 AArch64::GPRRegBankID &&((RBI.getRegBank(CompareReg, MRI, TRI)->getID() == AArch64
::GPRRegBankID && "Expected GPRs only?") ? static_cast
<void> (0) : __assert_fail ("RBI.getRegBank(CompareReg, MRI, TRI)->getID() == AArch64::GPRRegBankID && \"Expected GPRs only?\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1470, __PRETTY_FUNCTION__))
1470 "Expected GPRs only?")((RBI.getRegBank(CompareReg, MRI, TRI)->getID() == AArch64
::GPRRegBankID && "Expected GPRs only?") ? static_cast
<void> (0) : __assert_fail ("RBI.getRegBank(CompareReg, MRI, TRI)->getID() == AArch64::GPRRegBankID && \"Expected GPRs only?\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1470, __PRETTY_FUNCTION__))
;
1471 auto Ty = MRI.getType(CompareReg);
1472 unsigned Width = Ty.getSizeInBits();
1473 assert(!Ty.isVector() && "Expected scalar only?")((!Ty.isVector() && "Expected scalar only?") ? static_cast
<void> (0) : __assert_fail ("!Ty.isVector() && \"Expected scalar only?\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1473, __PRETTY_FUNCTION__))
;
1474 assert(Width <= 64 && "Expected width to be at most 64?")((Width <= 64 && "Expected width to be at most 64?"
) ? static_cast<void> (0) : __assert_fail ("Width <= 64 && \"Expected width to be at most 64?\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1474, __PRETTY_FUNCTION__))
;
1475 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1476 {AArch64::CBNZW, AArch64::CBNZX}};
1477 unsigned Opc = OpcTable[IsNegative][Width == 64];
1478 auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
1479 constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
1480 return &*BranchMI;
1481}
1482
1483bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1484 MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1485 assert(FCmp.getOpcode() == TargetOpcode::G_FCMP)((FCmp.getOpcode() == TargetOpcode::G_FCMP) ? static_cast<
void> (0) : __assert_fail ("FCmp.getOpcode() == TargetOpcode::G_FCMP"
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1485, __PRETTY_FUNCTION__))
;
1486 assert(I.getOpcode() == TargetOpcode::G_BRCOND)((I.getOpcode() == TargetOpcode::G_BRCOND) ? static_cast<void
> (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_BRCOND"
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1486, __PRETTY_FUNCTION__))
;
1487 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1488 // totally clean. Some of them require two branches to implement.
1489 auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
1490 emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
1491 Pred);
1492 AArch64CC::CondCode CC1, CC2;
1493 changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2);
1494 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1495 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
1496 if (CC2 != AArch64CC::AL)
1497 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
1498 I.eraseFromParent();
1499 return true;
1500}
1501
1502bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1503 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1504 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP)((ICmp.getOpcode() == TargetOpcode::G_ICMP) ? static_cast<
void> (0) : __assert_fail ("ICmp.getOpcode() == TargetOpcode::G_ICMP"
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1504, __PRETTY_FUNCTION__))
;
1505 assert(I.getOpcode() == TargetOpcode::G_BRCOND)((I.getOpcode() == TargetOpcode::G_BRCOND) ? static_cast<void
> (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_BRCOND"
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1505, __PRETTY_FUNCTION__))
;
1506 // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
1507 //
1508 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1509 // instructions will not be produced, as they are conditional branch
1510 // instructions that do not set flags.
1511 if (!ProduceNonFlagSettingCondBr)
1512 return false;
1513
1514 MachineRegisterInfo &MRI = *MIB.getMRI();
1515 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1516 auto Pred =
1517 static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
1518 Register LHS = ICmp.getOperand(2).getReg();
1519 Register RHS = ICmp.getOperand(3).getReg();
1520
1521 // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
1522 auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
1523 MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1524
1525 // When we can emit a TB(N)Z, prefer that.
1526 //
1527 // Handle non-commutative condition codes first.
1528 // Note that we don't want to do this when we have a G_AND because it can
1529 // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1530 if (VRegAndVal && !AndInst) {
1531 int64_t C = VRegAndVal->Value.getSExtValue();
1532
1533 // When we have a greater-than comparison, we can just test if the msb is
1534 // zero.
1535 if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1536 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1537 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1538 I.eraseFromParent();
1539 return true;
1540 }
1541
1542 // When we have a less than comparison, we can just test if the msb is not
1543 // zero.
1544 if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1545 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1546 emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
1547 I.eraseFromParent();
1548 return true;
1549 }
1550 }
1551
1552 // Attempt to handle commutative condition codes. Right now, that's only
1553 // eq/ne.
1554 if (ICmpInst::isEquality(Pred)) {
1555 if (!VRegAndVal) {
1556 std::swap(RHS, LHS);
1557 VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
1558 AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1559 }
1560
1561 if (VRegAndVal && VRegAndVal->Value == 0) {
1562 // If there's a G_AND feeding into this branch, try to fold it away by
1563 // emitting a TB(N)Z instead.
1564 //
1565 // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
1566 // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
1567 // would be redundant.
1568 if (AndInst &&
1569 tryOptAndIntoCompareBranch(
1570 *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
1571 I.eraseFromParent();
1572 return true;
1573 }
1574
1575 // Otherwise, try to emit a CB(N)Z instead.
1576 auto LHSTy = MRI.getType(LHS);
1577 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1578 emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
1579 I.eraseFromParent();
1580 return true;
1581 }
1582 }
1583 }
1584
1585 return false;
1586}
1587
1588bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1589 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1590 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP)((ICmp.getOpcode() == TargetOpcode::G_ICMP) ? static_cast<
void> (0) : __assert_fail ("ICmp.getOpcode() == TargetOpcode::G_ICMP"
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1590, __PRETTY_FUNCTION__))
;
1591 assert(I.getOpcode() == TargetOpcode::G_BRCOND)((I.getOpcode() == TargetOpcode::G_BRCOND) ? static_cast<void
> (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_BRCOND"
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1591, __PRETTY_FUNCTION__))
;
1592 if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
1593 return true;
1594
1595 // Couldn't optimize. Emit a compare + a Bcc.
1596 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1597 auto PredOp = ICmp.getOperand(1);
1598 emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
1599 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
1600 static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
1601 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1602 I.eraseFromParent();
1603 return true;
1604}
1605
1606bool AArch64InstructionSelector::selectCompareBranch(
1607 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1608 Register CondReg = I.getOperand(0).getReg();
1609 MachineInstr *CCMI = MRI.getVRegDef(CondReg);
1610 if (CCMI->getOpcode() == TargetOpcode::G_TRUNC) {
1611 CondReg = CCMI->getOperand(1).getReg();
1612 CCMI = MRI.getVRegDef(CondReg);
1613 }
1614
1615 // Try to select the G_BRCOND using whatever is feeding the condition if
1616 // possible.
1617 MachineIRBuilder MIB(I);
1618 unsigned CCMIOpc = CCMI->getOpcode();
1619 if (CCMIOpc == TargetOpcode::G_FCMP)
1620 return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
1621 if (CCMIOpc == TargetOpcode::G_ICMP)
1622 return selectCompareBranchFedByICmp(I, *CCMI, MIB);
1623
1624 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1625 // instructions will not be produced, as they are conditional branch
1626 // instructions that do not set flags.
1627 if (ProduceNonFlagSettingCondBr) {
1628 emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
1629 I.getOperand(1).getMBB(), MIB);
1630 I.eraseFromParent();
1631 return true;
1632 }
1633
1634 // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
1635 auto TstMI =
1636 MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
1637 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
1638 auto Bcc = MIB.buildInstr(AArch64::Bcc)
1639 .addImm(AArch64CC::EQ)
1640 .addMBB(I.getOperand(1).getMBB());
1641 I.eraseFromParent();
1642 return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
1643}
1644
1645/// Returns the element immediate value of a vector shift operand if found.
1646/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1647static Optional<int64_t> getVectorShiftImm(Register Reg,
1648 MachineRegisterInfo &MRI) {
1649 assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand")((MRI.getType(Reg).isVector() && "Expected a *vector* shift operand"
) ? static_cast<void> (0) : __assert_fail ("MRI.getType(Reg).isVector() && \"Expected a *vector* shift operand\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1649, __PRETTY_FUNCTION__))
;
1650 MachineInstr *OpMI = MRI.getVRegDef(Reg);
1651 assert(OpMI && "Expected to find a vreg def for vector shift operand")((OpMI && "Expected to find a vreg def for vector shift operand"
) ? static_cast<void> (0) : __assert_fail ("OpMI && \"Expected to find a vreg def for vector shift operand\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1651, __PRETTY_FUNCTION__))
;
1652 if (OpMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR)
1653 return None;
1654
1655 // Check all operands are identical immediates.
1656 int64_t ImmVal = 0;
1657 for (unsigned Idx = 1; Idx < OpMI->getNumOperands(); ++Idx) {
1658 auto VRegAndVal = getConstantVRegValWithLookThrough(OpMI->getOperand(Idx).getReg(), MRI);
1659 if (!VRegAndVal)
1660 return None;
1661
1662 if (Idx == 1)
1663 ImmVal = VRegAndVal->Value.getSExtValue();
1664 if (ImmVal != VRegAndVal->Value.getSExtValue())
1665 return None;
1666 }
1667
1668 return ImmVal;
1669}
1670
1671/// Matches and returns the shift immediate value for a SHL instruction given
1672/// a shift operand.
1673static Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI) {
1674 Optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1675 if (!ShiftImm)
1676 return None;
1677 // Check the immediate is in range for a SHL.
1678 int64_t Imm = *ShiftImm;
1679 if (Imm < 0)
1680 return None;
1681 switch (SrcTy.getElementType().getSizeInBits()) {
1682 default:
1683 LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unhandled element type for vector shift"
; } } while (false)
;
1684 return None;
1685 case 8:
1686 if (Imm > 7)
1687 return None;
1688 break;
1689 case 16:
1690 if (Imm > 15)
1691 return None;
1692 break;
1693 case 32:
1694 if (Imm > 31)
1695 return None;
1696 break;
1697 case 64:
1698 if (Imm > 63)
1699 return None;
1700 break;
1701 }
1702 return Imm;
1703}
1704
1705bool AArch64InstructionSelector::selectVectorSHL(
1706 MachineInstr &I, MachineRegisterInfo &MRI) const {
1707 assert(I.getOpcode() == TargetOpcode::G_SHL)((I.getOpcode() == TargetOpcode::G_SHL) ? static_cast<void
> (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_SHL"
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1707, __PRETTY_FUNCTION__))
;
1708 Register DstReg = I.getOperand(0).getReg();
1709 const LLT Ty = MRI.getType(DstReg);
1710 Register Src1Reg = I.getOperand(1).getReg();
1711 Register Src2Reg = I.getOperand(2).getReg();
1712
1713 if (!Ty.isVector())
1714 return false;
1715
1716 // Check if we have a vector of constants on RHS that we can select as the
1717 // immediate form.
1718 Optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1719
1720 unsigned Opc = 0;
1721 if (Ty == LLT::vector(2, 64)) {
1722 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1723 } else if (Ty == LLT::vector(4, 32)) {
1724 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1725 } else if (Ty == LLT::vector(2, 32)) {
1726 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1727 } else if (Ty == LLT::vector(4, 16)) {
1728 Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1729 } else if (Ty == LLT::vector(8, 16)) {
1730 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1731 } else if (Ty == LLT::vector(16, 8)) {
1732 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1733 } else if (Ty == LLT::vector(8, 8)) {
1734 Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1735 } else {
1736 LLVM_DEBUG(dbgs() << "Unhandled G_SHL type")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unhandled G_SHL type"; }
} while (false)
;
1737 return false;
1738 }
1739
1740 MachineIRBuilder MIB(I);
1741 auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1742 if (ImmVal)
1743 Shl.addImm(*ImmVal);
1744 else
1745 Shl.addUse(Src2Reg);
1746 constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
1747 I.eraseFromParent();
1748 return true;
1749}
1750
1751bool AArch64InstructionSelector::selectVectorAshrLshr(
1752 MachineInstr &I, MachineRegisterInfo &MRI) const {
1753 assert(I.getOpcode() == TargetOpcode::G_ASHR ||((I.getOpcode() == TargetOpcode::G_ASHR || I.getOpcode() == TargetOpcode
::G_LSHR) ? static_cast<void> (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_ASHR || I.getOpcode() == TargetOpcode::G_LSHR"
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1754, __PRETTY_FUNCTION__))
1754 I.getOpcode() == TargetOpcode::G_LSHR)((I.getOpcode() == TargetOpcode::G_ASHR || I.getOpcode() == TargetOpcode
::G_LSHR) ? static_cast<void> (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_ASHR || I.getOpcode() == TargetOpcode::G_LSHR"
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1754, __PRETTY_FUNCTION__))
;
1755 Register DstReg = I.getOperand(0).getReg();
1756 const LLT Ty = MRI.getType(DstReg);
1757 Register Src1Reg = I.getOperand(1).getReg();
1758 Register Src2Reg = I.getOperand(2).getReg();
1759
1760 if (!Ty.isVector())
1761 return false;
1762
1763 bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1764
1765 // We expect the immediate case to be lowered in the PostLegalCombiner to
1766 // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1767
1768 // There is not a shift right register instruction, but the shift left
1769 // register instruction takes a signed value, where negative numbers specify a
1770 // right shift.
1771
1772 unsigned Opc = 0;
1773 unsigned NegOpc = 0;
1774 const TargetRegisterClass *RC =
1775 getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID), RBI);
1776 if (Ty == LLT::vector(2, 64)) {
1777 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1778 NegOpc = AArch64::NEGv2i64;
1779 } else if (Ty == LLT::vector(4, 32)) {
1780 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1781 NegOpc = AArch64::NEGv4i32;
1782 } else if (Ty == LLT::vector(2, 32)) {
1783 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1784 NegOpc = AArch64::NEGv2i32;
1785 } else if (Ty == LLT::vector(4, 16)) {
1786 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1787 NegOpc = AArch64::NEGv4i16;
1788 } else if (Ty == LLT::vector(8, 16)) {
1789 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1790 NegOpc = AArch64::NEGv8i16;
1791 } else if (Ty == LLT::vector(16, 8)) {
1792 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1793 NegOpc = AArch64::NEGv8i16;
1794 } else if (Ty == LLT::vector(8, 8)) {
1795 Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1796 NegOpc = AArch64::NEGv8i8;
1797 } else {
1798 LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unhandled G_ASHR type"; }
} while (false)
;
1799 return false;
1800 }
1801
1802 MachineIRBuilder MIB(I);
1803 auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1804 constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1805 auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1806 constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1807 I.eraseFromParent();
1808 return true;
1809}
1810
1811bool AArch64InstructionSelector::selectVaStartAAPCS(
1812 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1813 return false;
1814}
1815
1816bool AArch64InstructionSelector::selectVaStartDarwin(
1817 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1818 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1819 Register ListReg = I.getOperand(0).getReg();
1820
1821 Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1822
1823 auto MIB =
1824 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
1825 .addDef(ArgsAddrReg)
1826 .addFrameIndex(FuncInfo->getVarArgsStackIndex())
1827 .addImm(0)
1828 .addImm(0);
1829
1830 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1831
1832 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
1833 .addUse(ArgsAddrReg)
1834 .addUse(ListReg)
1835 .addImm(0)
1836 .addMemOperand(*I.memoperands_begin());
1837
1838 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1839 I.eraseFromParent();
1840 return true;
1841}
1842
1843void AArch64InstructionSelector::materializeLargeCMVal(
1844 MachineInstr &I, const Value *V, unsigned OpFlags) const {
1845 MachineBasicBlock &MBB = *I.getParent();
1846 MachineFunction &MF = *MBB.getParent();
1847 MachineRegisterInfo &MRI = MF.getRegInfo();
1848 MachineIRBuilder MIB(I);
1849
1850 auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
1851 MovZ->addOperand(MF, I.getOperand(1));
1852 MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
1853 AArch64II::MO_NC);
1854 MovZ->addOperand(MF, MachineOperand::CreateImm(0));
1855 constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
1856
1857 auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
1858 Register ForceDstReg) {
1859 Register DstReg = ForceDstReg
1860 ? ForceDstReg
1861 : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1862 auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
1863 if (auto *GV = dyn_cast<GlobalValue>(V)) {
1864 MovI->addOperand(MF, MachineOperand::CreateGA(
1865 GV, MovZ->getOperand(1).getOffset(), Flags));
1866 } else {
1867 MovI->addOperand(
1868 MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
1869 MovZ->getOperand(1).getOffset(), Flags));
1870 }
1871 MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
1872 constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
1873 return DstReg;
1874 };
1875 Register DstReg = BuildMovK(MovZ.getReg(0),
1876 AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
1877 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
1878 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
1879}
1880
// Pre-selection lowering hook: rewrites certain generic instructions in place
// (possibly inserting new generic MIR) so that the imported tablegen patterns
// or the later C++ selection code can match them. Returns true if \p I was
// modified; selection must continue afterwards either way, since these are
// pure G_MIR -> G_MIR transformations (the caller re-reads the opcode).
1881bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
 1882 MachineBasicBlock &MBB = *I.getParent();
 1883 MachineFunction &MF = *MBB.getParent();
 1884 MachineRegisterInfo &MRI = MF.getRegInfo();
 1885
 1886 switch (I.getOpcode()) {
 1887 case TargetOpcode::G_SHL:
 1888 case TargetOpcode::G_ASHR:
 1889 case TargetOpcode::G_LSHR: {
 1890 // These shifts are legalized to have 64 bit shift amounts because we want
 1891 // to take advantage of the existing imported selection patterns that assume
 1892 // the immediates are s64s. However, if the shifted type is 32 bits and for
 1893 // some reason we receive input GMIR that has an s64 shift amount that's not
 1894 // a G_CONSTANT, insert a truncate so that we can still select the s32
 1895 // register-register variant.
 1896 Register SrcReg = I.getOperand(1).getReg();
 1897 Register ShiftReg = I.getOperand(2).getReg();
 1898 const LLT ShiftTy = MRI.getType(ShiftReg);
 1899 const LLT SrcTy = MRI.getType(SrcReg);
 // Vector shifts are not handled by this lowering.
 1900 if (SrcTy.isVector())
 1901 return false;
 1902 assert(!ShiftTy.isVector() && "unexpected vector shift ty")((!ShiftTy.isVector() && "unexpected vector shift ty"
) ? static_cast<void> (0) : __assert_fail ("!ShiftTy.isVector() && \"unexpected vector shift ty\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1902, __PRETTY_FUNCTION__))
;
 // Only the s32-value / s64-amount combination needs fixing up.
 1903 if (SrcTy.getSizeInBits() != 32 || ShiftTy.getSizeInBits() != 64)
 1904 return false;
 1905 auto *AmtMI = MRI.getVRegDef(ShiftReg);
 1906 assert(AmtMI && "could not find a vreg definition for shift amount")((AmtMI && "could not find a vreg definition for shift amount"
) ? static_cast<void> (0) : __assert_fail ("AmtMI && \"could not find a vreg definition for shift amount\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1906, __PRETTY_FUNCTION__))
;
 // Constant amounts are matched by the imported immediate patterns; only
 // non-constant amounts need the explicit 64->32 truncation below.
 1907 if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) {
 1908 // Insert a subregister copy to implement a 64->32 trunc
 1909 MachineIRBuilder MIB(I);
 1910 auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
 1911 .addReg(ShiftReg, 0, AArch64::sub_32);
 1912 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
 1913 I.getOperand(2).setReg(Trunc.getReg(0));
 1914 }
 1915 return true;
 1916 }
 1917 case TargetOpcode::G_STORE:
 // Fold a cross-register-bank copy feeding the stored value into the store.
 1918 return contractCrossBankCopyIntoStore(I, MRI);
 1919 case TargetOpcode::G_PTR_ADD:
 // Expose pointer arithmetic as a plain integer G_ADD for the importer.
 1920 return convertPtrAddToAdd(I, MRI);
 1921 case TargetOpcode::G_LOAD: {
 1922 // For scalar loads of pointers, we try to convert the dest type from p0
 1923 // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
 1924 // conversion, this should be ok because all users should have been
 1925 // selected already, so the type doesn't matter for them.
 1926 Register DstReg = I.getOperand(0).getReg();
 1927 const LLT DstTy = MRI.getType(DstReg);
 1928 if (!DstTy.isPointer())
 1929 return false;
 1930 MRI.setType(DstReg, LLT::scalar(64));
 1931 return true;
 1932 }
 1933 case AArch64::G_DUP: {
 1934 // Convert the type from p0 to s64 to help selection.
 // NOTE(review): getElementType() presumes the G_DUP destination is a
 // vector type here — confirm every G_DUP reaching preISelLower has one.
 1935 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
 1936 if (!DstTy.getElementType().isPointer())
 1937 return false;
 1938 MachineIRBuilder MIB(I);
 // Bitwise-identical copy of the p0 source into an s64 value; the copy
 // keeps the source on the GPR bank so selection sees an integer dup.
 1939 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
 1940 MRI.setType(I.getOperand(0).getReg(),
 1941 DstTy.changeElementType(LLT::scalar(64)));
 1942 MRI.setRegBank(NewSrc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
 1943 I.getOperand(1).setReg(NewSrc.getReg(0));
 1944 return true;
 1945 }
 1946 case TargetOpcode::G_UITOFP:
 1947 case TargetOpcode::G_SITOFP: {
 1948 // If both source and destination regbanks are FPR, then convert the opcode
 1949 // to G_SITOF so that the importer can select it to an fpr variant.
 1950 // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
 1951 // copy.
 1952 Register SrcReg = I.getOperand(1).getReg();
 1953 LLT SrcTy = MRI.getType(SrcReg);
 1954 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
 // Only same-size scalar conversions are rewritten.
 1955 if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
 1956 return false;
 1957
 1958 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
 1959 if (I.getOpcode() == TargetOpcode::G_SITOFP)
 1960 I.setDesc(TII.get(AArch64::G_SITOF));
 1961 else
 1962 I.setDesc(TII.get(AArch64::G_UITOF));
 1963 return true;
 1964 }
 1965 return false;
 1966 }
 1967 default:
 1968 return false;
 1969 }
 1970}
1971
1972/// This lowering tries to look for G_PTR_ADD instructions and then converts
1973/// them to a standard G_ADD with a COPY on the source.
1974///
1975/// The motivation behind this is to expose the add semantics to the imported
1976/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
1977/// because the selector works bottom up, uses before defs. By the time we
1978/// end up trying to select a G_PTR_ADD, we should have already attempted to
1979/// fold this into addressing modes and were therefore unsuccessful.
// Rewrite %dst(p0) = G_PTR_ADD %base, %off into an integer G_ADD (or G_SUB
// for a negated offset), inserting a G_PTRTOINT on the base. See the comment
// block above for the motivation. Returns true on success; on the internal
// select failure below, \p I has already been mutated to G_ADD.
1980bool AArch64InstructionSelector::convertPtrAddToAdd(
 1981 MachineInstr &I, MachineRegisterInfo &MRI) {
 1982 assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD")((I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD"
) ? static_cast<void> (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_PTR_ADD && \"Expected G_PTR_ADD\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 1982, __PRETTY_FUNCTION__))
;
 1983 Register DstReg = I.getOperand(0).getReg();
 1984 Register AddOp1Reg = I.getOperand(1).getReg();
 1985 const LLT PtrTy = MRI.getType(DstReg);
 // Only the default address space (p0) is handled by this conversion.
 1986 if (PtrTy.getAddressSpace() != 0)
 1987 return false;
 1988
 1989 MachineIRBuilder MIB(I);
 // Pointers are 64-bit; a vector of pointers becomes <2 x s64>.
 1990 const LLT CastPtrTy = PtrTy.isVector() ? LLT::vector(2, 64) : LLT::scalar(64);
 1991 auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
 1992 // Set regbanks on the registers.
 1993 if (PtrTy.isVector())
 1994 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
 1995 else
 1996 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
 1997
 1998 // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
 1999 // %dst(intty) = G_ADD %intbase, off
 2000 I.setDesc(TII.get(TargetOpcode::G_ADD));
 2001 MRI.setType(DstReg, CastPtrTy);
 2002 I.getOperand(1).setReg(PtrToInt.getReg(0));
 // Select the freshly inserted G_PTRTOINT right away — the selector works
 // bottom-up, so it would not otherwise be visited again.
 // NOTE(review): if this fails, I is left rewritten as G_ADD even though we
 // return false — confirm callers treat that as acceptable.
 2003 if (!select(*PtrToInt)) {
 2004 LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd"
; } } while (false)
;
 2005 return false;
 2006 }
 2007
 2008 // Also take the opportunity here to try to do some optimization.
 2009 // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
 2010 Register NegatedReg;
 2011 if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
 2012 return true;
 // Offset is (0 - x): use the negated operand directly with a subtract.
 2013 I.getOperand(2).setReg(NegatedReg);
 2014 I.setDesc(TII.get(TargetOpcode::G_SUB));
 2015 return true;
 2016}
2017
// Early-select a G_SHL with a constant shift amount into UBFMXri/UBFMWri
// (LSL-immediate is an alias for this UBFM form). Returns false to defer to
// the imported selector (which handles the register-shift variant).
2018bool AArch64InstructionSelector::earlySelectSHL(
 2019 MachineInstr &I, MachineRegisterInfo &MRI) const {
 2020 // We try to match the immediate variant of LSL, which is actually an alias
 2021 // for a special case of UBFM. Otherwise, we fall back to the imported
 2022 // selector which will match the register variant.
 2023 assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op")((I.getOpcode() == TargetOpcode::G_SHL && "unexpected op"
) ? static_cast<void> (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_SHL && \"unexpected op\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2023, __PRETTY_FUNCTION__))
;
 2024 const auto &MO = I.getOperand(2);
 // Bail out unless the shift amount is a known constant vreg.
 2025 auto VRegAndVal = getConstantVRegVal(MO.getReg(), MRI);
 2026 if (!VRegAndVal)
 2027 return false;
 2028
 2029 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
 // Vector shifts are left to other selection paths.
 2030 if (DstTy.isVector())
 2031 return false;
 2032 bool Is64Bit = DstTy.getSizeInBits() == 64;
 // Imm1Fn/Imm2Fn produce the two UBFM immediate operands (immr/imms) for
 // this shift amount; either may fail for an out-of-range amount.
 2033 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
 2034 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
 2035 MachineIRBuilder MIB(I);
 2036
 2037 if (!Imm1Fn || !Imm2Fn)
 2038 return false;
 2039
 // Build the UBFM with dst/src; the immediates are appended by the render
 // functions below.
 2040 auto NewI =
 2041 MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
 2042 {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
 2043
 2044 for (auto &RenderFn : *Imm1Fn)
 2045 RenderFn(NewI);
 2046 for (auto &RenderFn : *Imm2Fn)
 2047 RenderFn(NewI);
 2048
 // The original G_SHL is fully replaced by the UBFM.
 2049 I.eraseFromParent();
 2050 return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
 2051}
2052
// Fold a cross-register-bank COPY that feeds a G_STORE's value operand into
// the store itself (see the worked example in the body). Returns true if the
// store's value operand was rewritten to the copy's source.
2053bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
 2054 MachineInstr &I, MachineRegisterInfo &MRI) {
 2055 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE")((I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE"
) ? static_cast<void> (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_STORE && \"Expected G_STORE\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2055, __PRETTY_FUNCTION__))
;
 2056 // If we're storing a scalar, it doesn't matter what register bank that
 2057 // scalar is on. All that matters is the size.
 2058 //
 2059 // So, if we see something like this (with a 32-bit scalar as an example):
 2060 //
 2061 // %x:gpr(s32) = ... something ...
 2062 // %y:fpr(s32) = COPY %x:gpr(s32)
 2063 // G_STORE %y:fpr(s32)
 2064 //
 2065 // We can fix this up into something like this:
 2066 //
 2067 // G_STORE %x:gpr(s32)
 2068 //
 2069 // And then continue the selection process normally.
 // Walk back through any chain of copies to the underlying defined vreg.
 2070 Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
 2071 if (!DefDstReg.isValid())
 2072 return false;
 2073 LLT DefDstTy = MRI.getType(DefDstReg);
 2074 Register StoreSrcReg = I.getOperand(0).getReg();
 2075 LLT StoreSrcTy = MRI.getType(StoreSrcReg);
 2076
 2077 // If we get something strange like a physical register, then we shouldn't
 2078 // go any further.
 2079 if (!DefDstTy.isValid())
 2080 return false;
 2081
 2082 // Are the source and dst types the same size?
 2083 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
 2084 return false;
 2085
 // Same register bank on both sides means there is no cross-bank copy to
 // fold — nothing to do.
 2086 if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
 2087 RBI.getRegBank(DefDstReg, MRI, TRI))
 2088 return false;
 2089
 2090 // We have a cross-bank copy, which is entering a store. Let's fold it.
 2091 I.getOperand(0).setReg(DefDstReg);
 2092 return true;
 2093}
2094
// Early selection: custom C++ selection routines that must run before the
// imported tablegen patterns get a chance (either because the importer would
// pick a suboptimal sequence, or because the rewrite is not expressible as a
// pattern). Returns true if \p I was fully selected (or erased) here.
2095bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
 2096 assert(I.getParent() && "Instruction should be in a basic block!")((I.getParent() && "Instruction should be in a basic block!"
) ? static_cast<void> (0) : __assert_fail ("I.getParent() && \"Instruction should be in a basic block!\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2096, __PRETTY_FUNCTION__))
;
 2097 assert(I.getParent()->getParent() && "Instruction should be in a function!")((I.getParent()->getParent() && "Instruction should be in a function!"
) ? static_cast<void> (0) : __assert_fail ("I.getParent()->getParent() && \"Instruction should be in a function!\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2097, __PRETTY_FUNCTION__))
;
 2098
 2099 MachineBasicBlock &MBB = *I.getParent();
 2100 MachineFunction &MF = *MBB.getParent();
 2101 MachineRegisterInfo &MRI = MF.getRegInfo();
 2102
 2103 switch (I.getOpcode()) {
 2104 case TargetOpcode::G_BR: {
 2105 // If the branch jumps to the fallthrough block, don't bother emitting it.
 2106 // Only do this for -O0 for a good code size improvement, because when
 2107 // optimizations are enabled we want to leave this choice to
 2108 // MachineBlockPlacement.
 2109 bool EnableOpt = MF.getTarget().getOptLevel() != CodeGenOpt::None;
 2110 if (EnableOpt || !MBB.isLayoutSuccessor(I.getOperand(0).getMBB()))
 2111 return false;
 // Redundant fallthrough branch: delete it and report success.
 2112 I.eraseFromParent();
 2113 return true;
 2114 }
 2115 case TargetOpcode::G_SHL:
 // Try the LSL-immediate (UBFM) form before the imported patterns.
 2116 return earlySelectSHL(I, MRI);
 2117 case TargetOpcode::G_CONSTANT: {
 // Materialize a zero constant as a COPY from the zero register
 // (XZR for 64-bit, WZR for 32-bit) instead of a move-immediate.
 2118 bool IsZero = false;
 2119 if (I.getOperand(1).isCImm())
 2120 IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0;
 2121 else if (I.getOperand(1).isImm())
 2122 IsZero = I.getOperand(1).getImm() == 0;
 2123
 2124 if (!IsZero)
 2125 return false;
 2126
 2127 Register DefReg = I.getOperand(0).getReg();
 2128 LLT Ty = MRI.getType(DefReg);
 2129 if (Ty.getSizeInBits() == 64) {
 // Rewrite the immediate operand into a use of XZR.
 2130 I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
 2131 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
 2132 } else if (Ty.getSizeInBits() == 32) {
 2133 I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
 2134 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
 2135 } else
 2136 return false;
 2137
 // With the operand rewritten to a register, the instruction is now a COPY.
 2138 I.setDesc(TII.get(TargetOpcode::COPY));
 2139 return true;
 2140 }
 2141 default:
 2142 return false;
 2143 }
 2144}
2145
2146bool AArch64InstructionSelector::select(MachineInstr &I) {
2147 assert(I.getParent() && "Instruction should be in a basic block!")((I.getParent() && "Instruction should be in a basic block!"
) ? static_cast<void> (0) : __assert_fail ("I.getParent() && \"Instruction should be in a basic block!\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2147, __PRETTY_FUNCTION__))
;
2148 assert(I.getParent()->getParent() && "Instruction should be in a function!")((I.getParent()->getParent() && "Instruction should be in a function!"
) ? static_cast<void> (0) : __assert_fail ("I.getParent()->getParent() && \"Instruction should be in a function!\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2148, __PRETTY_FUNCTION__))
;
2149
2150 MachineBasicBlock &MBB = *I.getParent();
2151 MachineFunction &MF = *MBB.getParent();
2152 MachineRegisterInfo &MRI = MF.getRegInfo();
2153
2154 const AArch64Subtarget *Subtarget =
2155 &static_cast<const AArch64Subtarget &>(MF.getSubtarget());
2156 if (Subtarget->requiresStrictAlign()) {
2157 // We don't support this feature yet.
2158 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "AArch64 GISel does not support strict-align yet\n"
; } } while (false)
;
2159 return false;
2160 }
2161
2162 unsigned Opcode = I.getOpcode();
2163 // G_PHI requires same handling as PHI
2164 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2165 // Certain non-generic instructions also need some special handling.
2166
2167 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2168 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2169
2170 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2171 const Register DefReg = I.getOperand(0).getReg();
2172 const LLT DefTy = MRI.getType(DefReg);
2173
2174 const RegClassOrRegBank &RegClassOrBank =
2175 MRI.getRegClassOrRegBank(DefReg);
2176
2177 const TargetRegisterClass *DefRC
2178 = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
2179 if (!DefRC) {
2180 if (!DefTy.isValid()) {
2181 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "PHI operand has no type, not a gvreg?\n"
; } } while (false)
;
2182 return false;
2183 }
2184 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
2185 DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
2186 if (!DefRC) {
2187 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "PHI operand has unexpected size/bank\n"
; } } while (false)
;
2188 return false;
2189 }
2190 }
2191
2192 I.setDesc(TII.get(TargetOpcode::PHI));
2193
2194 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
2195 }
2196
2197 if (I.isCopy())
2198 return selectCopy(I, TII, MRI, TRI, RBI);
2199
2200 return true;
2201 }
2202
2203
2204 if (I.getNumOperands() != I.getNumExplicitOperands()) {
2205 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Generic instruction has unexpected implicit operands\n"
; } } while (false)
2206 dbgs() << "Generic instruction has unexpected implicit operands\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Generic instruction has unexpected implicit operands\n"
; } } while (false)
;
2207 return false;
2208 }
2209
2210 // Try to do some lowering before we start instruction selecting. These
2211 // lowerings are purely transformations on the input G_MIR and so selection
2212 // must continue after any modification of the instruction.
2213 if (preISelLower(I)) {
2214 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2215 }
2216
2217 // There may be patterns where the importer can't deal with them optimally,
2218 // but does select it to a suboptimal sequence so our custom C++ selection
2219 // code later never has a chance to work on it. Therefore, we have an early
2220 // selection attempt here to give priority to certain selection routines
2221 // over the imported ones.
2222 if (earlySelect(I))
2223 return true;
2224
2225 if (selectImpl(I, *CoverageInfo))
2226 return true;
2227
2228 LLT Ty =
2229 I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
2230
2231 MachineIRBuilder MIB(I);
2232
2233 switch (Opcode) {
2234 case TargetOpcode::G_BRCOND:
2235 return selectCompareBranch(I, MF, MRI);
2236
2237 case TargetOpcode::G_BRINDIRECT: {
2238 I.setDesc(TII.get(AArch64::BR));
2239 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2240 }
2241
2242 case TargetOpcode::G_BRJT:
2243 return selectBrJT(I, MRI);
2244
2245 case AArch64::G_ADD_LOW: {
 2246 // This op may have been separated from its ADRP companion by the localizer
2247 // or some other code motion pass. Given that many CPUs will try to
2248 // macro fuse these operations anyway, select this into a MOVaddr pseudo
2249 // which will later be expanded into an ADRP+ADD pair after scheduling.
2250 MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
2251 if (BaseMI->getOpcode() != AArch64::ADRP) {
2252 I.setDesc(TII.get(AArch64::ADDXri));
2253 I.addOperand(MachineOperand::CreateImm(0));
2254 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2255 }
2256 assert(TM.getCodeModel() == CodeModel::Small &&((TM.getCodeModel() == CodeModel::Small && "Expected small code model"
) ? static_cast<void> (0) : __assert_fail ("TM.getCodeModel() == CodeModel::Small && \"Expected small code model\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2257, __PRETTY_FUNCTION__))
2257 "Expected small code model")((TM.getCodeModel() == CodeModel::Small && "Expected small code model"
) ? static_cast<void> (0) : __assert_fail ("TM.getCodeModel() == CodeModel::Small && \"Expected small code model\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2257, __PRETTY_FUNCTION__))
;
2258 MachineIRBuilder MIB(I);
2259 auto Op1 = BaseMI->getOperand(1);
2260 auto Op2 = I.getOperand(2);
2261 auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
2262 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2263 Op1.getTargetFlags())
2264 .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
2265 Op2.getTargetFlags());
2266 I.eraseFromParent();
2267 return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
2268 }
2269
2270 case TargetOpcode::G_BSWAP: {
2271 // Handle vector types for G_BSWAP directly.
2272 Register DstReg = I.getOperand(0).getReg();
2273 LLT DstTy = MRI.getType(DstReg);
2274
2275 // We should only get vector types here; everything else is handled by the
2276 // importer right now.
2277 if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
2278 LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Dst type for G_BSWAP currently unsupported.\n"
; } } while (false)
;
2279 return false;
2280 }
2281
2282 // Only handle 4 and 2 element vectors for now.
2283 // TODO: 16-bit elements.
2284 unsigned NumElts = DstTy.getNumElements();
2285 if (NumElts != 4 && NumElts != 2) {
2286 LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unsupported number of elements for G_BSWAP.\n"
; } } while (false)
;
2287 return false;
2288 }
2289
2290 // Choose the correct opcode for the supported types. Right now, that's
2291 // v2s32, v4s32, and v2s64.
2292 unsigned Opc = 0;
2293 unsigned EltSize = DstTy.getElementType().getSizeInBits();
2294 if (EltSize == 32)
2295 Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
2296 : AArch64::REV32v16i8;
2297 else if (EltSize == 64)
2298 Opc = AArch64::REV64v16i8;
2299
2300 // We should always get something by the time we get here...
2301 assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?")((Opc != 0 && "Didn't get an opcode for G_BSWAP?") ? static_cast
<void> (0) : __assert_fail ("Opc != 0 && \"Didn't get an opcode for G_BSWAP?\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2301, __PRETTY_FUNCTION__))
;
2302
2303 I.setDesc(TII.get(Opc));
2304 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2305 }
2306
2307 case TargetOpcode::G_FCONSTANT:
2308 case TargetOpcode::G_CONSTANT: {
2309 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2310
2311 const LLT s8 = LLT::scalar(8);
2312 const LLT s16 = LLT::scalar(16);
2313 const LLT s32 = LLT::scalar(32);
2314 const LLT s64 = LLT::scalar(64);
2315 const LLT s128 = LLT::scalar(128);
2316 const LLT p0 = LLT::pointer(0, 64);
2317
2318 const Register DefReg = I.getOperand(0).getReg();
2319 const LLT DefTy = MRI.getType(DefReg);
2320 const unsigned DefSize = DefTy.getSizeInBits();
2321 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2322
2323 // FIXME: Redundant check, but even less readable when factored out.
2324 if (isFP) {
2325 if (Ty != s32 && Ty != s64 && Ty != s128) {
2326 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Tydo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unable to materialize FP "
<< Ty << " constant, expected: " << s32 <<
" or " << s64 << " or " << s128 << '\n'
; } } while (false)
2327 << " constant, expected: " << s32 << " or " << s64do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unable to materialize FP "
<< Ty << " constant, expected: " << s32 <<
" or " << s64 << " or " << s128 << '\n'
; } } while (false)
2328 << " or " << s128 << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unable to materialize FP "
<< Ty << " constant, expected: " << s32 <<
" or " << s64 << " or " << s128 << '\n'
; } } while (false)
;
2329 return false;
2330 }
2331
2332 if (RB.getID() != AArch64::FPRRegBankID) {
2333 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Tydo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unable to materialize FP "
<< Ty << " constant on bank: " << RB <<
", expected: FPR\n"; } } while (false)
2334 << " constant on bank: " << RBdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unable to materialize FP "
<< Ty << " constant on bank: " << RB <<
", expected: FPR\n"; } } while (false)
2335 << ", expected: FPR\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unable to materialize FP "
<< Ty << " constant on bank: " << RB <<
", expected: FPR\n"; } } while (false)
;
2336 return false;
2337 }
2338
2339 // The case when we have 0.0 is covered by tablegen. Reject it here so we
2340 // can be sure tablegen works correctly and isn't rescued by this code.
2341 // 0.0 is not covered by tablegen for FP128. So we will handle this
2342 // scenario in the code here.
2343 if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
2344 return false;
2345 } else {
2346 // s32 and s64 are covered by tablegen.
2347 if (Ty != p0 && Ty != s8 && Ty != s16) {
2348 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Tydo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unable to materialize integer "
<< Ty << " constant, expected: " << s32 <<
", " << s64 << ", or " << p0 << '\n'
; } } while (false)
2349 << " constant, expected: " << s32 << ", " << s64do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unable to materialize integer "
<< Ty << " constant, expected: " << s32 <<
", " << s64 << ", or " << p0 << '\n'
; } } while (false)
2350 << ", or " << p0 << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unable to materialize integer "
<< Ty << " constant, expected: " << s32 <<
", " << s64 << ", or " << p0 << '\n'
; } } while (false)
;
2351 return false;
2352 }
2353
2354 if (RB.getID() != AArch64::GPRRegBankID) {
2355 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Tydo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unable to materialize integer "
<< Ty << " constant on bank: " << RB <<
", expected: GPR\n"; } } while (false)
2356 << " constant on bank: " << RBdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unable to materialize integer "
<< Ty << " constant on bank: " << RB <<
", expected: GPR\n"; } } while (false)
2357 << ", expected: GPR\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unable to materialize integer "
<< Ty << " constant on bank: " << RB <<
", expected: GPR\n"; } } while (false)
;
2358 return false;
2359 }
2360 }
2361
2362 // We allow G_CONSTANT of types < 32b.
2363 const unsigned MovOpc =
2364 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2365
2366 if (isFP) {
2367 // Either emit a FMOV, or emit a copy to emit a normal mov.
2368 const TargetRegisterClass &GPRRC =
2369 DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
2370 const TargetRegisterClass &FPRRC =
2371 DefSize == 32 ? AArch64::FPR32RegClass
2372 : (DefSize == 64 ? AArch64::FPR64RegClass
2373 : AArch64::FPR128RegClass);
2374
2375 // Can we use a FMOV instruction to represent the immediate?
2376 if (emitFMovForFConstant(I, MRI))
2377 return true;
2378
2379 // For 64b values, emit a constant pool load instead.
2380 if (DefSize == 64 || DefSize == 128) {
2381 auto *FPImm = I.getOperand(1).getFPImm();
2382 MachineIRBuilder MIB(I);
2383 auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2384 if (!LoadMI) {
2385 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Failed to load double constant pool entry\n"
; } } while (false)
;
2386 return false;
2387 }
2388 MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2389 I.eraseFromParent();
2390 return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2391 }
2392
2393 // Nope. Emit a copy and use a normal mov instead.
2394 const Register DefGPRReg = MRI.createVirtualRegister(&GPRRC);
2395 MachineOperand &RegOp = I.getOperand(0);
2396 RegOp.setReg(DefGPRReg);
2397 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2398 MIB.buildCopy({DefReg}, {DefGPRReg});
2399
2400 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2401 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Failed to constrain G_FCONSTANT def operand\n"
; } } while (false)
;
2402 return false;
2403 }
2404
2405 MachineOperand &ImmOp = I.getOperand(1);
2406 // FIXME: Is going through int64_t always correct?
2407 ImmOp.ChangeToImmediate(
2408 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2409 } else if (I.getOperand(1).isCImm()) {
2410 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
2411 I.getOperand(1).ChangeToImmediate(Val);
2412 } else if (I.getOperand(1).isImm()) {
2413 uint64_t Val = I.getOperand(1).getImm();
2414 I.getOperand(1).ChangeToImmediate(Val);
2415 }
2416
2417 I.setDesc(TII.get(MovOpc));
2418 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2419 return true;
2420 }
2421 case TargetOpcode::G_EXTRACT: {
2422 Register DstReg = I.getOperand(0).getReg();
2423 Register SrcReg = I.getOperand(1).getReg();
2424 LLT SrcTy = MRI.getType(SrcReg);
2425 LLT DstTy = MRI.getType(DstReg);
2426 (void)DstTy;
2427 unsigned SrcSize = SrcTy.getSizeInBits();
2428
2429 if (SrcTy.getSizeInBits() > 64) {
2430 // This should be an extract of an s128, which is like a vector extract.
2431 if (SrcTy.getSizeInBits() != 128)
2432 return false;
2433 // Only support extracting 64 bits from an s128 at the moment.
2434 if (DstTy.getSizeInBits() != 64)
2435 return false;
2436
2437 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2438 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2439 // Check we have the right regbank always.
2440 assert(SrcRB.getID() == AArch64::FPRRegBankID &&((SrcRB.getID() == AArch64::FPRRegBankID && DstRB.getID
() == AArch64::FPRRegBankID && "Wrong extract regbank!"
) ? static_cast<void> (0) : __assert_fail ("SrcRB.getID() == AArch64::FPRRegBankID && DstRB.getID() == AArch64::FPRRegBankID && \"Wrong extract regbank!\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2442, __PRETTY_FUNCTION__))
2441 DstRB.getID() == AArch64::FPRRegBankID &&((SrcRB.getID() == AArch64::FPRRegBankID && DstRB.getID
() == AArch64::FPRRegBankID && "Wrong extract regbank!"
) ? static_cast<void> (0) : __assert_fail ("SrcRB.getID() == AArch64::FPRRegBankID && DstRB.getID() == AArch64::FPRRegBankID && \"Wrong extract regbank!\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2442, __PRETTY_FUNCTION__))
2442 "Wrong extract regbank!")((SrcRB.getID() == AArch64::FPRRegBankID && DstRB.getID
() == AArch64::FPRRegBankID && "Wrong extract regbank!"
) ? static_cast<void> (0) : __assert_fail ("SrcRB.getID() == AArch64::FPRRegBankID && DstRB.getID() == AArch64::FPRRegBankID && \"Wrong extract regbank!\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2442, __PRETTY_FUNCTION__))
;
2443 (void)SrcRB;
2444
2445 // Emit the same code as a vector extract.
2446 // Offset must be a multiple of 64.
2447 unsigned Offset = I.getOperand(2).getImm();
2448 if (Offset % 64 != 0)
2449 return false;
2450 unsigned LaneIdx = Offset / 64;
2451 MachineIRBuilder MIB(I);
2452 MachineInstr *Extract = emitExtractVectorElt(
2453 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2454 if (!Extract)
2455 return false;
2456 I.eraseFromParent();
2457 return true;
2458 }
2459
2460 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2461 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2462 Ty.getSizeInBits() - 1);
2463
2464 if (SrcSize < 64) {
2465 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&((SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
"unexpected G_EXTRACT types") ? static_cast<void> (0) :
__assert_fail ("SrcSize == 32 && DstTy.getSizeInBits() == 16 && \"unexpected G_EXTRACT types\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2466, __PRETTY_FUNCTION__))
2466 "unexpected G_EXTRACT types")((SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
"unexpected G_EXTRACT types") ? static_cast<void> (0) :
__assert_fail ("SrcSize == 32 && DstTy.getSizeInBits() == 16 && \"unexpected G_EXTRACT types\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2466, __PRETTY_FUNCTION__))
;
2467 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2468 }
2469
2470 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2471 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2472 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2473 .addReg(DstReg, 0, AArch64::sub_32);
2474 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2475 AArch64::GPR32RegClass, MRI);
2476 I.getOperand(0).setReg(DstReg);
2477
2478 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2479 }
2480
2481 case TargetOpcode::G_INSERT: {
2482 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2483 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2484 unsigned DstSize = DstTy.getSizeInBits();
2485 // Larger inserts are vectors, same-size ones should be something else by
2486 // now (split up or turned into COPYs).
2487 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2488 return false;
2489
2490 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2491 unsigned LSB = I.getOperand(3).getImm();
2492 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
2493 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2494 MachineInstrBuilder(MF, I).addImm(Width - 1);
2495
2496 if (DstSize < 64) {
2497 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&((DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
"unexpected G_INSERT types") ? static_cast<void> (0) :
__assert_fail ("DstSize == 32 && SrcTy.getSizeInBits() == 16 && \"unexpected G_INSERT types\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2498, __PRETTY_FUNCTION__))
2498 "unexpected G_INSERT types")((DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
"unexpected G_INSERT types") ? static_cast<void> (0) :
__assert_fail ("DstSize == 32 && SrcTy.getSizeInBits() == 16 && \"unexpected G_INSERT types\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2498, __PRETTY_FUNCTION__))
;
2499 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2500 }
2501
2502 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2503 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2504 TII.get(AArch64::SUBREG_TO_REG))
2505 .addDef(SrcReg)
2506 .addImm(0)
2507 .addUse(I.getOperand(2).getReg())
2508 .addImm(AArch64::sub_32);
2509 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2510 AArch64::GPR32RegClass, MRI);
2511 I.getOperand(2).setReg(SrcReg);
2512
2513 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2514 }
2515 case TargetOpcode::G_FRAME_INDEX: {
2516 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2517 if (Ty != LLT::pointer(0, 64)) {
2518 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Tydo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_FRAME_INDEX pointer has type: "
<< Ty << ", expected: " << LLT::pointer(0,
64) << '\n'; } } while (false)
2519 << ", expected: " << LLT::pointer(0, 64) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_FRAME_INDEX pointer has type: "
<< Ty << ", expected: " << LLT::pointer(0,
64) << '\n'; } } while (false)
;
2520 return false;
2521 }
2522 I.setDesc(TII.get(AArch64::ADDXri));
2523
2524 // MOs for a #0 shifted immediate.
2525 I.addOperand(MachineOperand::CreateImm(0));
2526 I.addOperand(MachineOperand::CreateImm(0));
2527
2528 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2529 }
2530
2531 case TargetOpcode::G_GLOBAL_VALUE: {
2532 auto GV = I.getOperand(1).getGlobal();
2533 if (GV->isThreadLocal())
2534 return selectTLSGlobalValue(I, MRI);
2535
2536 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
2537 if (OpFlags & AArch64II::MO_GOT) {
2538 I.setDesc(TII.get(AArch64::LOADgot));
2539 I.getOperand(1).setTargetFlags(OpFlags);
2540 } else if (TM.getCodeModel() == CodeModel::Large) {
2541 // Materialize the global using movz/movk instructions.
2542 materializeLargeCMVal(I, GV, OpFlags);
2543 I.eraseFromParent();
2544 return true;
2545 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2546 I.setDesc(TII.get(AArch64::ADR));
2547 I.getOperand(1).setTargetFlags(OpFlags);
2548 } else {
2549 I.setDesc(TII.get(AArch64::MOVaddr));
2550 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2551 MachineInstrBuilder MIB(MF, I);
2552 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2553 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2554 }
2555 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2556 }
2557
2558 case TargetOpcode::G_ZEXTLOAD:
2559 case TargetOpcode::G_LOAD:
2560 case TargetOpcode::G_STORE: {
2561 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2562 MachineIRBuilder MIB(I);
2563
2564 LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
2565
2566 if (PtrTy != LLT::pointer(0, 64)) {
2567 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTydo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Load/Store pointer has type: "
<< PtrTy << ", expected: " << LLT::pointer
(0, 64) << '\n'; } } while (false)
2568 << ", expected: " << LLT::pointer(0, 64) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Load/Store pointer has type: "
<< PtrTy << ", expected: " << LLT::pointer
(0, 64) << '\n'; } } while (false)
;
2569 return false;
2570 }
2571
2572 auto &MemOp = **I.memoperands_begin();
2573 uint64_t MemSizeInBytes = MemOp.getSize();
2574 if (MemOp.isAtomic()) {
2575 // For now we just support s8 acquire loads to be able to compile stack
2576 // protector code.
2577 if (MemOp.getOrdering() == AtomicOrdering::Acquire &&
2578 MemSizeInBytes == 1) {
2579 I.setDesc(TII.get(AArch64::LDARB));
2580 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2581 }
2582 LLVM_DEBUG(dbgs() << "Atomic load/store not fully supported yet\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Atomic load/store not fully supported yet\n"
; } } while (false)
;
2583 return false;
2584 }
2585 unsigned MemSizeInBits = MemSizeInBytes * 8;
2586
2587#ifndef NDEBUG
2588 const Register PtrReg = I.getOperand(1).getReg();
2589 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2590 // Sanity-check the pointer register.
2591 assert(PtrRB.getID() == AArch64::GPRRegBankID &&((PtrRB.getID() == AArch64::GPRRegBankID && "Load/Store pointer operand isn't a GPR"
) ? static_cast<void> (0) : __assert_fail ("PtrRB.getID() == AArch64::GPRRegBankID && \"Load/Store pointer operand isn't a GPR\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2592, __PRETTY_FUNCTION__))
2592 "Load/Store pointer operand isn't a GPR")((PtrRB.getID() == AArch64::GPRRegBankID && "Load/Store pointer operand isn't a GPR"
) ? static_cast<void> (0) : __assert_fail ("PtrRB.getID() == AArch64::GPRRegBankID && \"Load/Store pointer operand isn't a GPR\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2592, __PRETTY_FUNCTION__))
;
2593 assert(MRI.getType(PtrReg).isPointer() &&((MRI.getType(PtrReg).isPointer() && "Load/Store pointer operand isn't a pointer"
) ? static_cast<void> (0) : __assert_fail ("MRI.getType(PtrReg).isPointer() && \"Load/Store pointer operand isn't a pointer\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2594, __PRETTY_FUNCTION__))
2594 "Load/Store pointer operand isn't a pointer")((MRI.getType(PtrReg).isPointer() && "Load/Store pointer operand isn't a pointer"
) ? static_cast<void> (0) : __assert_fail ("MRI.getType(PtrReg).isPointer() && \"Load/Store pointer operand isn't a pointer\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2594, __PRETTY_FUNCTION__))
;
2595#endif
2596
2597 const Register ValReg = I.getOperand(0).getReg();
2598 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
2599
2600 // Helper lambda for partially selecting I. Either returns the original
2601 // instruction with an updated opcode, or a new instruction.
2602 auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
2603 bool IsStore = I.getOpcode() == TargetOpcode::G_STORE;
2604 const unsigned NewOpc =
2605 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
2606 if (NewOpc == I.getOpcode())
2607 return nullptr;
2608 // Check if we can fold anything into the addressing mode.
2609 auto AddrModeFns =
2610 selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
2611 if (!AddrModeFns) {
2612 // Can't fold anything. Use the original instruction.
2613 I.setDesc(TII.get(NewOpc));
2614 I.addOperand(MachineOperand::CreateImm(0));
2615 return &I;
2616 }
2617
2618 // Folded something. Create a new instruction and return it.
2619 auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
2620 IsStore ? NewInst.addUse(ValReg) : NewInst.addDef(ValReg);
2621 NewInst.cloneMemRefs(I);
2622 for (auto &Fn : *AddrModeFns)
2623 Fn(NewInst);
2624 I.eraseFromParent();
2625 return &*NewInst;
2626 };
2627
2628 MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
2629 if (!LoadStore)
2630 return false;
2631
2632 // If we're storing a 0, use WZR/XZR.
2633 if (Opcode == TargetOpcode::G_STORE) {
2634 auto CVal = getConstantVRegValWithLookThrough(
2635 LoadStore->getOperand(0).getReg(), MRI, /*LookThroughInstrs = */ true,
2636 /*HandleFConstants = */ false);
2637 if (CVal && CVal->Value == 0) {
2638 switch (LoadStore->getOpcode()) {
2639 case AArch64::STRWui:
2640 case AArch64::STRHHui:
2641 case AArch64::STRBBui:
2642 LoadStore->getOperand(0).setReg(AArch64::WZR);
2643 break;
2644 case AArch64::STRXui:
2645 LoadStore->getOperand(0).setReg(AArch64::XZR);
2646 break;
2647 }
2648 }
2649 }
2650
2651 if (IsZExtLoad) {
2652 // The zextload from a smaller type to i32 should be handled by the
2653 // importer.
2654 if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
2655 return false;
2656 // If we have a ZEXTLOAD then change the load's type to be a narrower reg
2657 // and zero_extend with SUBREG_TO_REG.
2658 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2659 Register DstReg = LoadStore->getOperand(0).getReg();
2660 LoadStore->getOperand(0).setReg(LdReg);
2661
2662 MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
2663 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
2664 .addImm(0)
2665 .addUse(LdReg)
2666 .addImm(AArch64::sub_32);
2667 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2668 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
2669 MRI);
2670 }
2671 return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2672 }
2673
2674 case TargetOpcode::G_SMULH:
2675 case TargetOpcode::G_UMULH: {
2676 // Reject the various things we don't support yet.
2677 if (unsupportedBinOp(I, RBI, MRI, TRI))
2678 return false;
2679
2680 const Register DefReg = I.getOperand(0).getReg();
2681 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2682
2683 if (RB.getID() != AArch64::GPRRegBankID) {
2684 LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_[SU]MULH on bank: " <<
RB << ", expected: GPR\n"; } } while (false)
;
2685 return false;
2686 }
2687
2688 if (Ty != LLT::scalar(64)) {
2689 LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Tydo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_[SU]MULH has type: " <<
Ty << ", expected: " << LLT::scalar(64) <<
'\n'; } } while (false)
2690 << ", expected: " << LLT::scalar(64) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_[SU]MULH has type: " <<
Ty << ", expected: " << LLT::scalar(64) <<
'\n'; } } while (false)
;
2691 return false;
2692 }
2693
2694 unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
2695 : AArch64::UMULHrr;
2696 I.setDesc(TII.get(NewOpc));
2697
2698 // Now that we selected an opcode, we need to constrain the register
2699 // operands to use appropriate classes.
2700 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2701 }
2702 case TargetOpcode::G_LSHR:
2703 case TargetOpcode::G_ASHR:
2704 if (MRI.getType(I.getOperand(0).getReg()).isVector())
2705 return selectVectorAshrLshr(I, MRI);
2706 LLVM_FALLTHROUGH[[gnu::fallthrough]];
2707 case TargetOpcode::G_SHL:
2708 if (Opcode == TargetOpcode::G_SHL &&
2709 MRI.getType(I.getOperand(0).getReg()).isVector())
2710 return selectVectorSHL(I, MRI);
2711 LLVM_FALLTHROUGH[[gnu::fallthrough]];
2712 case TargetOpcode::G_FADD:
2713 case TargetOpcode::G_FSUB:
2714 case TargetOpcode::G_FMUL:
2715 case TargetOpcode::G_FDIV:
2716 case TargetOpcode::G_OR: {
2717 // Reject the various things we don't support yet.
2718 if (unsupportedBinOp(I, RBI, MRI, TRI))
2719 return false;
2720
2721 const unsigned OpSize = Ty.getSizeInBits();
2722
2723 const Register DefReg = I.getOperand(0).getReg();
2724 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2725
2726 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
2727 if (NewOpc == I.getOpcode())
2728 return false;
2729
2730 I.setDesc(TII.get(NewOpc));
2731 // FIXME: Should the type be always reset in setDesc?
2732
2733 // Now that we selected an opcode, we need to constrain the register
2734 // operands to use appropriate classes.
2735 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2736 }
2737
2738 case TargetOpcode::G_PTR_ADD: {
2739 MachineIRBuilder MIRBuilder(I);
2740 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2),
2741 MIRBuilder);
2742 I.eraseFromParent();
2743 return true;
2744 }
2745 case TargetOpcode::G_SADDO:
2746 case TargetOpcode::G_UADDO:
2747 case TargetOpcode::G_SSUBO: {
2748 // Emit the operation and get the correct condition code.
2749 MachineIRBuilder MIRBuilder(I);
2750 auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(),
2751 I.getOperand(2), I.getOperand(3), MIRBuilder);
2752
2753 // Now, put the overflow result in the register given by the first operand
2754 // to the overflow op. CSINC increments the result when the predicate is
2755 // false, so to get the increment when it's true, we need to use the
2756 // inverse. In this case, we want to increment when carry is set.
2757 Register ZReg = AArch64::WZR;
2758 auto CsetMI = MIRBuilder
2759 .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
2760 {ZReg, ZReg})
2761 .addImm(getInvertedCondCode(OpAndCC.second));
2762 constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
2763 I.eraseFromParent();
2764 return true;
2765 }
2766
2767 case TargetOpcode::G_PTRMASK: {
2768 Register MaskReg = I.getOperand(2).getReg();
2769 Optional<int64_t> MaskVal = getConstantVRegSExtVal(MaskReg, MRI);
2770 // TODO: Implement arbitrary cases
2771 if (!MaskVal || !isShiftedMask_64(*MaskVal))
2772 return false;
2773
2774 uint64_t Mask = *MaskVal;
2775 I.setDesc(TII.get(AArch64::ANDXri));
2776 I.getOperand(2).ChangeToImmediate(
2777 AArch64_AM::encodeLogicalImmediate(Mask, 64));
2778
2779 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2780 }
2781 case TargetOpcode::G_PTRTOINT:
2782 case TargetOpcode::G_TRUNC: {
2783 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2784 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
2785
2786 const Register DstReg = I.getOperand(0).getReg();
2787 const Register SrcReg = I.getOperand(1).getReg();
2788
2789 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2790 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2791
2792 if (DstRB.getID() != SrcRB.getID()) {
2793 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n"
; } } while (false)
2794 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n"
; } } while (false)
;
2795 return false;
2796 }
2797
2798 if (DstRB.getID() == AArch64::GPRRegBankID) {
2799 const TargetRegisterClass *DstRC =
2800 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
2801 if (!DstRC)
2802 return false;
2803
2804 const TargetRegisterClass *SrcRC =
2805 getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
2806 if (!SrcRC)
2807 return false;
2808
2809 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
2810 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
2811 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n"
; } } while (false)
;
2812 return false;
2813 }
2814
2815 if (DstRC == SrcRC) {
2816 // Nothing to be done
2817 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
2818 SrcTy == LLT::scalar(64)) {
2819 llvm_unreachable("TableGen can import this case")::llvm::llvm_unreachable_internal("TableGen can import this case"
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2819)
;
2820 return false;
2821 } else if (DstRC == &AArch64::GPR32RegClass &&
2822 SrcRC == &AArch64::GPR64RegClass) {
2823 I.getOperand(1).setSubReg(AArch64::sub_32);
2824 } else {
2825 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n"
; } } while (false)
2826 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n"
; } } while (false)
;
2827 return false;
2828 }
2829
2830 I.setDesc(TII.get(TargetOpcode::COPY));
2831 return true;
2832 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
2833 if (DstTy == LLT::vector(4, 16) && SrcTy == LLT::vector(4, 32)) {
2834 I.setDesc(TII.get(AArch64::XTNv4i16));
2835 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2836 return true;
2837 }
2838
2839 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
2840 MachineIRBuilder MIB(I);
2841 MachineInstr *Extract = emitExtractVectorElt(
2842 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
2843 if (!Extract)
2844 return false;
2845 I.eraseFromParent();
2846 return true;
2847 }
2848
2849 // We might have a vector G_PTRTOINT, in which case just emit a COPY.
2850 if (Opcode == TargetOpcode::G_PTRTOINT) {
2851 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector")((DstTy.isVector() && "Expected an FPR ptrtoint to be a vector"
) ? static_cast<void> (0) : __assert_fail ("DstTy.isVector() && \"Expected an FPR ptrtoint to be a vector\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2851, __PRETTY_FUNCTION__))
;
2852 I.setDesc(TII.get(TargetOpcode::COPY));
2853 return true;
2854 }
2855 }
2856
2857 return false;
2858 }
2859
2860 case TargetOpcode::G_ANYEXT: {
2861 const Register DstReg = I.getOperand(0).getReg();
2862 const Register SrcReg = I.getOperand(1).getReg();
2863
2864 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
2865 if (RBDst.getID() != AArch64::GPRRegBankID) {
2866 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDstdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_ANYEXT on bank: " <<
RBDst << ", expected: GPR\n"; } } while (false)
2867 << ", expected: GPR\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_ANYEXT on bank: " <<
RBDst << ", expected: GPR\n"; } } while (false)
;
2868 return false;
2869 }
2870
2871 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
2872 if (RBSrc.getID() != AArch64::GPRRegBankID) {
2873 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrcdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_ANYEXT on bank: " <<
RBSrc << ", expected: GPR\n"; } } while (false)
2874 << ", expected: GPR\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_ANYEXT on bank: " <<
RBSrc << ", expected: GPR\n"; } } while (false)
;
2875 return false;
2876 }
2877
2878 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
2879
2880 if (DstSize == 0) {
2881 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n"
; } } while (false)
;
2882 return false;
2883 }
2884
2885 if (DstSize != 64 && DstSize > 32) {
2886 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSizedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_ANYEXT to size: " <<
DstSize << ", expected: 32 or 64\n"; } } while (false)
2887 << ", expected: 32 or 64\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_ANYEXT to size: " <<
DstSize << ", expected: 32 or 64\n"; } } while (false)
;
2888 return false;
2889 }
2890 // At this point G_ANYEXT is just like a plain COPY, but we need
2891 // to explicitly form the 64-bit value if any.
2892 if (DstSize > 32) {
2893 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
2894 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
2895 .addDef(ExtSrc)
2896 .addImm(0)
2897 .addUse(SrcReg)
2898 .addImm(AArch64::sub_32);
2899 I.getOperand(1).setReg(ExtSrc);
2900 }
2901 return selectCopy(I, TII, MRI, TRI, RBI);
2902 }
2903
2904 case TargetOpcode::G_ZEXT:
2905 case TargetOpcode::G_SEXT_INREG:
2906 case TargetOpcode::G_SEXT: {
2907 unsigned Opcode = I.getOpcode();
2908 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
2909 const Register DefReg = I.getOperand(0).getReg();
2910 Register SrcReg = I.getOperand(1).getReg();
2911 const LLT DstTy = MRI.getType(DefReg);
2912 const LLT SrcTy = MRI.getType(SrcReg);
2913 unsigned DstSize = DstTy.getSizeInBits();
2914 unsigned SrcSize = SrcTy.getSizeInBits();
2915
2916 // SEXT_INREG has the same src reg size as dst, the size of the value to be
2917 // extended is encoded in the imm.
2918 if (Opcode == TargetOpcode::G_SEXT_INREG)
2919 SrcSize = I.getOperand(2).getImm();
2920
2921 if (DstTy.isVector())
2922 return false; // Should be handled by imported patterns.
2923
2924 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==(((*RBI.getRegBank(DefReg, MRI, TRI)).getID() == AArch64::GPRRegBankID
&& "Unexpected ext regbank") ? static_cast<void>
(0) : __assert_fail ("(*RBI.getRegBank(DefReg, MRI, TRI)).getID() == AArch64::GPRRegBankID && \"Unexpected ext regbank\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2926, __PRETTY_FUNCTION__))
2925 AArch64::GPRRegBankID &&(((*RBI.getRegBank(DefReg, MRI, TRI)).getID() == AArch64::GPRRegBankID
&& "Unexpected ext regbank") ? static_cast<void>
(0) : __assert_fail ("(*RBI.getRegBank(DefReg, MRI, TRI)).getID() == AArch64::GPRRegBankID && \"Unexpected ext regbank\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2926, __PRETTY_FUNCTION__))
2926 "Unexpected ext regbank")(((*RBI.getRegBank(DefReg, MRI, TRI)).getID() == AArch64::GPRRegBankID
&& "Unexpected ext regbank") ? static_cast<void>
(0) : __assert_fail ("(*RBI.getRegBank(DefReg, MRI, TRI)).getID() == AArch64::GPRRegBankID && \"Unexpected ext regbank\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2926, __PRETTY_FUNCTION__))
;
2927
2928 MachineIRBuilder MIB(I);
2929 MachineInstr *ExtI;
2930
2931 // First check if we're extending the result of a load which has a dest type
2932 // smaller than 32 bits, then this zext is redundant. GPR32 is the smallest
2933 // GPR register on AArch64 and all loads which are smaller automatically
2934 // zero-extend the upper bits. E.g.
2935 // %v(s8) = G_LOAD %p, :: (load 1)
2936 // %v2(s32) = G_ZEXT %v(s8)
2937 if (!IsSigned) {
2938 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
2939 bool IsGPR =
2940 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
2941 if (LoadMI && IsGPR) {
2942 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
2943 unsigned BytesLoaded = MemOp->getSize();
2944 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
2945 return selectCopy(I, TII, MRI, TRI, RBI);
2946 }
2947
2948 // If we are zero extending from 32 bits to 64 bits, it's possible that
2949 // the instruction implicitly does the zero extend for us. In that case,
2950 // we can just emit a SUBREG_TO_REG.
2951 if (IsGPR && SrcSize == 32 && DstSize == 64) {
2952 // Unlike with the G_LOAD case, we don't want to look through copies
2953 // here.
2954 MachineInstr *Def = MRI.getVRegDef(SrcReg);
2955 if (Def && isDef32(*Def)) {
2956 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
2957 .addImm(0)
2958 .addUse(SrcReg)
2959 .addImm(AArch64::sub_32);
2960
2961 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
2962 MRI)) {
2963 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Failed to constrain G_ZEXT destination\n"
; } } while (false)
;
2964 return false;
2965 }
2966
2967 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
2968 MRI)) {
2969 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Failed to constrain G_ZEXT source\n"
; } } while (false)
;
2970 return false;
2971 }
2972
2973 I.eraseFromParent();
2974 return true;
2975 }
2976 }
2977 }
2978
2979 if (DstSize == 64) {
2980 if (Opcode != TargetOpcode::G_SEXT_INREG) {
2981 // FIXME: Can we avoid manually doing this?
2982 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
2983 MRI)) {
2984 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Failed to constrain " <<
TII.getName(Opcode) << " operand\n"; } } while (false)
2985 << " operand\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Failed to constrain " <<
TII.getName(Opcode) << " operand\n"; } } while (false)
;
2986 return false;
2987 }
2988 SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
2989 {&AArch64::GPR64RegClass}, {})
2990 .addImm(0)
2991 .addUse(SrcReg)
2992 .addImm(AArch64::sub_32)
2993 .getReg(0);
2994 }
2995
2996 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
2997 {DefReg}, {SrcReg})
2998 .addImm(0)
2999 .addImm(SrcSize - 1);
3000 } else if (DstSize <= 32) {
3001 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3002 {DefReg}, {SrcReg})
3003 .addImm(0)
3004 .addImm(SrcSize - 1);
3005 } else {
3006 return false;
3007 }
3008
3009 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
3010 I.eraseFromParent();
3011 return true;
3012 }
3013
3014 case TargetOpcode::G_SITOFP:
3015 case TargetOpcode::G_UITOFP:
3016 case TargetOpcode::G_FPTOSI:
3017 case TargetOpcode::G_FPTOUI: {
3018 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
3019 SrcTy = MRI.getType(I.getOperand(1).getReg());
3020 const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
3021 if (NewOpc == Opcode)
3022 return false;
3023
3024 I.setDesc(TII.get(NewOpc));
3025 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3026
3027 return true;
3028 }
3029
3030 case TargetOpcode::G_FREEZE:
3031 return selectCopy(I, TII, MRI, TRI, RBI);
3032
3033 case TargetOpcode::G_INTTOPTR:
3034 // The importer is currently unable to import pointer types since they
3035 // didn't exist in SelectionDAG.
3036 return selectCopy(I, TII, MRI, TRI, RBI);
3037
3038 case TargetOpcode::G_BITCAST:
3039 // Imported SelectionDAG rules can handle every bitcast except those that
3040 // bitcast from a type to the same type. Ideally, these shouldn't occur
3041 // but we might not run an optimizer that deletes them. The other exception
3042 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3043 // of them.
3044 return selectCopy(I, TII, MRI, TRI, RBI);
3045
3046 case TargetOpcode::G_SELECT: {
3047 if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
3048 LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Tydo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_SELECT cond has type: "
<< Ty << ", expected: " << LLT::scalar(1) <<
'\n'; } } while (false)
3049 << ", expected: " << LLT::scalar(1) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_SELECT cond has type: "
<< Ty << ", expected: " << LLT::scalar(1) <<
'\n'; } } while (false)
;
3050 return false;
3051 }
3052
3053 const Register CondReg = I.getOperand(1).getReg();
3054 const Register TReg = I.getOperand(2).getReg();
3055 const Register FReg = I.getOperand(3).getReg();
3056
3057 if (tryOptSelect(I))
3058 return true;
3059
3060 // Make sure to use an unused vreg instead of wzr, so that the peephole
3061 // optimizations will be able to optimize these.
3062 MachineIRBuilder MIB(I);
3063 Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3064 auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3065 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
3066 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3067 if (!emitSelect(I.getOperand(0).getReg(), TReg, FReg, AArch64CC::NE, MIB))
3068 return false;
3069 I.eraseFromParent();
3070 return true;
3071 }
3072 case TargetOpcode::G_ICMP: {
3073 if (Ty.isVector())
3074 return selectVectorICmp(I, MRI);
3075
3076 if (Ty != LLT::scalar(32)) {
3077 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Tydo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_ICMP result has type: "
<< Ty << ", expected: " << LLT::scalar(32)
<< '\n'; } } while (false)
3078 << ", expected: " << LLT::scalar(32) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "G_ICMP result has type: "
<< Ty << ", expected: " << LLT::scalar(32)
<< '\n'; } } while (false)
;
3079 return false;
3080 }
3081
3082 MachineIRBuilder MIRBuilder(I);
3083 auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3084 emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1),
3085 MIRBuilder);
3086 emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIRBuilder);
3087 I.eraseFromParent();
3088 return true;
3089 }
3090
3091 case TargetOpcode::G_FCMP: {
3092 MachineIRBuilder MIRBuilder(I);
3093 CmpInst::Predicate Pred =
3094 static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3095 if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(),
3096 MIRBuilder, Pred) ||
3097 !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIRBuilder))
3098 return false;
3099 I.eraseFromParent();
3100 return true;
3101 }
3102 case TargetOpcode::G_VASTART:
3103 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3104 : selectVaStartAAPCS(I, MF, MRI);
3105 case TargetOpcode::G_INTRINSIC:
3106 return selectIntrinsic(I, MRI);
3107 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3108 return selectIntrinsicWithSideEffects(I, MRI);
3109 case TargetOpcode::G_IMPLICIT_DEF: {
3110 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
3111 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3112 const Register DstReg = I.getOperand(0).getReg();
3113 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3114 const TargetRegisterClass *DstRC =
3115 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
3116 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
3117 return true;
3118 }
3119 case TargetOpcode::G_BLOCK_ADDR: {
3120 if (TM.getCodeModel() == CodeModel::Large) {
3121 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
3122 I.eraseFromParent();
3123 return true;
3124 } else {
3125 I.setDesc(TII.get(AArch64::MOVaddrBA));
3126 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
3127 I.getOperand(0).getReg())
3128 .addBlockAddress(I.getOperand(1).getBlockAddress(),
3129 /* Offset */ 0, AArch64II::MO_PAGE)
3130 .addBlockAddress(
3131 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
3132 AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3133 I.eraseFromParent();
3134 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3135 }
3136 }
3137 case AArch64::G_DUP: {
3138 // When the scalar of G_DUP is an s8/s16 gpr, they can't be selected by
3139 // imported patterns. Do it manually here. Avoiding generating s16 gpr is
3140 // difficult because at RBS we may end up pessimizing the fpr case if we
3141 // decided to add an anyextend to fix this. Manual selection is the most
3142 // robust solution for now.
3143 Register SrcReg = I.getOperand(1).getReg();
3144 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::GPRRegBankID)
3145 return false; // We expect the fpr regbank case to be imported.
3146 LLT SrcTy = MRI.getType(SrcReg);
3147 if (SrcTy.getSizeInBits() == 16)
3148 I.setDesc(TII.get(AArch64::DUPv8i16gpr));
3149 else if (SrcTy.getSizeInBits() == 8)
3150 I.setDesc(TII.get(AArch64::DUPv16i8gpr));
3151 else
3152 return false;
3153 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3154 }
3155 case TargetOpcode::G_INTRINSIC_TRUNC:
3156 return selectIntrinsicTrunc(I, MRI);
3157 case TargetOpcode::G_INTRINSIC_ROUND:
3158 return selectIntrinsicRound(I, MRI);
3159 case TargetOpcode::G_BUILD_VECTOR:
3160 return selectBuildVector(I, MRI);
3161 case TargetOpcode::G_MERGE_VALUES:
3162 return selectMergeValues(I, MRI);
3163 case TargetOpcode::G_UNMERGE_VALUES:
3164 return selectUnmergeValues(I, MRI);
3165 case TargetOpcode::G_SHUFFLE_VECTOR:
3166 return selectShuffleVector(I, MRI);
3167 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3168 return selectExtractElt(I, MRI);
3169 case TargetOpcode::G_INSERT_VECTOR_ELT:
3170 return selectInsertElt(I, MRI);
3171 case TargetOpcode::G_CONCAT_VECTORS:
3172 return selectConcatVectors(I, MRI);
3173 case TargetOpcode::G_JUMP_TABLE:
3174 return selectJumpTable(I, MRI);
3175 case TargetOpcode::G_VECREDUCE_FADD:
3176 case TargetOpcode::G_VECREDUCE_ADD:
3177 return selectReduction(I, MRI);
3178 }
3179
3180 return false;
3181}
3182
3183bool AArch64InstructionSelector::selectReduction(
3184 MachineInstr &I, MachineRegisterInfo &MRI) const {
3185 Register VecReg = I.getOperand(1).getReg();
3186 LLT VecTy = MRI.getType(VecReg);
3187 if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) {
3188 unsigned Opc = 0;
3189 if (VecTy == LLT::vector(16, 8))
3190 Opc = AArch64::ADDVv16i8v;
3191 else if (VecTy == LLT::vector(8, 16))
3192 Opc = AArch64::ADDVv8i16v;
3193 else if (VecTy == LLT::vector(4, 32))
3194 Opc = AArch64::ADDVv4i32v;
3195 else if (VecTy == LLT::vector(2, 64))
3196 Opc = AArch64::ADDPv2i64p;
3197 else {
3198 LLVM_DEBUG(dbgs() << "Unhandled type for add reduction")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unhandled type for add reduction"
; } } while (false)
;
3199 return false;
3200 }
3201 I.setDesc(TII.get(Opc));
3202 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3203 }
3204
3205 if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) {
3206 unsigned Opc = 0;
3207 if (VecTy == LLT::vector(2, 32))
3208 Opc = AArch64::FADDPv2i32p;
3209 else if (VecTy == LLT::vector(2, 64))
3210 Opc = AArch64::FADDPv2i64p;
3211 else {
3212 LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unhandled type for fadd reduction"
; } } while (false)
;
3213 return false;
3214 }
3215 I.setDesc(TII.get(Opc));
3216 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3217 }
3218 return false;
3219}
3220
3221bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3222 MachineRegisterInfo &MRI) const {
3223 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT")((I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT"
) ? static_cast<void> (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_BRJT && \"Expected G_BRJT\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 3223, __PRETTY_FUNCTION__))
;
3224 Register JTAddr = I.getOperand(0).getReg();
3225 unsigned JTI = I.getOperand(1).getIndex();
3226 Register Index = I.getOperand(2).getReg();
3227 MachineIRBuilder MIB(I);
3228
3229 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3230 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3231
3232 MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
3233 auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
3234 {TargetReg, ScratchReg}, {JTAddr, Index})
3235 .addJumpTableIndex(JTI);
3236 // Build the indirect branch.
3237 MIB.buildInstr(AArch64::BR, {}, {TargetReg});
3238 I.eraseFromParent();
3239 return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
3240}
3241
3242bool AArch64InstructionSelector::selectJumpTable(
3243 MachineInstr &I, MachineRegisterInfo &MRI) const {
3244 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table")((I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table"
) ? static_cast<void> (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_JUMP_TABLE && \"Expected jump table\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 3244, __PRETTY_FUNCTION__))
;
3245 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!")((I.getOperand(1).isJTI() && "Jump table op should have a JTI!"
) ? static_cast<void> (0) : __assert_fail ("I.getOperand(1).isJTI() && \"Jump table op should have a JTI!\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 3245, __PRETTY_FUNCTION__))
;
3246
3247 Register DstReg = I.getOperand(0).getReg();
3248 unsigned JTI = I.getOperand(1).getIndex();
3249 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
3250 MachineIRBuilder MIB(I);
3251 auto MovMI =
3252 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3253 .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
3254 .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3255 I.eraseFromParent();
3256 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3257}
3258
3259bool AArch64InstructionSelector::selectTLSGlobalValue(
3260 MachineInstr &I, MachineRegisterInfo &MRI) const {
3261 if (!STI.isTargetMachO())
3262 return false;
3263 MachineFunction &MF = *I.getParent()->getParent();
3264 MF.getFrameInfo().setAdjustsStack(true);
3265
3266 const GlobalValue &GV = *I.getOperand(1).getGlobal();
3267 MachineIRBuilder MIB(I);
3268
3269 auto LoadGOT =
3270 MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3271 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
3272
3273 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3274 {LoadGOT.getReg(0)})
3275 .addImm(0);
3276
3277 MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
3278 // TLS calls preserve all registers except those that absolutely must be
3279 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3280 // silly).
3281 MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load})
3282 .addUse(AArch64::X0, RegState::Implicit)
3283 .addDef(AArch64::X0, RegState::Implicit)
3284 .addRegMask(TRI.getTLSCallPreservedMask());
3285
3286 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
3287 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3288 MRI);
3289 I.eraseFromParent();
3290 return true;
3291}
3292
3293bool AArch64InstructionSelector::selectIntrinsicTrunc(
3294 MachineInstr &I, MachineRegisterInfo &MRI) const {
3295 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3296
3297 // Select the correct opcode.
3298 unsigned Opc = 0;
3299 if (!SrcTy.isVector()) {
3300 switch (SrcTy.getSizeInBits()) {
3301 default:
3302 case 16:
3303 Opc = AArch64::FRINTZHr;
3304 break;
3305 case 32:
3306 Opc = AArch64::FRINTZSr;
3307 break;
3308 case 64:
3309 Opc = AArch64::FRINTZDr;
3310 break;
3311 }
3312 } else {
3313 unsigned NumElts = SrcTy.getNumElements();
3314 switch (SrcTy.getElementType().getSizeInBits()) {
3315 default:
3316 break;
3317 case 16:
3318 if (NumElts == 4)
3319 Opc = AArch64::FRINTZv4f16;
3320 else if (NumElts == 8)
3321 Opc = AArch64::FRINTZv8f16;
3322 break;
3323 case 32:
3324 if (NumElts == 2)
3325 Opc = AArch64::FRINTZv2f32;
3326 else if (NumElts == 4)
3327 Opc = AArch64::FRINTZv4f32;
3328 break;
3329 case 64:
3330 if (NumElts == 2)
3331 Opc = AArch64::FRINTZv2f64;
3332 break;
3333 }
3334 }
3335
3336 if (!Opc) {
3337 // Didn't get an opcode above, bail.
3338 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n"
; } } while (false)
;
3339 return false;
3340 }
3341
3342 // Legalization would have set us up perfectly for this; we just need to
3343 // set the opcode and move on.
3344 I.setDesc(TII.get(Opc));
3345 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3346}
3347
3348bool AArch64InstructionSelector::selectIntrinsicRound(
3349 MachineInstr &I, MachineRegisterInfo &MRI) const {
3350 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3351
3352 // Select the correct opcode.
3353 unsigned Opc = 0;
3354 if (!SrcTy.isVector()) {
3355 switch (SrcTy.getSizeInBits()) {
3356 default:
3357 case 16:
3358 Opc = AArch64::FRINTAHr;
3359 break;
3360 case 32:
3361 Opc = AArch64::FRINTASr;
3362 break;
3363 case 64:
3364 Opc = AArch64::FRINTADr;
3365 break;
3366 }
3367 } else {
3368 unsigned NumElts = SrcTy.getNumElements();
3369 switch (SrcTy.getElementType().getSizeInBits()) {
3370 default:
3371 break;
3372 case 16:
3373 if (NumElts == 4)
3374 Opc = AArch64::FRINTAv4f16;
3375 else if (NumElts == 8)
3376 Opc = AArch64::FRINTAv8f16;
3377 break;
3378 case 32:
3379 if (NumElts == 2)
3380 Opc = AArch64::FRINTAv2f32;
3381 else if (NumElts == 4)
3382 Opc = AArch64::FRINTAv4f32;
3383 break;
3384 case 64:
3385 if (NumElts == 2)
3386 Opc = AArch64::FRINTAv2f64;
3387 break;
3388 }
3389 }
3390
3391 if (!Opc) {
3392 // Didn't get an opcode above, bail.
3393 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n"
; } } while (false)
;
3394 return false;
3395 }
3396
3397 // Legalization would have set us up perfectly for this; we just need to
3398 // set the opcode and move on.
3399 I.setDesc(TII.get(Opc));
3400 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3401}
3402
3403bool AArch64InstructionSelector::selectVectorICmp(
3404 MachineInstr &I, MachineRegisterInfo &MRI) const {
3405 Register DstReg = I.getOperand(0).getReg();
3406 LLT DstTy = MRI.getType(DstReg);
3407 Register SrcReg = I.getOperand(2).getReg();
3408 Register Src2Reg = I.getOperand(3).getReg();
3409 LLT SrcTy = MRI.getType(SrcReg);
3410
3411 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
3412 unsigned NumElts = DstTy.getNumElements();
3413
3414 // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
3415 // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
3416 // Third index is cc opcode:
3417 // 0 == eq
3418 // 1 == ugt
3419 // 2 == uge
3420 // 3 == ult
3421 // 4 == ule
3422 // 5 == sgt
3423 // 6 == sge
3424 // 7 == slt
3425 // 8 == sle
3426 // ne is done by negating 'eq' result.
3427
3428 // This table below assumes that for some comparisons the operands will be
3429 // commuted.
3430 // ult op == commute + ugt op
3431 // ule op == commute + uge op
3432 // slt op == commute + sgt op
3433 // sle op == commute + sge op
3434 unsigned PredIdx = 0;
3435 bool SwapOperands = false;
3436 CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
3437 switch (Pred) {
3438 case CmpInst::ICMP_NE:
3439 case CmpInst::ICMP_EQ:
3440 PredIdx = 0;
3441 break;
3442 case CmpInst::ICMP_UGT:
3443 PredIdx = 1;
3444 break;
3445 case CmpInst::ICMP_UGE:
3446 PredIdx = 2;
3447 break;
3448 case CmpInst::ICMP_ULT:
3449 PredIdx = 3;
3450 SwapOperands = true;
3451 break;
3452 case CmpInst::ICMP_ULE:
3453 PredIdx = 4;
3454 SwapOperands = true;
3455 break;
3456 case CmpInst::ICMP_SGT:
3457 PredIdx = 5;
3458 break;
3459 case CmpInst::ICMP_SGE:
3460 PredIdx = 6;
3461 break;
3462 case CmpInst::ICMP_SLT:
3463 PredIdx = 7;
3464 SwapOperands = true;
3465 break;
3466 case CmpInst::ICMP_SLE:
3467 PredIdx = 8;
3468 SwapOperands = true;
3469 break;
3470 default:
3471 llvm_unreachable("Unhandled icmp predicate")::llvm::llvm_unreachable_internal("Unhandled icmp predicate",
"/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 3471)
;
3472 return false;
3473 }
3474
3475 // This table obviously should be tablegen'd when we have our GISel native
3476 // tablegen selector.
3477
3478 static const unsigned OpcTable[4][4][9] = {
3479 {
3480 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3481 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3482 0 /* invalid */},
3483 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3484 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3485 0 /* invalid */},
3486 {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
3487 AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
3488 AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
3489 {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
3490 AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
3491 AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
3492 },
3493 {
3494 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3495 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3496 0 /* invalid */},
3497 {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
3498 AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
3499 AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
3500 {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
3501 AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
3502 AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
3503 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3504 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3505 0 /* invalid */}
3506 },
3507 {
3508 {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
3509 AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
3510 AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
3511 {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
3512 AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
3513 AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
3514 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3515 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3516 0 /* invalid */},
3517 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3518 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3519 0 /* invalid */}
3520 },
3521 {
3522 {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
3523 AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
3524 AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
3525 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3526 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3527 0 /* invalid */},
3528 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3529 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3530 0 /* invalid */},
3531 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3532 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3533 0 /* invalid */}
3534 },
3535 };
3536 unsigned EltIdx = Log2_32(SrcEltSize / 8);
3537 unsigned NumEltsIdx = Log2_32(NumElts / 2);
3538 unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
3539 if (!Opc) {
3540 LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Could not map G_ICMP to cmp opcode"
; } } while (false)
;
3541 return false;
3542 }
3543
3544 const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3545 const TargetRegisterClass *SrcRC =
3546 getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
3547 if (!SrcRC) {
3548 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Could not determine source register class.\n"
; } } while (false)
;
3549 return false;
3550 }
3551
3552 unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
3553 if (SrcTy.getSizeInBits() == 128)
3554 NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
3555
3556 if (SwapOperands)
3557 std::swap(SrcReg, Src2Reg);
3558
3559 MachineIRBuilder MIB(I);
3560 auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
3561 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3562
3563 // Invert if we had a 'ne' cc.
3564 if (NotOpc) {
3565 Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
3566 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3567 } else {
3568 MIB.buildCopy(DstReg, Cmp.getReg(0));
3569 }
3570 RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
3571 I.eraseFromParent();
3572 return true;
3573}
3574
3575MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3576 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3577 MachineIRBuilder &MIRBuilder) const {
3578 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3579
3580 auto BuildFn = [&](unsigned SubregIndex) {
3581 auto Ins =
3582 MIRBuilder
3583 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
3584 .addImm(SubregIndex);
3585 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
3586 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
3587 return &*Ins;
3588 };
3589
3590 switch (EltSize) {
3591 case 16:
3592 return BuildFn(AArch64::hsub);
3593 case 32:
3594 return BuildFn(AArch64::ssub);
3595 case 64:
3596 return BuildFn(AArch64::dsub);
3597 default:
3598 return nullptr;
3599 }
3600}
3601
3602bool AArch64InstructionSelector::selectMergeValues(
3603 MachineInstr &I, MachineRegisterInfo &MRI) const {
3604 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode")((I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode"
) ? static_cast<void> (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_MERGE_VALUES && \"unexpected opcode\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 3604, __PRETTY_FUNCTION__))
;
3605 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3606 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3607 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation")((!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation"
) ? static_cast<void> (0) : __assert_fail ("!DstTy.isVector() && !SrcTy.isVector() && \"invalid merge operation\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 3607, __PRETTY_FUNCTION__))
;
3608 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3609
3610 if (I.getNumOperands() != 3)
3611 return false;
3612
3613 // Merging 2 s64s into an s128.
3614 if (DstTy == LLT::scalar(128)) {
3615 if (SrcTy.getSizeInBits() != 64)
3616 return false;
3617 MachineIRBuilder MIB(I);
3618 Register DstReg = I.getOperand(0).getReg();
3619 Register Src1Reg = I.getOperand(1).getReg();
3620 Register Src2Reg = I.getOperand(2).getReg();
3621 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
3622 MachineInstr *InsMI =
3623 emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB);
3624 if (!InsMI)
3625 return false;
3626 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
3627 Src2Reg, /* LaneIdx */ 1, RB, MIB);
3628 if (!Ins2MI)
3629 return false;
3630 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3631 constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
3632 I.eraseFromParent();
3633 return true;
3634 }
3635
3636 if (RB.getID() != AArch64::GPRRegBankID)
3637 return false;
3638
3639 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
3640 return false;
3641
3642 auto *DstRC = &AArch64::GPR64RegClass;
3643 Register SubToRegDef = MRI.createVirtualRegister(DstRC);
3644 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3645 TII.get(TargetOpcode::SUBREG_TO_REG))
3646 .addDef(SubToRegDef)
3647 .addImm(0)
3648 .addUse(I.getOperand(1).getReg())
3649 .addImm(AArch64::sub_32);
3650 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
3651 // Need to anyext the second scalar before we can use bfm
3652 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3653 TII.get(TargetOpcode::SUBREG_TO_REG))
3654 .addDef(SubToRegDef2)
3655 .addImm(0)
3656 .addUse(I.getOperand(2).getReg())
3657 .addImm(AArch64::sub_32);
3658 MachineInstr &BFM =
3659 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
3660 .addDef(I.getOperand(0).getReg())
3661 .addUse(SubToRegDef)
3662 .addUse(SubToRegDef2)
3663 .addImm(32)
3664 .addImm(31);
3665 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
3666 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
3667 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
3668 I.eraseFromParent();
3669 return true;
3670}
3671
3672static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
3673 const unsigned EltSize) {
3674 // Choose a lane copy opcode and subregister based off of the size of the
3675 // vector's elements.
3676 switch (EltSize) {
3677 case 16:
3678 CopyOpc = AArch64::CPYi16;
3679 ExtractSubReg = AArch64::hsub;
3680 break;
3681 case 32:
3682 CopyOpc = AArch64::CPYi32;
3683 ExtractSubReg = AArch64::ssub;
3684 break;
3685 case 64:
3686 CopyOpc = AArch64::CPYi64;
3687 ExtractSubReg = AArch64::dsub;
3688 break;
3689 default:
3690 // Unknown size, bail out.
3691 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Elt size '" << EltSize
<< "' unsupported.\n"; } } while (false)
;
3692 return false;
3693 }
3694 return true;
3695}
3696
3697MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
3698 Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
3699 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
3700 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3701 unsigned CopyOpc = 0;
3702 unsigned ExtractSubReg = 0;
3703 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
3704 LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Couldn't determine lane copy opcode for instruction.\n"
; } } while (false)
3705 dbgs() << "Couldn't determine lane copy opcode for instruction.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Couldn't determine lane copy opcode for instruction.\n"
; } } while (false)
;
3706 return nullptr;
3707 }
3708
3709 const TargetRegisterClass *DstRC =
3710 getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
3711 if (!DstRC) {
3712 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Could not determine destination register class.\n"
; } } while (false)
;
3713 return nullptr;
3714 }
3715
3716 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
3717 const LLT &VecTy = MRI.getType(VecReg);
3718 const TargetRegisterClass *VecRC =
3719 getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
3720 if (!VecRC) {
3721 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Could not determine source register class.\n"
; } } while (false)
;
3722 return nullptr;
3723 }
3724
3725 // The register that we're going to copy into.
3726 Register InsertReg = VecReg;
3727 if (!DstReg)
3728 DstReg = MRI.createVirtualRegister(DstRC);
3729 // If the lane index is 0, we just use a subregister COPY.
3730 if (LaneIdx == 0) {
3731 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
3732 .addReg(VecReg, 0, ExtractSubReg);
3733 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3734 return &*Copy;
3735 }
3736
3737 // Lane copies require 128-bit wide registers. If we're dealing with an
3738 // unpacked vector, then we need to move up to that width. Insert an implicit
3739 // def and a subregister insert to get us there.
3740 if (VecTy.getSizeInBits() != 128) {
3741 MachineInstr *ScalarToVector = emitScalarToVector(
3742 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
3743 if (!ScalarToVector)
3744 return nullptr;
3745 InsertReg = ScalarToVector->getOperand(0).getReg();
3746 }
3747
3748 MachineInstr *LaneCopyMI =
3749 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
3750 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
3751
3752 // Make sure that we actually constrain the initial copy.
3753 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3754 return LaneCopyMI;
3755}
3756
3757bool AArch64InstructionSelector::selectExtractElt(
3758 MachineInstr &I, MachineRegisterInfo &MRI) const {
3759 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&((I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
"unexpected opcode!") ? static_cast<void> (0) : __assert_fail
("I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT && \"unexpected opcode!\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 3760, __PRETTY_FUNCTION__))
3760 "unexpected opcode!")((I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
"unexpected opcode!") ? static_cast<void> (0) : __assert_fail
("I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT && \"unexpected opcode!\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 3760, __PRETTY_FUNCTION__))
;
3761 Register DstReg = I.getOperand(0).getReg();
3762 const LLT NarrowTy = MRI.getType(DstReg);
3763 const Register SrcReg = I.getOperand(1).getReg();
3764 const LLT WideTy = MRI.getType(SrcReg);
3765 (void)WideTy;
3766 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&((WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
"source register size too small!") ? static_cast<void>
(0) : __assert_fail ("WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() && \"source register size too small!\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 3767, __PRETTY_FUNCTION__))
3767 "source register size too small!")((WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
"source register size too small!") ? static_cast<void>
(0) : __assert_fail ("WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() && \"source register size too small!\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 3767, __PRETTY_FUNCTION__))
;
3768 assert(!NarrowTy.isVector() && "cannot extract vector into vector!")((!NarrowTy.isVector() && "cannot extract vector into vector!"
) ? static_cast<void> (0) : __assert_fail ("!NarrowTy.isVector() && \"cannot extract vector into vector!\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 3768, __PRETTY_FUNCTION__))
;
3769
3770 // Need the lane index to determine the correct copy opcode.
3771 MachineOperand &LaneIdxOp = I.getOperand(2);
3772 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?")((LaneIdxOp.isReg() && "Lane index operand was not a register?"
) ? static_cast<void> (0) : __assert_fail ("LaneIdxOp.isReg() && \"Lane index operand was not a register?\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 3772, __PRETTY_FUNCTION__))
;
3773
3774 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
3775 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Cannot extract into GPR.\n"
; } } while (false)
;
3776 return false;
3777 }
3778
3779 // Find the index to extract from.
3780 auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
3781 if (!VRegAndVal)
3782 return false;
3783 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
3784
3785 MachineIRBuilder MIRBuilder(I);
3786
3787 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3788 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
3789 LaneIdx, MIRBuilder);
3790 if (!Extract)
3791 return false;
3792
3793 I.eraseFromParent();
3794 return true;
3795}
3796
/// Select a G_UNMERGE_VALUES whose destinations are themselves vectors by
/// emitting one sub-vector extract per destination from the wide source.
/// Returns false (leaving I in place) when the source is wider than 128 bits
/// or when any extract cannot be emitted.
3797bool AArch64InstructionSelector::selectSplitVectorUnmerge(
3798 MachineInstr &I, MachineRegisterInfo &MRI) const {
// Operand layout is dst0, dst1, ..., src: the source is the last operand.
3799 unsigned NumElts = I.getNumOperands() - 1;
3800 Register SrcReg = I.getOperand(NumElts).getReg();
3801 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
3802 const LLT SrcTy = MRI.getType(SrcReg);
3803
3804 assert(NarrowTy.isVector() && "Expected an unmerge into vectors")((NarrowTy.isVector() && "Expected an unmerge into vectors"
) ? static_cast<void> (0) : __assert_fail ("NarrowTy.isVector() && \"Expected an unmerge into vectors\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 3804, __PRETTY_FUNCTION__))
;
// Only sources that fit in a single 128-bit vector register are supported.
3805 if (SrcTy.getSizeInBits() > 128) {
3806 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unexpected vector type for vec split unmerge"
; } } while (false)
;
3807 return false;
3808 }
3809
3810 MachineIRBuilder MIB(I);
3811
3812 // We implement a split vector operation by treating the sub-vectors as
3813 // scalars and extracting them.
3814 const RegisterBank &DstRB =
3815 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
3816 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
3817 Register Dst = I.getOperand(OpIdx).getReg();
// Destination OpIdx receives "element" OpIdx of the source, where each
// element is a NarrowTy-sized chunk.
3818 MachineInstr *Extract =
3819 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
3820 if (!Extract)
3821 return false;
3822 }
// All extracts were emitted; the original generic instruction is now dead.
3823 I.eraseFromParent();
3824 return true;
3825}
3826
/// Select G_UNMERGE_VALUES. Vector-to-vector splits are delegated to
/// selectSplitVectorUnmerge; the scalar case is lowered to one subregister
/// copy for element 0 plus vector lane copies for the remaining elements.
/// Only FPR-bank sources/destinations are handled (returns false otherwise).
3827bool AArch64InstructionSelector::selectUnmergeValues(
3828 MachineInstr &I, MachineRegisterInfo &MRI) const {
3829 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&((I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && "unexpected opcode"
) ? static_cast<void> (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && \"unexpected opcode\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 3830, __PRETTY_FUNCTION__))
3830 "unexpected opcode")((I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && "unexpected opcode"
) ? static_cast<void> (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && \"unexpected opcode\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 3830, __PRETTY_FUNCTION__))
;
3831
3832 // TODO: Handle unmerging into GPRs and from scalars to scalars.
3833 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
3834 AArch64::FPRRegBankID ||
3835 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3836 AArch64::FPRRegBankID) {
3837 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
"currently unsupported.\n"; } } while (false)
3838 "currently unsupported.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
"currently unsupported.\n"; } } while (false)
;
3839 return false;
3840 }
3841
3842 // The last operand is the vector source register, and every other operand is
3843 // a register to unpack into.
3844 unsigned NumElts = I.getNumOperands() - 1;
3845 Register SrcReg = I.getOperand(NumElts).getReg();
3846 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
3847 const LLT WideTy = MRI.getType(SrcReg);
3848 (void)WideTy;
3849 assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&(((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
"can only unmerge from vector or s128 types!") ? static_cast
<void> (0) : __assert_fail ("(WideTy.isVector() || WideTy.getSizeInBits() == 128) && \"can only unmerge from vector or s128 types!\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 3850, __PRETTY_FUNCTION__))
3850 "can only unmerge from vector or s128 types!")(((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
"can only unmerge from vector or s128 types!") ? static_cast
<void> (0) : __assert_fail ("(WideTy.isVector() || WideTy.getSizeInBits() == 128) && \"can only unmerge from vector or s128 types!\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 3850, __PRETTY_FUNCTION__))
;
3851 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&((WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
"source register size too small!") ? static_cast<void>
(0) : __assert_fail ("WideTy.getSizeInBits() > NarrowTy.getSizeInBits() && \"source register size too small!\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 3852, __PRETTY_FUNCTION__))
3852 "source register size too small!")((WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
"source register size too small!") ? static_cast<void>
(0) : __assert_fail ("WideTy.getSizeInBits() > NarrowTy.getSizeInBits() && \"source register size too small!\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 3852, __PRETTY_FUNCTION__))
;
3853
// Vector destinations are a sub-vector split, handled separately.
3854 if (!NarrowTy.isScalar())
3855 return selectSplitVectorUnmerge(I, MRI);
3856
3857 MachineIRBuilder MIB(I);
3858
3859 // Choose a lane copy opcode and subregister based off of the size of the
3860 // vector's elements.
3861 unsigned CopyOpc = 0;
3862 unsigned ExtractSubReg = 0;
3863 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
3864 return false;
3865
3866 // Set up for the lane copies.
3867 MachineBasicBlock &MBB = *I.getParent();
3868
3869 // Stores the registers we'll be copying from.
3870 SmallVector<Register, 4> InsertRegs;
3871
3872 // We'll use the first register twice, so we only need NumElts-1 registers.
3873 unsigned NumInsertRegs = NumElts - 1;
3874
3875 // If our elements fit into exactly 128 bits, then we can copy from the source
3876 // directly. Otherwise, we need to do a bit of setup with some subregister
3877 // inserts.
3878 if (NarrowTy.getSizeInBits() * NumElts == 128) {
3879 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
3880 } else {
3881 // No. We have to perform subregister inserts. For each insert, create an
3882 // implicit def and a subregister insert, and save the register we create.
3883 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
3884 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
3885 MachineInstr &ImpDefMI =
3886 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
3887 ImpDefReg);
3888
3889 // Now, create the subregister insert from SrcReg.
3890 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
3891 MachineInstr &InsMI =
3892 *BuildMI(MBB, I, I.getDebugLoc(),
3893 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
3894 .addUse(ImpDefReg)
3895 .addUse(SrcReg)
3896 .addImm(AArch64::dsub);
3897
3898 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
3899 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
3900
3901 // Save the register so that we can copy from it after.
3902 InsertRegs.push_back(InsertReg);
3903 }
3904 }
3905
3906 // Now that we've created any necessary subregister inserts, we can
3907 // create the copies.
3908 //
3909 // Perform the first copy separately as a subregister copy.
3910 Register CopyTo = I.getOperand(0).getReg();
3911 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
3912 .addReg(InsertRegs[0], 0, ExtractSubReg);
3913 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
3914
3915 // Now, perform the remaining copies as vector lane copies.
3916 unsigned LaneIdx = 1;
3917 for (Register InsReg : InsertRegs) {
3918 Register CopyTo = I.getOperand(LaneIdx).getReg();
3919 MachineInstr &CopyInst =
3920 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
3921 .addUse(InsReg)
3922 .addImm(LaneIdx);
3923 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
3924 ++LaneIdx;
3925 }
3926
3927 // Separately constrain the first copy's destination. Because of the
3928 // limitation in constrainOperandRegClass, we can't guarantee that this will
3929 // actually be constrained. So, do it ourselves using the second operand.
3930 const TargetRegisterClass *RC =
3931 MRI.getRegClassOrNull(I.getOperand(1).getReg());
3932 if (!RC) {
3933 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Couldn't constrain copy destination.\n"
; } } while (false)
;
3934 return false;
3935 }
3936
3937 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
3938 I.eraseFromParent();
3939 return true;
3940}
3941
/// Select G_CONCAT_VECTORS by delegating to emitVectorConcat; on success the
/// generic instruction is erased. Returns false if the concat could not be
/// emitted (emitVectorConcat returned null).
3942bool AArch64InstructionSelector::selectConcatVectors(
3943 MachineInstr &I, MachineRegisterInfo &MRI) const {
3944 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&((I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS && "Unexpected opcode"
) ? static_cast<void> (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS && \"Unexpected opcode\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 3945, __PRETTY_FUNCTION__))
3945 "Unexpected opcode")((I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS && "Unexpected opcode"
) ? static_cast<void> (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS && \"Unexpected opcode\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 3945, __PRETTY_FUNCTION__))
;
3946 Register Dst = I.getOperand(0).getReg();
3947 Register Op1 = I.getOperand(1).getReg();
3948 Register Op2 = I.getOperand(2).getReg();
3949 MachineIRBuilder MIRBuilder(I);
3950 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder);
3951 if (!ConcatMI)
3952 return false;
3953 I.eraseFromParent();
3954 return true;
3955}
3956
3957unsigned
3958AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
3959 MachineFunction &MF) const {
3960 Type *CPTy = CPVal->getType();
3961 Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
3962
3963 MachineConstantPool *MCP = MF.getConstantPool();
3964 return MCP->getConstantPoolIndex(CPVal, Alignment);
3965}
3966
/// Materialize \p CPVal via the constant pool: emit an ADRP of the entry's
/// page followed by an LDR of the page offset. Only 16-byte (LDRQui) and
/// 8-byte (LDRDui) constants are supported; returns nullptr otherwise.
3967MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
3968 const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
3969 unsigned CPIdx = emitConstantPoolEntry(CPVal, MIRBuilder.getMF());
3970
// ADRP computes the 4KB page address of the constant-pool entry.
3971 auto Adrp =
3972 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
3973 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
3974
// The load opcode and destination class depend on the constant's store size.
3975 MachineInstr *LoadMI = nullptr;
3976 switch (MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType())) {
3977 case 16:
3978 LoadMI =
3979 &*MIRBuilder
3980 .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
3981 .addConstantPoolIndex(CPIdx, 0,
3982 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3983 break;
3984 case 8:
3985 LoadMI = &*MIRBuilder
3986 .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
3987 .addConstantPoolIndex(
3988 CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3989 break;
3990 default:
3991 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Could not load from constant pool of type "
<< *CPVal->getType(); } } while (false)
3992 << *CPVal->getType())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Could not load from constant pool of type "
<< *CPVal->getType(); } } while (false)
;
3993 return nullptr;
3994 }
3995 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
3996 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
3997 return LoadMI;
3998}
3999
4000/// Return an <Opcode, SubregIndex> pair to do an vector elt insert of a given
4001/// size and RB.
4002static std::pair<unsigned, unsigned>
4003getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
4004 unsigned Opc, SubregIdx;
4005 if (RB.getID() == AArch64::GPRRegBankID) {
4006 if (EltSize == 16) {
4007 Opc = AArch64::INSvi16gpr;
4008 SubregIdx = AArch64::ssub;
4009 } else if (EltSize == 32) {
4010 Opc = AArch64::INSvi32gpr;
4011 SubregIdx = AArch64::ssub;
4012 } else if (EltSize == 64) {
4013 Opc = AArch64::INSvi64gpr;
4014 SubregIdx = AArch64::dsub;
4015 } else {
4016 llvm_unreachable("invalid elt size!")::llvm::llvm_unreachable_internal("invalid elt size!", "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4016)
;
4017 }
4018 } else {
4019 if (EltSize == 8) {
4020 Opc = AArch64::INSvi8lane;
4021 SubregIdx = AArch64::bsub;
4022 } else if (EltSize == 16) {
4023 Opc = AArch64::INSvi16lane;
4024 SubregIdx = AArch64::hsub;
4025 } else if (EltSize == 32) {
4026 Opc = AArch64::INSvi32lane;
4027 SubregIdx = AArch64::ssub;
4028 } else if (EltSize == 64) {
4029 Opc = AArch64::INSvi64lane;
4030 SubregIdx = AArch64::dsub;
4031 } else {
4032 llvm_unreachable("invalid elt size!")::llvm::llvm_unreachable_internal("invalid elt size!", "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4032)
;
4033 }
4034 }
4035 return std::make_pair(Opc, SubregIdx);
4036}
4037
/// Build an already-selected (non-generic) instruction with the given
/// destination and source operands, run any complex-renderer callbacks to add
/// remaining operands, constrain its register operands, and return it.
4038MachineInstr *AArch64InstructionSelector::emitInstr(
4039 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4040 std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4041 const ComplexRendererFns &RenderFns) const {
4042 assert(Opcode && "Expected an opcode?")((Opcode && "Expected an opcode?") ? static_cast<void
> (0) : __assert_fail ("Opcode && \"Expected an opcode?\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4042, __PRETTY_FUNCTION__))
;
4043 assert(!isPreISelGenericOpcode(Opcode) &&((!isPreISelGenericOpcode(Opcode) && "Function should only be used to produce selected instructions!"
) ? static_cast<void> (0) : __assert_fail ("!isPreISelGenericOpcode(Opcode) && \"Function should only be used to produce selected instructions!\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4044, __PRETTY_FUNCTION__))
4044 "Function should only be used to produce selected instructions!")((!isPreISelGenericOpcode(Opcode) && "Function should only be used to produce selected instructions!"
) ? static_cast<void> (0) : __assert_fail ("!isPreISelGenericOpcode(Opcode) && \"Function should only be used to produce selected instructions!\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4044, __PRETTY_FUNCTION__))
;
4045 auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
// Renderer functions append the operands matched by a complex pattern
// (e.g. immediate + shift for an addressing mode).
4046 if (RenderFns)
4047 for (auto &Fn : *RenderFns)
4048 Fn(MI);
4049 constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
4050 return &*MI;
4051}
4052
/// Shared add/sub emission. \p AddrModeAndSizeToOpcode rows are tried in
/// order: [0] positive immediate (ri), [3] negated immediate (ri), [4]
/// extended register (rx), [1] shifted register (rs), with [2] plain
/// register-register (rr) as the fallback. Each row is {64-bit, 32-bit}.
/// Note: the interleaved numbered lines below are static-analyzer path notes
/// from the annotated listing, not code; the reported uninitialized-argument
/// warning (line 893) flows through selectArithExtendedRegister, which is
/// outside this view — confirm the fix there.
4053MachineInstr *AArch64InstructionSelector::emitAddSub(
4054 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4055 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
4056 MachineIRBuilder &MIRBuilder) const {
4057 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4058 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?")((LHS.isReg() && RHS.isReg() && "Expected register operands?"
) ? static_cast<void> (0) : __assert_fail ("LHS.isReg() && RHS.isReg() && \"Expected register operands?\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4058, __PRETTY_FUNCTION__))
;
2
'?' condition is true
4059 auto Ty = MRI.getType(LHS.getReg());
4060 assert(!Ty.isVector() && "Expected a scalar or pointer?")((!Ty.isVector() && "Expected a scalar or pointer?") ?
 static_cast<void> (0) : __assert_fail ("!Ty.isVector() && \"Expected a scalar or pointer?\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4060, __PRETTY_FUNCTION__))
;
3
'?' condition is true
4061 unsigned Size = Ty.getSizeInBits();
4062 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only")(((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only"
) ? static_cast<void> (0) : __assert_fail ("(Size == 32 || Size == 64) && \"Expected a 32-bit or 64-bit type only\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4062, __PRETTY_FUNCTION__))
;
4
Assuming 'Size' is not equal to 32
5
Assuming 'Size' is equal to 64
6
'?' condition is true
4063 bool Is32Bit = Size == 32;
4064
4065 // INSTRri form with positive arithmetic immediate.
4066 if (auto Fns = selectArithImmed(RHS))
7
Taking false branch
4067 return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
4068 MIRBuilder, Fns);
4069
4070 // INSTRri form with negative arithmetic immediate.
4071 if (auto Fns = selectNegArithImmed(RHS))
8
Taking false branch
4072 return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
4073 MIRBuilder, Fns);
4074
4075 // INSTRrx form.
4076 if (auto Fns = selectArithExtendedRegister(RHS))
9
Calling 'AArch64InstructionSelector::selectArithExtendedRegister'
4077 return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
4078 MIRBuilder, Fns);
4079
4080 // INSTRrs form.
4081 if (auto Fns = selectShiftedRegister(RHS))
4082 return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
4083 MIRBuilder, Fns);
4084 return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
4085 MIRBuilder);
4086}
4087
/// Emit an ADD of \p LHS and \p RHS into \p DefReg, letting emitAddSub pick
/// the best form. Table rows (see emitAddSub): ri, rs, rr, negated-ri (SUB),
/// rx; columns are {64-bit, 32-bit}. The bare numbered lines are analyzer
/// path notes from the annotated listing, not code.
4088MachineInstr *
4089AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4090 MachineOperand &RHS,
4091 MachineIRBuilder &MIRBuilder) const {
4092 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4093 {{AArch64::ADDXri, AArch64::ADDWri},
4094 {AArch64::ADDXrs, AArch64::ADDWrs},
4095 {AArch64::ADDXrr, AArch64::ADDWrr},
4096 {AArch64::SUBXri, AArch64::SUBWri},
4097 {AArch64::ADDXrx, AArch64::ADDWrx}}};
4098 return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
1
Calling 'AArch64InstructionSelector::emitAddSub'
4099}
4100
4101MachineInstr *
4102AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4103 MachineOperand &RHS,
4104 MachineIRBuilder &MIRBuilder) const {
4105 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4106 {{AArch64::ADDSXri, AArch64::ADDSWri},
4107 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4108 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4109 {AArch64::SUBSXri, AArch64::SUBSWri},
4110 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4111 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4112}
4113
4114MachineInstr *
4115AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4116 MachineOperand &RHS,
4117 MachineIRBuilder &MIRBuilder) const {
4118 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4119 {{AArch64::SUBSXri, AArch64::SUBSWri},
4120 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4121 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4122 {AArch64::ADDSXri, AArch64::ADDSWri},
4123 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4124 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4125}
4126
4127MachineInstr *
4128AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4129 MachineIRBuilder &MIRBuilder) const {
4130 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4131 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
4132 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4133 return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4134}
4135
/// Emit a TST (ANDS discarding the result) of \p LHS and \p RHS for its flag
/// effects, folding RHS into a logical immediate or shifted register when
/// possible. OpcTable rows: immediate, shifted-register, register-register;
/// columns are {64-bit, 32-bit}.
4136MachineInstr *
4137AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4138 MachineIRBuilder &MIRBuilder) const {
4139 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?")((LHS.isReg() && RHS.isReg() && "Expected register operands?"
) ? static_cast<void> (0) : __assert_fail ("LHS.isReg() && RHS.isReg() && \"Expected register operands?\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4139, __PRETTY_FUNCTION__))
;
4140 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4141 LLT Ty = MRI.getType(LHS.getReg());
4142 unsigned RegSize = Ty.getSizeInBits();
4143 bool Is32Bit = (RegSize == 32);
4144 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4145 {AArch64::ANDSXrs, AArch64::ANDSWrs},
4146 {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4147 // ANDS needs a logical immediate for its immediate form. Check if we can
4148 // fold one in.
4149 if (auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
4150 int64_t Imm = ValAndVReg->Value.getSExtValue();
4151
4152 if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
4153 auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
4154 TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
4155 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
4156 return &*TstMI;
4157 }
4158 }
4159
// Otherwise try to fold a shifted register, falling back to plain ANDSrr.
4160 if (auto Fns = selectLogicalShiftedRegister(RHS))
4161 return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
4162 return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
4163}
4164
/// Emit an integer comparison of \p LHS and \p RHS for \p Predicate. First
/// tries to fold the compare into a CMN/TST via tryFoldIntegerCompare;
/// otherwise emits a SUBS into a fresh register cloned from LHS's class.
4165MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4166 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4167 MachineIRBuilder &MIRBuilder) const {
4168 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!")((LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!"
) ? static_cast<void> (0) : __assert_fail ("LHS.isReg() && RHS.isReg() && \"Expected LHS and RHS to be registers!\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4168, __PRETTY_FUNCTION__))
;
4169 assert(Predicate.isPredicate() && "Expected predicate?")((Predicate.isPredicate() && "Expected predicate?") ?
 static_cast<void> (0) : __assert_fail ("Predicate.isPredicate() && \"Expected predicate?\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4169, __PRETTY_FUNCTION__))
;
4170 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4171 LLT CmpTy = MRI.getType(LHS.getReg());
4172 assert(!CmpTy.isVector() && "Expected scalar or pointer")((!CmpTy.isVector() && "Expected scalar or pointer") ?
 static_cast<void> (0) : __assert_fail ("!CmpTy.isVector() && \"Expected scalar or pointer\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4172, __PRETTY_FUNCTION__))
;
4173 unsigned Size = CmpTy.getSizeInBits();
4174 (void)Size;
4175 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?")(((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?"
) ? static_cast<void> (0) : __assert_fail ("(Size == 32 || Size == 64) && \"Expected a 32-bit or 64-bit LHS/RHS?\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4175, __PRETTY_FUNCTION__))
;
4176 // Fold the compare into a cmn or tst if possible.
4177 if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4178 return FoldCmp;
// SUBS writes a result register even though only the flags are needed; clone
// LHS's vreg so the result has a matching class/bank.
4179 auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
4180 return emitSUBS(Dst, LHS, RHS, MIRBuilder);
4181}
4182
/// Materialize the boolean result of a floating-point compare into \p Dst
/// using CSINC (a "cset"). Predicates that map to two AArch64 condition codes
/// (CC2 != AL) need two csets ORed together.
4183MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4184 Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
4185 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4186#ifndef NDEBUG
4187 LLT Ty = MRI.getType(Dst);
4188 assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&((!Ty.isVector() && Ty.getSizeInBits() == 32 &&
"Expected a 32-bit scalar register?") ? static_cast<void>
(0) : __assert_fail ("!Ty.isVector() && Ty.getSizeInBits() == 32 && \"Expected a 32-bit scalar register?\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4189, __PRETTY_FUNCTION__))
4189 "Expected a 32-bit scalar register?")((!Ty.isVector() && Ty.getSizeInBits() == 32 &&
"Expected a 32-bit scalar register?") ? static_cast<void>
(0) : __assert_fail ("!Ty.isVector() && Ty.getSizeInBits() == 32 && \"Expected a 32-bit scalar register?\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4189, __PRETTY_FUNCTION__))
;
4190#endif
4191 const Register ZeroReg = AArch64::WZR;
// cset dst, cc == csinc dst, wzr, wzr, !cc (CSINC increments when its
// condition is false, hence the inverted code).
4192 auto EmitCSet = [&](Register CsetDst, AArch64CC::CondCode CC) {
4193 auto CSet =
4194 MIRBuilder.buildInstr(AArch64::CSINCWr, {CsetDst}, {ZeroReg, ZeroReg})
4195 .addImm(getInvertedCondCode(CC));
4196 constrainSelectedInstRegOperands(*CSet, TII, TRI, RBI);
4197 return &*CSet;
4198 };
4199
4200 AArch64CC::CondCode CC1, CC2;
4201 changeFCMPPredToAArch64CC(Pred, CC1, CC2);
// Single-condition predicates need just one cset.
4202 if (CC2 == AArch64CC::AL)
4203 return EmitCSet(Dst, CC1);
4204
// Two-condition predicates: cset each condition, then OR the results.
4205 const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
4206 Register Def1Reg = MRI.createVirtualRegister(RC);
4207 Register Def2Reg = MRI.createVirtualRegister(RC);
4208 EmitCSet(Def1Reg, CC1);
4209 EmitCSet(Def2Reg, CC2);
4210 auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4211 constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
4212 return &*OrMI;
4213}
4214
4215MachineInstr *
4216AArch64InstructionSelector::emitFPCompare(Register LHS, Register RHS,
4217 MachineIRBuilder &MIRBuilder,
4218 Optional<CmpInst::Predicate> Pred) const {
4219 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4220 LLT Ty = MRI.getType(LHS);
4221 if (Ty.isVector())
4222 return nullptr;
4223 unsigned OpSize = Ty.getSizeInBits();
4224 if (OpSize != 32 && OpSize != 64)
4225 return nullptr;
4226
4227 // If this is a compare against +0.0, then we don't have
4228 // to explicitly materialize a constant.
4229 const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
4230 bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4231
4232 auto IsEqualityPred = [](CmpInst::Predicate P) {
4233 return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
4234 P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
4235 };
4236 if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4237 // Try commutating the operands.
4238 const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
4239 if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
4240 ShouldUseImm = true;
4241 std::swap(LHS, RHS);
4242 }
4243 }
4244 unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
4245 {AArch64::FCMPSri, AArch64::FCMPDri}};
4246 unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64];
4247
4248 // Partially build the compare. Decide if we need to add a use for the
4249 // third operand based off whether or not we're comparing against 0.0.
4250 auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
4251 if (!ShouldUseImm)
4252 CmpMI.addUse(RHS);
4253 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4254 return &*CmpMI;
4255}
4256
/// Concatenate two equal-typed 64-bit vectors into one 128-bit vector:
/// widen Op1 with scalar_to_vector, then insert Op2 into the upper element.
/// Returns nullptr for mismatched, >=128-bit, or non-64-bit operand types.
/// If \p Dst is None a fresh destination vreg is created.
4257MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4258 Optional<Register> Dst, Register Op1, Register Op2,
4259 MachineIRBuilder &MIRBuilder) const {
4260 // We implement a vector concat by:
4261 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
4262 // 2. Insert the upper vector into the destination's upper element
4263 // TODO: some of this code is common with G_BUILD_VECTOR handling.
4264 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4265
4266 const LLT Op1Ty = MRI.getType(Op1);
4267 const LLT Op2Ty = MRI.getType(Op2);
4268
4269 if (Op1Ty != Op2Ty) {
4270 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Could not do vector concat of differing vector tys"
; } } while (false)
;
4271 return nullptr;
4272 }
4273 assert(Op1Ty.isVector() && "Expected a vector for vector concat")((Op1Ty.isVector() && "Expected a vector for vector concat"
) ? static_cast<void> (0) : __assert_fail ("Op1Ty.isVector() && \"Expected a vector for vector concat\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4273, __PRETTY_FUNCTION__))
;
4274
4275 if (Op1Ty.getSizeInBits() >= 128) {
4276 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Vector concat not supported for full size vectors"
; } } while (false)
;
4277 return nullptr;
4278 }
4279
4280 // At the moment we just support 64 bit vector concats.
4281 if (Op1Ty.getSizeInBits() != 64) {
4282 LLVM_DEBUG(dbgs() << "Vector concat supported for 64b vectors")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Vector concat supported for 64b vectors"
; } } while (false)
;
4283 return nullptr;
4284 }
4285
// Treat each 64-bit operand as one scalar "element" of the 128-bit result.
4286 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
4287 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
4288 const TargetRegisterClass *DstRC =
4289 getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);
4290
4291 MachineInstr *WidenedOp1 =
4292 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
4293 MachineInstr *WidenedOp2 =
4294 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
4295 if (!WidenedOp1 || !WidenedOp2) {
4296 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Could not emit a vector from scalar value"
; } } while (false)
;
4297 return nullptr;
4298 }
4299
4300 // Now do the insert of the upper element.
4301 unsigned InsertOpc, InsSubRegIdx;
4302 std::tie(InsertOpc, InsSubRegIdx) =
4303 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
4304
4305 if (!Dst)
4306 Dst = MRI.createVirtualRegister(DstRC);
// INS dst.d[1], widened_op2.d[0]: place Op2 in the upper half.
4307 auto InsElt =
4308 MIRBuilder
4309 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
4310 .addImm(1) /* Lane index */
4311 .addUse(WidenedOp2->getOperand(0).getReg())
4312 .addImm(0);
4313 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4314 return &*InsElt;
4315}
4316
/// Try to select a G_FCONSTANT as a single FMOV-immediate. Returns nullptr
/// when the value is 0.0, the def isn't 32/64 bits, or the constant has no
/// FMOV encoding; on success, rewrites I in place into FMOVSi/FMOVDi.
4317MachineInstr *AArch64InstructionSelector::emitFMovForFConstant(
4318 MachineInstr &I, MachineRegisterInfo &MRI) const {
4319 assert(I.getOpcode() == TargetOpcode::G_FCONSTANT &&((I.getOpcode() == TargetOpcode::G_FCONSTANT && "Expected a G_FCONSTANT!"
) ? static_cast<void> (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_FCONSTANT && \"Expected a G_FCONSTANT!\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4320, __PRETTY_FUNCTION__))
4320 "Expected a G_FCONSTANT!")((I.getOpcode() == TargetOpcode::G_FCONSTANT && "Expected a G_FCONSTANT!"
) ? static_cast<void> (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_FCONSTANT && \"Expected a G_FCONSTANT!\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4320, __PRETTY_FUNCTION__))
;
4321 MachineOperand &ImmOp = I.getOperand(1);
4322 unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();
4323
4324 // Only handle 32 and 64 bit defs for now.
4325 if (DefSize != 32 && DefSize != 64)
4326 return nullptr;
4327
4328 // Don't handle null values using FMOV.
4329 if (ImmOp.getFPImm()->isNullValue())
4330 return nullptr;
4331
4332 // Get the immediate representation for the FMOV.
4333 const APFloat &ImmValAPF = ImmOp.getFPImm()->getValueAPF();
4334 int Imm = DefSize == 32 ? AArch64_AM::getFP32Imm(ImmValAPF)
4335 : AArch64_AM::getFP64Imm(ImmValAPF);
4336
4337 // If this is -1, it means the immediate can't be represented as the requested
4338 // floating point value. Bail.
4339 if (Imm == -1)
4340 return nullptr;
4341
4342 // Update MI to represent the new FMOV instruction, constrain it, and return.
4343 ImmOp.ChangeToImmediate(Imm);
4344 unsigned MovOpc = DefSize == 32 ? AArch64::FMOVSi : AArch64::FMOVDi;
4345 I.setDesc(TII.get(MovOpc));
4346 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
4347 return &I;
4348}
4349
4350MachineInstr *
4351AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
4352 MachineIRBuilder &MIRBuilder) const {
4353 // CSINC increments the result when the predicate is false. Invert it.
4354 const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
4355 CmpInst::getInversePredicate((CmpInst::Predicate)Pred));
4356 auto I =
4357 MIRBuilder
4358 .buildInstr(AArch64::CSINCWr, {DefReg}, {Register(AArch64::WZR), Register(AArch64::WZR)})
4359 .addImm(InvCC);
4360 constrainSelectedInstRegOperands(*I, TII, TRI, RBI);
4361 return &*I;
4362}
4363
4364std::pair<MachineInstr *, AArch64CC::CondCode>
4365AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4366 MachineOperand &LHS,
4367 MachineOperand &RHS,
4368 MachineIRBuilder &MIRBuilder) const {
4369 switch (Opcode) {
4370 default:
4371 llvm_unreachable("Unexpected opcode!")::llvm::llvm_unreachable_internal("Unexpected opcode!", "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4371)
;
4372 case TargetOpcode::G_SADDO:
4373 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4374 case TargetOpcode::G_UADDO:
4375 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4376 case TargetOpcode::G_SSUBO:
4377 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4378 }
4379}
4380
4381bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
4382 MachineIRBuilder MIB(I);
4383 MachineRegisterInfo &MRI = *MIB.getMRI();
4384 // We want to recognize this pattern:
4385 //
4386 // $z = G_FCMP pred, $x, $y
4387 // ...
4388 // $w = G_SELECT $z, $a, $b
4389 //
4390 // Where the value of $z is *only* ever used by the G_SELECT (possibly with
4391 // some copies/truncs in between.)
4392 //
4393 // If we see this, then we can emit something like this:
4394 //
4395 // fcmp $x, $y
4396 // fcsel $w, $a, $b, pred
4397 //
4398 // Rather than emitting both of the rather long sequences in the standard
4399 // G_FCMP/G_SELECT select methods.
4400
4401 // First, check if the condition is defined by a compare.
4402 MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
4403 while (CondDef) {
4404 // We can only fold if all of the defs have one use.
4405 Register CondDefReg = CondDef->getOperand(0).getReg();
4406 if (!MRI.hasOneNonDBGUse(CondDefReg)) {
4407 // Unless it's another select.
4408 for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
4409 if (CondDef == &UI)
4410 continue;
4411 if (UI.getOpcode() != TargetOpcode::G_SELECT)
4412 return false;
4413 }
4414 }
4415
4416 // We can skip over G_TRUNC since the condition is 1-bit.
4417 // Truncating/extending can have no impact on the value.
4418 unsigned Opc = CondDef->getOpcode();
4419 if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
4420 break;
4421
4422 // Can't see past copies from physregs.
4423 if (Opc == TargetOpcode::COPY &&
4424 Register::isPhysicalRegister(CondDef->getOperand(1).getReg()))
4425 return false;
4426
4427 CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
4428 }
4429
4430 // Is the condition defined by a compare?
4431 if (!CondDef)
4432 return false;
4433
4434 unsigned CondOpc = CondDef->getOpcode();
4435 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP)
4436 return false;
4437
4438 AArch64CC::CondCode CondCode;
4439 if (CondOpc == TargetOpcode::G_ICMP) {
4440 auto Pred =
4441 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
4442 CondCode = changeICMPPredToAArch64CC(Pred);
4443 emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
4444 CondDef->getOperand(1), MIB);
4445 } else {
4446 // Get the condition code for the select.
4447 auto Pred =
4448 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
4449 AArch64CC::CondCode CondCode2;
4450 changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);
4451
4452 // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
4453 // instructions to emit the comparison.
4454 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
4455 // unnecessary.
4456 if (CondCode2 != AArch64CC::AL)
4457 return false;
4458
4459 if (!emitFPCompare(CondDef->getOperand(2).getReg(),
4460 CondDef->getOperand(3).getReg(), MIB)) {
4461 LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Couldn't emit compare for select!\n"
; } } while (false)
;
4462 return false;
4463 }
4464 }
4465
4466 // Emit the select.
4467 emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
4468 I.getOperand(3).getReg(), CondCode, MIB);
4469 I.eraseFromParent();
4470 return true;
4471}
4472
4473MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
4474 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4475 MachineIRBuilder &MIRBuilder) const {
4476 assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&((LHS.isReg() && RHS.isReg() && Predicate.isPredicate
() && "Unexpected MachineOperand") ? static_cast<void
> (0) : __assert_fail ("LHS.isReg() && RHS.isReg() && Predicate.isPredicate() && \"Unexpected MachineOperand\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4477, __PRETTY_FUNCTION__))
4477 "Unexpected MachineOperand")((LHS.isReg() && RHS.isReg() && Predicate.isPredicate
() && "Unexpected MachineOperand") ? static_cast<void
> (0) : __assert_fail ("LHS.isReg() && RHS.isReg() && Predicate.isPredicate() && \"Unexpected MachineOperand\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4477, __PRETTY_FUNCTION__))
;
4478 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4479 // We want to find this sort of thing:
4480 // x = G_SUB 0, y
4481 // G_ICMP z, x
4482 //
4483 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
4484 // e.g:
4485 //
4486 // cmn z, y
4487
4488 // Helper lambda to detect the subtract followed by the compare.
4489 // Takes in the def of the LHS or RHS, and checks if it's a subtract from 0.
4490 auto IsCMN = [&](MachineInstr *DefMI, const AArch64CC::CondCode &CC) {
4491 if (!DefMI || DefMI->getOpcode() != TargetOpcode::G_SUB)
4492 return false;
4493
4494 // Need to make sure NZCV is the same at the end of the transformation.
4495 if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
4496 return false;
4497
4498 // We want to match against SUBs.
4499 if (DefMI->getOpcode() != TargetOpcode::G_SUB)
4500 return false;
4501
4502 // Make sure that we're getting
4503 // x = G_SUB 0, y
4504 auto ValAndVReg =
4505 getConstantVRegValWithLookThrough(DefMI->getOperand(1).getReg(), MRI);
4506 if (!ValAndVReg || ValAndVReg->Value != 0)
4507 return false;
4508
4509 // This can safely be represented as a CMN.
4510 return true;
4511 };
4512
4513 // Check if the RHS or LHS of the G_ICMP is defined by a SUB
4514 MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
4515 MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
4516 CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate();
4517 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(P);
4518
4519 // Given this:
4520 //
4521 // x = G_SUB 0, y
4522 // G_ICMP x, z
4523 //
4524 // Produce this:
4525 //
4526 // cmn y, z
4527 if (IsCMN(LHSDef, CC))
4528 return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
4529
4530 // Same idea here, but with the RHS of the compare instead:
4531 //
4532 // Given this:
4533 //
4534 // x = G_SUB 0, y
4535 // G_ICMP z, x
4536 //
4537 // Produce this:
4538 //
4539 // cmn z, y
4540 if (IsCMN(RHSDef, CC))
4541 return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
4542
4543 // Given this:
4544 //
4545 // z = G_AND x, y
4546 // G_ICMP z, 0
4547 //
4548 // Produce this if the compare is signed:
4549 //
4550 // tst x, y
4551 if (!CmpInst::isUnsigned(P) && LHSDef &&
4552 LHSDef->getOpcode() == TargetOpcode::G_AND) {
4553 // Make sure that the RHS is 0.
4554 auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI);
4555 if (!ValAndVReg || ValAndVReg->Value != 0)
4556 return nullptr;
4557
4558 return emitTST(LHSDef->getOperand(1),
4559 LHSDef->getOperand(2), MIRBuilder);
4560 }
4561
4562 return nullptr;
4563}
4564
4565bool AArch64InstructionSelector::selectShuffleVector(
4566 MachineInstr &I, MachineRegisterInfo &MRI) const {
4567 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
4568 Register Src1Reg = I.getOperand(1).getReg();
4569 const LLT Src1Ty = MRI.getType(Src1Reg);
4570 Register Src2Reg = I.getOperand(2).getReg();
4571 const LLT Src2Ty = MRI.getType(Src2Reg);
4572 ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
4573
4574 MachineBasicBlock &MBB = *I.getParent();
4575 MachineFunction &MF = *MBB.getParent();
4576 LLVMContext &Ctx = MF.getFunction().getContext();
4577
4578 // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
4579 // it's originated from a <1 x T> type. Those should have been lowered into
4580 // G_BUILD_VECTOR earlier.
4581 if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
4582 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n"
; } } while (false)
;
4583 return false;
4584 }
4585
4586 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
4587
4588 SmallVector<Constant *, 64> CstIdxs;
4589 for (int Val : Mask) {
4590 // For now, any undef indexes we'll just assume to be 0. This should be
4591 // optimized in future, e.g. to select DUP etc.
4592 Val = Val < 0 ? 0 : Val;
4593 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
4594 unsigned Offset = Byte + Val * BytesPerElt;
4595 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
4596 }
4597 }
4598
4599 MachineIRBuilder MIRBuilder(I);
4600
4601 // Use a constant pool to load the index vector for TBL.
4602 Constant *CPVal = ConstantVector::get(CstIdxs);
4603 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder);
4604 if (!IndexLoad) {
4605 LLVM_DEBUG(dbgs() << "Could not load from a constant pool")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Could not load from a constant pool"
; } } while (false)
;
4606 return false;
4607 }
4608
4609 if (DstTy.getSizeInBits() != 128) {
4610 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty")((DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty"
) ? static_cast<void> (0) : __assert_fail ("DstTy.getSizeInBits() == 64 && \"Unexpected shuffle result ty\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4610, __PRETTY_FUNCTION__))
;
4611 // This case can be done with TBL1.
4612 MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIRBuilder);
4613 if (!Concat) {
4614 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Could not do vector concat for tbl1"
; } } while (false)
;
4615 return false;
4616 }
4617
4618 // The constant pool load will be 64 bits, so need to convert to FPR128 reg.
4619 IndexLoad =
4620 emitScalarToVector(64, &AArch64::FPR128RegClass,
4621 IndexLoad->getOperand(0).getReg(), MIRBuilder);
4622
4623 auto TBL1 = MIRBuilder.buildInstr(
4624 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
4625 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
4626 constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
4627
4628 auto Copy =
4629 MIRBuilder
4630 .buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
4631 .addReg(TBL1.getReg(0), 0, AArch64::dsub);
4632 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
4633 I.eraseFromParent();
4634 return true;
4635 }
4636
4637 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
4638 // Q registers for regalloc.
4639 auto RegSeq = MIRBuilder
4640 .buildInstr(TargetOpcode::REG_SEQUENCE,
4641 {&AArch64::QQRegClass}, {Src1Reg})
4642 .addImm(AArch64::qsub0)
4643 .addUse(Src2Reg)
4644 .addImm(AArch64::qsub1);
4645
4646 auto TBL2 = MIRBuilder.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
4647 {RegSeq, IndexLoad->getOperand(0)});
4648 constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI);
4649 constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
4650 I.eraseFromParent();
4651 return true;
4652}
4653
4654MachineInstr *AArch64InstructionSelector::emitLaneInsert(
4655 Optional<Register> DstReg, Register SrcReg, Register EltReg,
4656 unsigned LaneIdx, const RegisterBank &RB,
4657 MachineIRBuilder &MIRBuilder) const {
4658 MachineInstr *InsElt = nullptr;
4659 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
4660 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4661
4662 // Create a register to define with the insert if one wasn't passed in.
4663 if (!DstReg)
4664 DstReg = MRI.createVirtualRegister(DstRC);
4665
4666 unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
4667 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
4668
4669 if (RB.getID() == AArch64::FPRRegBankID) {
4670 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
4671 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
4672 .addImm(LaneIdx)
4673 .addUse(InsSub->getOperand(0).getReg())
4674 .addImm(0);
4675 } else {
4676 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
4677 .addImm(LaneIdx)
4678 .addUse(EltReg);
4679 }
4680
4681 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4682 return InsElt;
4683}
4684
4685bool AArch64InstructionSelector::selectInsertElt(
4686 MachineInstr &I, MachineRegisterInfo &MRI) const {
4687 assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)((I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT) ? static_cast
<void> (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT"
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4687, __PRETTY_FUNCTION__))
;
4688
4689 // Get information on the destination.
4690 Register DstReg = I.getOperand(0).getReg();
4691 const LLT DstTy = MRI.getType(DstReg);
4692 unsigned VecSize = DstTy.getSizeInBits();
4693
4694 // Get information on the element we want to insert into the destination.
4695 Register EltReg = I.getOperand(2).getReg();
4696 const LLT EltTy = MRI.getType(EltReg);
4697 unsigned EltSize = EltTy.getSizeInBits();
4698 if (EltSize < 16 || EltSize > 64)
4699 return false; // Don't support all element types yet.
4700
4701 // Find the definition of the index. Bail out if it's not defined by a
4702 // G_CONSTANT.
4703 Register IdxReg = I.getOperand(3).getReg();
4704 auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI);
4705 if (!VRegAndVal)
4706 return false;
4707 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
4708
4709 // Perform the lane insert.
4710 Register SrcReg = I.getOperand(1).getReg();
4711 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
4712 MachineIRBuilder MIRBuilder(I);
4713
4714 if (VecSize < 128) {
4715 // If the vector we're inserting into is smaller than 128 bits, widen it
4716 // to 128 to do the insert.
4717 MachineInstr *ScalarToVec = emitScalarToVector(
4718 VecSize, &AArch64::FPR128RegClass, SrcReg, MIRBuilder);
4719 if (!ScalarToVec)
4720 return false;
4721 SrcReg = ScalarToVec->getOperand(0).getReg();
4722 }
4723
4724 // Create an insert into a new FPR128 register.
4725 // Note that if our vector is already 128 bits, we end up emitting an extra
4726 // register.
4727 MachineInstr *InsMI =
4728 emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIRBuilder);
4729
4730 if (VecSize < 128) {
4731 // If we had to widen to perform the insert, then we have to demote back to
4732 // the original size to get the result we want.
4733 Register DemoteVec = InsMI->getOperand(0).getReg();
4734 const TargetRegisterClass *RC =
4735 getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
4736 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
4737 LLVM_DEBUG(dbgs() << "Unsupported register class!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unsupported register class!\n"
; } } while (false)
;
4738 return false;
4739 }
4740 unsigned SubReg = 0;
4741 if (!getSubRegForClass(RC, TRI, SubReg))
4742 return false;
4743 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
4744 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSizedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unsupported destination size! ("
<< VecSize << "\n"; } } while (false)
4745 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unsupported destination size! ("
<< VecSize << "\n"; } } while (false)
;
4746 return false;
4747 }
4748 MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
4749 .addReg(DemoteVec, 0, SubReg);
4750 RBI.constrainGenericRegister(DstReg, *RC, MRI);
4751 } else {
4752 // No widening needed.
4753 InsMI->getOperand(0).setReg(DstReg);
4754 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
4755 }
4756
4757 I.eraseFromParent();
4758 return true;
4759}
4760
4761bool AArch64InstructionSelector::tryOptConstantBuildVec(
4762 MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) const {
4763 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR)((I.getOpcode() == TargetOpcode::G_BUILD_VECTOR) ? static_cast
<void> (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_BUILD_VECTOR"
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4763, __PRETTY_FUNCTION__))
;
4764 unsigned DstSize = DstTy.getSizeInBits();
4765 assert(DstSize <= 128 && "Unexpected build_vec type!")((DstSize <= 128 && "Unexpected build_vec type!") ?
static_cast<void> (0) : __assert_fail ("DstSize <= 128 && \"Unexpected build_vec type!\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4765, __PRETTY_FUNCTION__))
;
4766 if (DstSize < 32)
4767 return false;
4768 // Check if we're building a constant vector, in which case we want to
4769 // generate a constant pool load instead of a vector insert sequence.
4770 SmallVector<Constant *, 16> Csts;
4771 for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
4772 // Try to find G_CONSTANT or G_FCONSTANT
4773 auto *OpMI =
4774 getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI);
4775 if (OpMI)
4776 Csts.emplace_back(
4777 const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm()));
4778 else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT,
4779 I.getOperand(Idx).getReg(), MRI)))
4780 Csts.emplace_back(
4781 const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm()));
4782 else
4783 return false;
4784 }
4785 Constant *CV = ConstantVector::get(Csts);
4786 MachineIRBuilder MIB(I);
4787 if (CV->isNullValue()) {
4788 // Until the importer can support immAllZerosV in pattern leaf nodes,
4789 // select a zero move manually here.
4790 Register DstReg = I.getOperand(0).getReg();
4791 if (DstSize == 128) {
4792 auto Mov = MIB.buildInstr(AArch64::MOVIv2d_ns, {DstReg}, {}).addImm(0);
4793 I.eraseFromParent();
4794 return constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
4795 } else if (DstSize == 64) {
4796 auto Mov =
4797 MIB.buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
4798 .addImm(0);
4799 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
4800 .addReg(Mov.getReg(0), 0, AArch64::dsub);
4801 I.eraseFromParent();
4802 return RBI.constrainGenericRegister(DstReg, AArch64::FPR64RegClass, MRI);
4803 }
4804 }
4805 auto *CPLoad = emitLoadFromConstantPool(CV, MIB);
4806 if (!CPLoad) {
4807 LLVM_DEBUG(dbgs() << "Could not generate cp load for build_vector")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Could not generate cp load for build_vector"
; } } while (false)
;
4808 return false;
4809 }
4810 MIB.buildCopy(I.getOperand(0), CPLoad->getOperand(0));
4811 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
4812 *MRI.getRegClass(CPLoad->getOperand(0).getReg()),
4813 MRI);
4814 I.eraseFromParent();
4815 return true;
4816}
4817
4818bool AArch64InstructionSelector::selectBuildVector(
4819 MachineInstr &I, MachineRegisterInfo &MRI) const {
4820 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR)((I.getOpcode() == TargetOpcode::G_BUILD_VECTOR) ? static_cast
<void> (0) : __assert_fail ("I.getOpcode() == TargetOpcode::G_BUILD_VECTOR"
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4820, __PRETTY_FUNCTION__))
;
4821 // Until we port more of the optimized selections, for now just use a vector
4822 // insert sequence.
4823 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
4824 const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
4825 unsigned EltSize = EltTy.getSizeInBits();
4826
4827 if (tryOptConstantBuildVec(I, DstTy, MRI))
4828 return true;
4829 if (EltSize < 16 || EltSize > 64)
4830 return false; // Don't support all element types yet.
4831 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
4832 MachineIRBuilder MIRBuilder(I);
4833
4834 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
4835 MachineInstr *ScalarToVec =
4836 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
4837 I.getOperand(1).getReg(), MIRBuilder);
4838 if (!ScalarToVec)
4839 return false;
4840
4841 Register DstVec = ScalarToVec->getOperand(0).getReg();
4842 unsigned DstSize = DstTy.getSizeInBits();
4843
4844 // Keep track of the last MI we inserted. Later on, we might be able to save
4845 // a copy using it.
4846 MachineInstr *PrevMI = nullptr;
4847 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
4848 // Note that if we don't do a subregister copy, we can end up making an
4849 // extra register.
4850 PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
4851 MIRBuilder);
4852 DstVec = PrevMI->getOperand(0).getReg();
4853 }
4854
4855 // If DstTy's size in bits is less than 128, then emit a subregister copy
4856 // from DstVec to the last register we've defined.
4857 if (DstSize < 128) {
4858 // Force this to be FPR using the destination vector.
4859 const TargetRegisterClass *RC =
4860 getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
4861 if (!RC)
4862 return false;
4863 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
4864 LLVM_DEBUG(dbgs() << "Unsupported register class!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unsupported register class!\n"
; } } while (false)
;
4865 return false;
4866 }
4867
4868 unsigned SubReg = 0;
4869 if (!getSubRegForClass(RC, TRI, SubReg))
4870 return false;
4871 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
4872 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSizedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unsupported destination size! ("
<< DstSize << "\n"; } } while (false)
4873 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unsupported destination size! ("
<< DstSize << "\n"; } } while (false)
;
4874 return false;
4875 }
4876
4877 Register Reg = MRI.createVirtualRegister(RC);
4878 Register DstReg = I.getOperand(0).getReg();
4879
4880 MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
4881 .addReg(DstVec, 0, SubReg);
4882 MachineOperand &RegOp = I.getOperand(1);
4883 RegOp.setReg(Reg);
4884 RBI.constrainGenericRegister(DstReg, *RC, MRI);
4885 } else {
4886 // We don't need a subregister copy. Save a copy by re-using the
4887 // destination register on the final insert.
4888 assert(PrevMI && "PrevMI was null?")((PrevMI && "PrevMI was null?") ? static_cast<void
> (0) : __assert_fail ("PrevMI && \"PrevMI was null?\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 4888, __PRETTY_FUNCTION__))
;
4889 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
4890 constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
4891 }
4892
4893 I.eraseFromParent();
4894 return true;
4895}
4896
4897/// Helper function to find an intrinsic ID on an a MachineInstr. Returns the
4898/// ID if it exists, and 0 otherwise.
4899static unsigned findIntrinsicID(MachineInstr &I) {
4900 auto IntrinOp = find_if(I.operands(), [&](const MachineOperand &Op) {
4901 return Op.isIntrinsicID();
4902 });
4903 if (IntrinOp == I.operands_end())
4904 return 0;
4905 return IntrinOp->getIntrinsicID();
4906}
4907
4908bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
4909 MachineInstr &I, MachineRegisterInfo &MRI) const {
4910 // Find the intrinsic ID.
4911 unsigned IntrinID = findIntrinsicID(I);
4912 if (!IntrinID)
4913 return false;
4914 MachineIRBuilder MIRBuilder(I);
4915
4916 // Select the instruction.
4917 switch (IntrinID) {
4918 default:
4919 return false;
4920 case Intrinsic::trap:
4921 MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(1);
4922 break;
4923 case Intrinsic::debugtrap:
4924 MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
4925 break;
4926 case Intrinsic::ubsantrap:
4927 MIRBuilder.buildInstr(AArch64::BRK, {}, {})
4928 .addImm(I.getOperand(1).getImm() | ('U' << 8));
4929 break;
4930 }
4931
4932 I.eraseFromParent();
4933 return true;
4934}
4935
4936bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
4937 MachineRegisterInfo &MRI) {
4938 unsigned IntrinID = findIntrinsicID(I);
4939 if (!IntrinID)
4940 return false;
4941 MachineIRBuilder MIRBuilder(I);
4942
4943 switch (IntrinID) {
4944 default:
4945 break;
4946 case Intrinsic::aarch64_crypto_sha1h: {
4947 Register DstReg = I.getOperand(0).getReg();
4948 Register SrcReg = I.getOperand(2).getReg();
4949
4950 // FIXME: Should this be an assert?
4951 if (MRI.getType(DstReg).getSizeInBits() != 32 ||
4952 MRI.getType(SrcReg).getSizeInBits() != 32)
4953 return false;
4954
4955 // The operation has to happen on FPRs. Set up some new FPR registers for
4956 // the source and destination if they are on GPRs.
4957 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
4958 SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
4959 MIRBuilder.buildCopy({SrcReg}, {I.getOperand(2)});
4960
4961 // Make sure the copy ends up getting constrained properly.
4962 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
4963 AArch64::GPR32RegClass, MRI);
4964 }
4965
4966 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
4967 DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
4968
4969 // Actually insert the instruction.
4970 auto SHA1Inst = MIRBuilder.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
4971 constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
4972
4973 // Did we create a new register for the destination?
4974 if (DstReg != I.getOperand(0).getReg()) {
4975 // Yep. Copy the result of the instruction back into the original
4976 // destination.
4977 MIRBuilder.buildCopy({I.getOperand(0)}, {DstReg});
4978 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
4979 AArch64::GPR32RegClass, MRI);
4980 }
4981
4982 I.eraseFromParent();
4983 return true;
4984 }
4985 case Intrinsic::frameaddress:
4986 case Intrinsic::returnaddress: {
4987 MachineFunction &MF = *I.getParent()->getParent();
4988 MachineFrameInfo &MFI = MF.getFrameInfo();
4989
4990 unsigned Depth = I.getOperand(2).getImm();
4991 Register DstReg = I.getOperand(0).getReg();
4992 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
4993
4994 if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
4995 if (!MFReturnAddr) {
4996 // Insert the copy from LR/X30 into the entry block, before it can be
4997 // clobbered by anything.
4998 MFI.setReturnAddressIsTaken(true);
4999 MFReturnAddr = getFunctionLiveInPhysReg(MF, TII, AArch64::LR,
5000 AArch64::GPR64RegClass);
5001 }
5002
5003 if (STI.hasPAuth()) {
5004 MIRBuilder.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
5005 } else {
5006 MIRBuilder.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
5007 MIRBuilder.buildInstr(AArch64::XPACLRI);
5008 MIRBuilder.buildCopy({DstReg}, {Register(AArch64::LR)});
5009 }
5010
5011 I.eraseFromParent();
5012 return true;
5013 }
5014
5015 MFI.setFrameAddressIsTaken(true);
5016 Register FrameAddr(AArch64::FP);
5017 while (Depth--) {
5018 Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
5019 auto Ldr =
5020 MIRBuilder.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr})
5021 .addImm(0);
5022 constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI);
5023 FrameAddr = NextFrame;
5024 }
5025
5026 if (IntrinID == Intrinsic::frameaddress)
5027 MIRBuilder.buildCopy({DstReg}, {FrameAddr});
5028 else {
5029 MFI.setReturnAddressIsTaken(true);
5030
5031 if (STI.hasPAuth()) {
5032 Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
5033 MIRBuilder.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
5034 MIRBuilder.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
5035 } else {
5036 MIRBuilder.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr}).addImm(1);
5037 MIRBuilder.buildInstr(AArch64::XPACLRI);
5038 MIRBuilder.buildCopy({DstReg}, {Register(AArch64::LR)});
5039 }
5040 }
5041
5042 I.eraseFromParent();
5043 return true;
5044 }
5045 }
5046 return false;
5047}
5048
5049InstructionSelector::ComplexRendererFns
5050AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
5051 auto MaybeImmed = getImmedFromMO(Root);
5052 if (MaybeImmed == None || *MaybeImmed > 31)
5053 return None;
5054 uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
5055 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
5056}
5057
5058InstructionSelector::ComplexRendererFns
5059AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
5060 auto MaybeImmed = getImmedFromMO(Root);
5061 if (MaybeImmed == None || *MaybeImmed > 31)
5062 return None;
5063 uint64_t Enc = 31 - *MaybeImmed;
5064 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
5065}
5066
5067InstructionSelector::ComplexRendererFns
5068AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
5069 auto MaybeImmed = getImmedFromMO(Root);
5070 if (MaybeImmed == None || *MaybeImmed > 63)
5071 return None;
5072 uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
5073 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
5074}
5075
5076InstructionSelector::ComplexRendererFns
5077AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
5078 auto MaybeImmed = getImmedFromMO(Root);
5079 if (MaybeImmed == None || *MaybeImmed > 63)
5080 return None;
5081 uint64_t Enc = 63 - *MaybeImmed;
5082 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
5083}
5084
5085/// Helper to select an immediate value that can be represented as a 12-bit
5086/// value shifted left by either 0 or 12. If it is possible to do so, return
5087/// the immediate and shift value. If not, return None.
5088///
5089/// Used by selectArithImmed and selectNegArithImmed.
5090InstructionSelector::ComplexRendererFns
5091AArch64InstructionSelector::select12BitValueWithLeftShift(
5092 uint64_t Immed) const {
5093 unsigned ShiftAmt;
5094 if (Immed >> 12 == 0) {
5095 ShiftAmt = 0;
5096 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
5097 ShiftAmt = 12;
5098 Immed = Immed >> 12;
5099 } else
5100 return None;
5101
5102 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
5103 return {{
5104 [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
5105 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
5106 }};
5107}
5108
5109/// SelectArithImmed - Select an immediate value that can be represented as
5110/// a 12-bit value shifted left by either 0 or 12. If so, return true with
5111/// Val set to the 12-bit value and Shift set to the shifter operand.
5112InstructionSelector::ComplexRendererFns
5113AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
5114 // This function is called from the addsub_shifted_imm ComplexPattern,
5115 // which lists [imm] as the list of opcode it's interested in, however
5116 // we still need to check whether the operand is actually an immediate
5117 // here because the ComplexPattern opcode list is only used in
5118 // root-level opcode matching.
5119 auto MaybeImmed = getImmedFromMO(Root);
5120 if (MaybeImmed == None)
5121 return None;
5122 return select12BitValueWithLeftShift(*MaybeImmed);
5123}
5124
5125/// SelectNegArithImmed - As above, but negates the value before trying to
5126/// select it.
5127InstructionSelector::ComplexRendererFns
5128AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
5129 // We need a register here, because we need to know if we have a 64 or 32
5130 // bit immediate.
5131 if (!Root.isReg())
5132 return None;
5133 auto MaybeImmed = getImmedFromMO(Root);
5134 if (MaybeImmed == None)
5135 return None;
5136 uint64_t Immed = *MaybeImmed;
5137
5138 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
5139 // have the opposite effect on the C flag, so this pattern mustn't match under
5140 // those circumstances.
5141 if (Immed == 0)
5142 return None;
5143
5144 // Check if we're dealing with a 32-bit type on the root or a 64-bit type on
5145 // the root.
5146 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5147 if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
5148 Immed = ~((uint32_t)Immed) + 1;
5149 else
5150 Immed = ~Immed + 1ULL;
5151
5152 if (Immed & 0xFFFFFFFFFF000000ULL)
5153 return None;
5154
5155 Immed &= 0xFFFFFFULL;
5156 return select12BitValueWithLeftShift(Immed);
5157}
5158
5159/// Return true if it is worth folding MI into an extended register. That is,
5160/// if it's safe to pull it into the addressing mode of a load or store as a
5161/// shift.
5162bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
5163 MachineInstr &MI, const MachineRegisterInfo &MRI) const {
5164 // Always fold if there is one use, or if we're optimizing for size.
5165 Register DefReg = MI.getOperand(0).getReg();
5166 if (MRI.hasOneNonDBGUse(DefReg) ||
5167 MI.getParent()->getParent()->getFunction().hasMinSize())
5168 return true;
5169
5170 // It's better to avoid folding and recomputing shifts when we don't have a
5171 // fastpath.
5172 if (!STI.hasLSLFast())
5173 return false;
5174
5175 // We have a fastpath, so folding a shift in and potentially computing it
5176 // many times may be beneficial. Check if this is only used in memory ops.
5177 // If it is, then we should fold.
5178 return all_of(MRI.use_nodbg_instructions(DefReg),
5179 [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
5180}
5181
5182static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) {
5183 switch (Type) {
5184 case AArch64_AM::SXTB:
5185 case AArch64_AM::SXTH:
5186 case AArch64_AM::SXTW:
5187 return true;
5188 default:
5189 return false;
5190 }
5191}
5192
5193InstructionSelector::ComplexRendererFns
5194AArch64InstructionSelector::selectExtendedSHL(
5195 MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
5196 unsigned SizeInBytes, bool WantsExt) const {
5197 assert(Base.isReg() && "Expected base to be a register operand")((Base.isReg() && "Expected base to be a register operand"
) ? static_cast<void> (0) : __assert_fail ("Base.isReg() && \"Expected base to be a register operand\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 5197, __PRETTY_FUNCTION__))
;
5198 assert(Offset.isReg() && "Expected offset to be a register operand")((Offset.isReg() && "Expected offset to be a register operand"
) ? static_cast<void> (0) : __assert_fail ("Offset.isReg() && \"Expected offset to be a register operand\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 5198, __PRETTY_FUNCTION__))
;
5199
5200 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5201 MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
5202 if (!OffsetInst)
5203 return None;
5204
5205 unsigned OffsetOpc = OffsetInst->getOpcode();
5206 bool LookedThroughZExt = false;
5207 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
5208 // Try to look through a ZEXT.
5209 if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
5210 return None;
5211
5212 OffsetInst = MRI.getVRegDef(OffsetInst->getOperand(1).getReg());
5213 OffsetOpc = OffsetInst->getOpcode();
5214 LookedThroughZExt = true;
5215
5216 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
5217 return None;
5218 }
5219 // Make sure that the memory op is a valid size.
5220 int64_t LegalShiftVal = Log2_32(SizeInBytes);
5221 if (LegalShiftVal == 0)
5222 return None;
5223 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
5224 return None;
5225
5226 // Now, try to find the specific G_CONSTANT. Start by assuming that the
5227 // register we will offset is the LHS, and the register containing the
5228 // constant is the RHS.
5229 Register OffsetReg = OffsetInst->getOperand(1).getReg();
5230 Register ConstantReg = OffsetInst->getOperand(2).getReg();
5231 auto ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
5232 if (!ValAndVReg) {
5233 // We didn't get a constant on the RHS. If the opcode is a shift, then
5234 // we're done.
5235 if (OffsetOpc == TargetOpcode::G_SHL)
5236 return None;
5237
5238 // If we have a G_MUL, we can use either register. Try looking at the RHS.
5239 std::swap(OffsetReg, ConstantReg);
5240 ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
5241 if (!ValAndVReg)
5242 return None;
5243 }
5244
5245 // The value must fit into 3 bits, and must be positive. Make sure that is
5246 // true.
5247 int64_t ImmVal = ValAndVReg->Value.getSExtValue();
5248
5249 // Since we're going to pull this into a shift, the constant value must be
5250 // a power of 2. If we got a multiply, then we need to check this.
5251 if (OffsetOpc == TargetOpcode::G_MUL) {
5252 if (!isPowerOf2_32(ImmVal))
5253 return None;
5254
5255 // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
5256 ImmVal = Log2_32(ImmVal);
5257 }
5258
5259 if ((ImmVal & 0x7) != ImmVal)
5260 return None;
5261
5262 // We are only allowed to shift by LegalShiftVal. This shift value is built
5263 // into the instruction, so we can't just use whatever we want.
5264 if (ImmVal != LegalShiftVal)
5265 return None;
5266
5267 unsigned SignExtend = 0;
5268 if (WantsExt) {
5269 // Check if the offset is defined by an extend, unless we looked through a
5270 // G_ZEXT earlier.
5271 if (!LookedThroughZExt) {
5272 MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
5273 auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
5274 if (Ext == AArch64_AM::InvalidShiftExtend)
5275 return None;
5276
5277 SignExtend = isSignExtendShiftType(Ext) ? 1 : 0;
5278 // We only support SXTW for signed extension here.
5279 if (SignExtend && Ext != AArch64_AM::SXTW)
5280 return None;
5281 OffsetReg = ExtInst->getOperand(1).getReg();
5282 }
5283
5284 // Need a 32-bit wide register here.
5285 MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
5286 OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
5287 }
5288
5289 // We can use the LHS of the GEP as the base, and the LHS of the shift as an
5290 // offset. Signify that we are shifting by setting the shift flag to 1.
5291 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
5292 [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
5293 [=](MachineInstrBuilder &MIB) {
5294 // Need to add both immediates here to make sure that they are both
5295 // added to the instruction.
5296 MIB.addImm(SignExtend);
5297 MIB.addImm(1);
5298 }}};
5299}
5300
5301/// This is used for computing addresses like this:
5302///
5303/// ldr x1, [x2, x3, lsl #3]
5304///
5305/// Where x2 is the base register, and x3 is an offset register. The shift-left
5306/// is a constant value specific to this load instruction. That is, we'll never
5307/// see anything other than a 3 here (which corresponds to the size of the
5308/// element being loaded.)
5309InstructionSelector::ComplexRendererFns
5310AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
5311 MachineOperand &Root, unsigned SizeInBytes) const {
5312 if (!Root.isReg())
5313 return None;
5314 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5315
5316 // We want to find something like this:
5317 //
5318 // val = G_CONSTANT LegalShiftVal
5319 // shift = G_SHL off_reg val
5320 // ptr = G_PTR_ADD base_reg shift
5321 // x = G_LOAD ptr
5322 //
5323 // And fold it into this addressing mode:
5324 //
5325 // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
5326
5327 // Check if we can find the G_PTR_ADD.
5328 MachineInstr *PtrAdd =
5329 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
5330 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
5331 return None;
5332
5333 // Now, try to match an opcode which will match our specific offset.
5334 // We want a G_SHL or a G_MUL.
5335 MachineInstr *OffsetInst =
5336 getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI);
5337 return selectExtendedSHL(Root, PtrAdd->getOperand(1),
5338 OffsetInst->getOperand(0), SizeInBytes,
5339 /*WantsExt=*/false);
5340}
5341
5342/// This is used for computing addresses like this:
5343///
5344/// ldr x1, [x2, x3]
5345///
5346/// Where x2 is the base register, and x3 is an offset register.
5347///
5348/// When possible (or profitable) to fold a G_PTR_ADD into the address calculation,
5349/// this will do so. Otherwise, it will return None.
5350InstructionSelector::ComplexRendererFns
5351AArch64InstructionSelector::selectAddrModeRegisterOffset(
5352 MachineOperand &Root) const {
5353 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5354
5355 // We need a GEP.
5356 MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
5357 if (!Gep || Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
5358 return None;
5359
5360 // If this is used more than once, let's not bother folding.
5361 // TODO: Check if they are memory ops. If they are, then we can still fold
5362 // without having to recompute anything.
5363 if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg()))
5364 return None;
5365
5366 // Base is the GEP's LHS, offset is its RHS.
5367 return {{[=](MachineInstrBuilder &MIB) {
5368 MIB.addUse(Gep->getOperand(1).getReg());
5369 },
5370 [=](MachineInstrBuilder &MIB) {
5371 MIB.addUse(Gep->getOperand(2).getReg());
5372 },
5373 [=](MachineInstrBuilder &MIB) {
5374 // Need to add both immediates here to make sure that they are both
5375 // added to the instruction.
5376 MIB.addImm(0);
5377 MIB.addImm(0);
5378 }}};
5379}
5380
5381/// This is intended to be equivalent to selectAddrModeXRO in
5382/// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
5383InstructionSelector::ComplexRendererFns
5384AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
5385 unsigned SizeInBytes) const {
5386 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5387 if (!Root.isReg())
5388 return None;
5389 MachineInstr *PtrAdd =
5390 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
5391 if (!PtrAdd)
5392 return None;
5393
5394 // Check for an immediates which cannot be encoded in the [base + imm]
5395 // addressing mode, and can't be encoded in an add/sub. If this happens, we'll
5396 // end up with code like:
5397 //
5398 // mov x0, wide
5399 // add x1 base, x0
5400 // ldr x2, [x1, x0]
5401 //
5402 // In this situation, we can use the [base, xreg] addressing mode to save an
5403 // add/sub:
5404 //
5405 // mov x0, wide
5406 // ldr x2, [base, x0]
5407 auto ValAndVReg =
5408 getConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI);
5409 if (ValAndVReg) {
5410 unsigned Scale = Log2_32(SizeInBytes);
5411 int64_t ImmOff = ValAndVReg->Value.getSExtValue();
5412
5413 // Skip immediates that can be selected in the load/store addresing
5414 // mode.
5415 if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
5416 ImmOff < (0x1000 << Scale))
5417 return None;
5418
5419 // Helper lambda to decide whether or not it is preferable to emit an add.
5420 auto isPreferredADD = [](int64_t ImmOff) {
5421 // Constants in [0x0, 0xfff] can be encoded in an add.
5422 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
5423 return true;
5424
5425 // Can it be encoded in an add lsl #12?
5426 if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
5427 return false;
5428
5429 // It can be encoded in an add lsl #12, but we may not want to. If it is
5430 // possible to select this as a single movz, then prefer that. A single
5431 // movz is faster than an add with a shift.
5432 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
5433 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
5434 };
5435
5436 // If the immediate can be encoded in a single add/sub, then bail out.
5437 if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
5438 return None;
5439 }
5440
5441 // Try to fold shifts into the addressing mode.
5442 auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
5443 if (AddrModeFns)
5444 return AddrModeFns;
5445
5446 // If that doesn't work, see if it's possible to fold in registers from
5447 // a GEP.
5448 return selectAddrModeRegisterOffset(Root);
5449}
5450
5451/// This is used for computing addresses like this:
5452///
5453/// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
5454///
5455/// Where we have a 64-bit base register, a 32-bit offset register, and an
5456/// extend (which may or may not be signed).
5457InstructionSelector::ComplexRendererFns
5458AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
5459 unsigned SizeInBytes) const {
5460 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5461
5462 MachineInstr *PtrAdd =
5463 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
5464 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
5465 return None;
5466
5467 MachineOperand &LHS = PtrAdd->getOperand(1);
5468 MachineOperand &RHS = PtrAdd->getOperand(2);
5469 MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);
5470
5471 // The first case is the same as selectAddrModeXRO, except we need an extend.
5472 // In this case, we try to find a shift and extend, and fold them into the
5473 // addressing mode.
5474 //
5475 // E.g.
5476 //
5477 // off_reg = G_Z/S/ANYEXT ext_reg
5478 // val = G_CONSTANT LegalShiftVal
5479 // shift = G_SHL off_reg val
5480 // ptr = G_PTR_ADD base_reg shift
5481 // x = G_LOAD ptr
5482 //
5483 // In this case we can get a load like this:
5484 //
5485 // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
5486 auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
5487 SizeInBytes, /*WantsExt=*/true);
5488 if (ExtendedShl)
5489 return ExtendedShl;
5490
5491 // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
5492 //
5493 // e.g.
5494 // ldr something, [base_reg, ext_reg, sxtw]
5495 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
5496 return None;
5497
5498 // Check if this is an extend. We'll get an extend type if it is.
5499 AArch64_AM::ShiftExtendType Ext =
5500 getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
5501 if (Ext == AArch64_AM::InvalidShiftExtend)
5502 return None;
5503
5504 // Need a 32-bit wide register.
5505 MachineIRBuilder MIB(*PtrAdd);
5506 Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(),
5507 AArch64::GPR32RegClass, MIB);
5508 unsigned SignExtend = Ext == AArch64_AM::SXTW;
5509
5510 // Base is LHS, offset is ExtReg.
5511 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
5512 [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
5513 [=](MachineInstrBuilder &MIB) {
5514 MIB.addImm(SignExtend);
5515 MIB.addImm(0);
5516 }}};
5517}
5518
5519/// Select a "register plus unscaled signed 9-bit immediate" address. This
5520/// should only match when there is an offset that is not valid for a scaled
5521/// immediate addressing mode. The "Size" argument is the size in bytes of the
5522/// memory reference, which is needed here to know what is valid for a scaled
5523/// immediate.
5524InstructionSelector::ComplexRendererFns
5525AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
5526 unsigned Size) const {
5527 MachineRegisterInfo &MRI =
5528 Root.getParent()->getParent()->getParent()->getRegInfo();
5529
5530 if (!Root.isReg())
5531 return None;
5532
5533 if (!isBaseWithConstantOffset(Root, MRI))
5534 return None;
5535
5536 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
5537 if (!RootDef)
5538 return None;
5539
5540 MachineOperand &OffImm = RootDef->getOperand(2);
5541 if (!OffImm.isReg())
5542 return None;
5543 MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
5544 if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT)
5545 return None;
5546 int64_t RHSC;
5547 MachineOperand &RHSOp1 = RHS->getOperand(1);
5548 if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
5549 return None;
5550 RHSC = RHSOp1.getCImm()->getSExtValue();
5551
5552 // If the offset is valid as a scaled immediate, don't match here.
5553 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size)))
5554 return None;
5555 if (RHSC >= -256 && RHSC < 256) {
5556 MachineOperand &Base = RootDef->getOperand(1);
5557 return {{
5558 [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
5559 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
5560 }};
5561 }
5562 return None;
5563}
5564
5565InstructionSelector::ComplexRendererFns
5566AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
5567 unsigned Size,
5568 MachineRegisterInfo &MRI) const {
5569 if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
5570 return None;
5571 MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg());
5572 if (Adrp.getOpcode() != AArch64::ADRP)
5573 return None;
5574
5575 // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
5576 // TODO: Need to check GV's offset % size if doing offset folding into globals.
5577 assert(Adrp.getOperand(1).getOffset() == 0 && "Unexpected offset in global")((Adrp.getOperand(1).getOffset() == 0 && "Unexpected offset in global"
) ? static_cast<void> (0) : __assert_fail ("Adrp.getOperand(1).getOffset() == 0 && \"Unexpected offset in global\""
, "/build/llvm-toolchain-snapshot-12~++20210120111114+fc6677f0bbaf/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 5577, __PRETTY_FUNCTION__))
;
5578 auto GV = Adrp.getOperand(1).getGlobal();
5579 if (GV->isThreadLocal())
5580 return None;
5581
5582 auto &MF = *RootDef.getParent()->getParent();
5583 if (GV->getPointerAlignment(MF.getDataLayout()) < Size)
5584 return None;
5585
5586 unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget());
5587 MachineIRBuilder MIRBuilder(RootDef);
5588 Register AdrpReg = Adrp.getOperand(0).getReg();
5589 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },
5590 [=](MachineInstrBuilder &MIB) {
5591 MIB.addGlobalAddress(GV, /* Offset */ 0,
5592 OpFlags | AArch64II::MO_PAGEOFF |
5593 AArch64II::MO_NC);
5594 }}};
5595}
5596
5597/// Select a "register plus scaled unsigned 12-bit immediate" address. The
5598/// "Size" argument is the size in bytes of the memory reference, which
5599/// determines the scale.
5600InstructionSelector::ComplexRendererFns
5601AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
5602 unsigned Size) const {
5603 MachineFunction &MF = *Root.getParent()->getParent()->getParent();
5604 MachineRegisterInfo &MRI = MF.getRegInfo();
5605
5606 if (!Root.isReg())
5607 return None;
5608
5609 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
5610 if (!RootDef)
5611 return None;
5612
5613 if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
5614 return {{
5615 [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
5616 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
5617 }};
5618 }
5619
5620