Bug Summary

File: llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
Warning: line 5640, column 67
The result of the left shift is undefined due to shifting by '4294967295', which is greater or equal to the width of type 'int'

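A shift count of 4294967295 is UINT_MAX, i.e. an unsigned value that wrapped around from -1 (for example via an unchecked 'Bits - 1' when Bits is 0). Shifting a 32-bit 'int' by 32 or more bits is undefined behavior in C++. The sketch below is hypothetical (it is not the code at line 5640); it only illustrates how such a count can arise and how a range check avoids it:

// Hypothetical illustration of the diagnosed pattern, not the actual code.
#include <cassert>
#include <cstdint>

uint32_t signBitOf(unsigned Width) {
  // When Width == 0, 'Width - 1' wraps to 4294967295, so the shift count is
  // >= the width of 'int' and the shift is undefined behavior.
  return 1 << (Width - 1);
}

uint32_t signBitOfChecked(unsigned Width) {
  // Range-check the count and shift an unsigned value instead.
  assert(Width >= 1 && Width <= 32 && "shift count must stay below bit width");
  return 1u << (Width - 1);
}
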
Annotated Source Code

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name AArch64InstructionSelector.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -fno-split-dwarf-inlining -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-12/lib/clang/12.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-12~++20210124100612+2afaf072f5c1/build-llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-12~++20210124100612+2afaf072f5c1/llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-12~++20210124100612+2afaf072f5c1/build-llvm/include -I /build/llvm-toolchain-snapshot-12~++20210124100612+2afaf072f5c1/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-12/lib/clang/12.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-12~++20210124100612+2afaf072f5c1/build-llvm/lib/Target/AArch64 -fdebug-prefix-map=/build/llvm-toolchain-snapshot-12~++20210124100612+2afaf072f5c1=. -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2021-01-24-223304-31662-1 -x c++ /build/llvm-toolchain-snapshot-12~++20210124100612+2afaf072f5c1/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

/build/llvm-toolchain-snapshot-12~++20210124100612+2afaf072f5c1/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

1//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the InstructionSelector class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
14#include "AArch64InstrInfo.h"
15#include "AArch64MachineFunctionInfo.h"
16#include "AArch64RegisterBankInfo.h"
17#include "AArch64RegisterInfo.h"
18#include "AArch64Subtarget.h"
19#include "AArch64TargetMachine.h"
20#include "MCTargetDesc/AArch64AddressingModes.h"
21#include "MCTargetDesc/AArch64MCTargetDesc.h"
22#include "llvm/ADT/Optional.h"
23#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
24#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
25#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
26#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
27#include "llvm/CodeGen/GlobalISel/Utils.h"
28#include "llvm/CodeGen/MachineBasicBlock.h"
29#include "llvm/CodeGen/MachineConstantPool.h"
30#include "llvm/CodeGen/MachineFunction.h"
31#include "llvm/CodeGen/MachineInstr.h"
32#include "llvm/CodeGen/MachineInstrBuilder.h"
33#include "llvm/CodeGen/MachineOperand.h"
34#include "llvm/CodeGen/MachineRegisterInfo.h"
35#include "llvm/CodeGen/TargetOpcodes.h"
36#include "llvm/IR/Constants.h"
37#include "llvm/IR/Instructions.h"
38#include "llvm/IR/PatternMatch.h"
39#include "llvm/IR/Type.h"
40#include "llvm/IR/IntrinsicsAArch64.h"
41#include "llvm/Pass.h"
42#include "llvm/Support/Debug.h"
43#include "llvm/Support/raw_ostream.h"
44
45#define DEBUG_TYPE "aarch64-isel"
46
47using namespace llvm;
48using namespace MIPatternMatch;
49
50namespace {
51
52#define GET_GLOBALISEL_PREDICATE_BITSET
53#include "AArch64GenGlobalISel.inc"
54#undef GET_GLOBALISEL_PREDICATE_BITSET
55
56class AArch64InstructionSelector : public InstructionSelector {
57public:
58 AArch64InstructionSelector(const AArch64TargetMachine &TM,
59 const AArch64Subtarget &STI,
60 const AArch64RegisterBankInfo &RBI);
61
62 bool select(MachineInstr &I) override;
63  static const char *getName() { return DEBUG_TYPE; }
64
65 void setupMF(MachineFunction &MF, GISelKnownBits &KB,
66 CodeGenCoverage &CoverageInfo) override {
67 InstructionSelector::setupMF(MF, KB, CoverageInfo);
68
69 // hasFnAttribute() is expensive to call on every BRCOND selection, so
70 // cache it here for each run of the selector.
71 ProduceNonFlagSettingCondBr =
72 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
73 MFReturnAddr = Register();
74
75 processPHIs(MF);
76 }
77
78private:
79 /// tblgen-erated 'select' implementation, used as the initial selector for
80 /// the patterns that don't require complex C++.
81 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
82
83 // A lowering phase that runs before any selection attempts.
84 // Returns true if the instruction was modified.
85 bool preISelLower(MachineInstr &I);
86
87 // An early selection function that runs before the selectImpl() call.
88 bool earlySelect(MachineInstr &I) const;
89
90 // Do some preprocessing of G_PHIs before we begin selection.
91 void processPHIs(MachineFunction &MF);
92
93 bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
94
95 /// Eliminate same-sized cross-bank copies into stores before selectImpl().
96 bool contractCrossBankCopyIntoStore(MachineInstr &I,
97 MachineRegisterInfo &MRI);
98
99 bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
100
101 bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
102 MachineRegisterInfo &MRI) const;
103 bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
104 MachineRegisterInfo &MRI) const;
105
106 ///@{
107 /// Helper functions for selectCompareBranch.
108 bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
109 MachineIRBuilder &MIB) const;
110 bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
111 MachineIRBuilder &MIB) const;
112 bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
113 MachineIRBuilder &MIB) const;
114 bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
115 MachineBasicBlock *DstMBB,
116 MachineIRBuilder &MIB) const;
117 ///@}
118
119 bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
120 MachineRegisterInfo &MRI) const;
121
122 bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI) const;
123 bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
124
125 // Helper to generate an equivalent of scalar_to_vector into a new register,
126 // returned via 'Dst'.
127 MachineInstr *emitScalarToVector(unsigned EltSize,
128 const TargetRegisterClass *DstRC,
129 Register Scalar,
130 MachineIRBuilder &MIRBuilder) const;
131
132 /// Emit a lane insert into \p DstReg, or a new vector register if None is
133 /// provided.
134 ///
135 /// The lane inserted into is defined by \p LaneIdx. The vector source
136 /// register is given by \p SrcReg. The register containing the element is
137 /// given by \p EltReg.
138 MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
139 Register EltReg, unsigned LaneIdx,
140 const RegisterBank &RB,
141 MachineIRBuilder &MIRBuilder) const;
142 bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
143 bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
144 MachineRegisterInfo &MRI) const;
145 bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
146 bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
147 bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
148
149 bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
150 bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
151 bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
152 bool selectSplitVectorUnmerge(MachineInstr &I,
153 MachineRegisterInfo &MRI) const;
154 bool selectIntrinsicWithSideEffects(MachineInstr &I,
155 MachineRegisterInfo &MRI) const;
156 bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
157 bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const;
158 bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
159 bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
160 bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const;
161 bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const;
162 bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI) const;
163 bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI) const;
164
165 unsigned emitConstantPoolEntry(const Constant *CPVal,
166 MachineFunction &MF) const;
167 MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
168 MachineIRBuilder &MIRBuilder) const;
169
170 // Emit a vector concat operation.
171 MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
172 Register Op2,
173 MachineIRBuilder &MIRBuilder) const;
174
175 // Emit an integer compare between LHS and RHS, which checks for Predicate.
176 MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
177 MachineOperand &Predicate,
178 MachineIRBuilder &MIRBuilder) const;
179
180 /// Emit a floating point comparison between \p LHS and \p RHS.
181 /// \p Pred if given is the intended predicate to use.
182 MachineInstr *emitFPCompare(Register LHS, Register RHS,
183 MachineIRBuilder &MIRBuilder,
184                              Optional<CmpInst::Predicate> Pred = None) const;
185
186 MachineInstr *emitInstr(unsigned Opcode,
187 std::initializer_list<llvm::DstOp> DstOps,
188 std::initializer_list<llvm::SrcOp> SrcOps,
189 MachineIRBuilder &MIRBuilder,
190 const ComplexRendererFns &RenderFns = None) const;
191 /// Helper function to emit an add or sub instruction.
192 ///
193 /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
194 /// in a specific order.
195 ///
196 /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
197 ///
198 /// \code
199 /// const std::array<std::array<unsigned, 2>, 4> Table {
200 /// {{AArch64::ADDXri, AArch64::ADDWri},
201 /// {AArch64::ADDXrs, AArch64::ADDWrs},
202 /// {AArch64::ADDXrr, AArch64::ADDWrr},
203 /// {AArch64::SUBXri, AArch64::SUBWri},
204 /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
205 /// \endcode
206 ///
207 /// Each row in the table corresponds to a different addressing mode. Each
208 /// column corresponds to a different register size.
209 ///
210 /// \attention Rows must be structured as follows:
211 /// - Row 0: The ri opcode variants
212 /// - Row 1: The rs opcode variants
213 /// - Row 2: The rr opcode variants
214 /// - Row 3: The ri opcode variants for negative immediates
215 /// - Row 4: The rx opcode variants
216 ///
217 /// \attention Columns must be structured as follows:
218 /// - Column 0: The 64-bit opcode variants
219 /// - Column 1: The 32-bit opcode variants
220 ///
221 /// \p Dst is the destination register of the binop to emit.
222 /// \p LHS is the left-hand operand of the binop to emit.
223 /// \p RHS is the right-hand operand of the binop to emit.
224 MachineInstr *emitAddSub(
225 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
226 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
227 MachineIRBuilder &MIRBuilder) const;
228 MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
229 MachineOperand &RHS,
230 MachineIRBuilder &MIRBuilder) const;
231 MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
232 MachineIRBuilder &MIRBuilder) const;
233 MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
234 MachineIRBuilder &MIRBuilder) const;
235 MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
236 MachineIRBuilder &MIRBuilder) const;
237 MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
238 MachineIRBuilder &MIRBuilder) const;
239 MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
240 AArch64CC::CondCode CC,
241 MachineIRBuilder &MIRBuilder) const;
242 MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
243 const RegisterBank &DstRB, LLT ScalarTy,
244 Register VecReg, unsigned LaneIdx,
245 MachineIRBuilder &MIRBuilder) const;
246
247 /// Helper function for selecting G_FCONSTANT. If the G_FCONSTANT can be
248 /// materialized using a FMOV instruction, then update MI and return it.
249 /// Otherwise, do nothing and return a nullptr.
250 MachineInstr *emitFMovForFConstant(MachineInstr &MI,
251 MachineRegisterInfo &MRI) const;
252
253 /// Emit a CSet for an integer compare.
254 ///
255 /// \p DefReg is expected to be a 32-bit scalar register.
256 MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
257 MachineIRBuilder &MIRBuilder) const;
258 /// Emit a CSet for a FP compare.
259 ///
260 /// \p Dst is expected to be a 32-bit scalar register.
261 MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
262 MachineIRBuilder &MIRBuilder) const;
263
264 /// Emit the overflow op for \p Opcode.
265 ///
266 /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
267 /// G_USUBO, etc.
268 std::pair<MachineInstr *, AArch64CC::CondCode>
269 emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
270 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
271
272 /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
273 /// \p IsNegative is true if the test should be "not zero".
274 /// This will also optimize the test bit instruction when possible.
275 MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
276 MachineBasicBlock *DstMBB,
277 MachineIRBuilder &MIB) const;
278
279 /// Emit a CB(N)Z instruction which branches to \p DestMBB.
280 MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
281 MachineBasicBlock *DestMBB,
282 MachineIRBuilder &MIB) const;
283
284 // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
285 // We use these manually instead of using the importer since it doesn't
286 // support SDNodeXForm.
287 ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
288 ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
289 ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
290 ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
291
292 ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
293 ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
294 ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
295
296 ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
297 unsigned Size) const;
298
299 ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
300 return selectAddrModeUnscaled(Root, 1);
301 }
302 ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
303 return selectAddrModeUnscaled(Root, 2);
304 }
305 ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
306 return selectAddrModeUnscaled(Root, 4);
307 }
308 ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
309 return selectAddrModeUnscaled(Root, 8);
310 }
311 ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
312 return selectAddrModeUnscaled(Root, 16);
313 }
314
315 /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
316 /// from complex pattern matchers like selectAddrModeIndexed().
317 ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
318 MachineRegisterInfo &MRI) const;
319
320 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
321 unsigned Size) const;
322 template <int Width>
323 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
324 return selectAddrModeIndexed(Root, Width / 8);
325 }
326
327 bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
328 const MachineRegisterInfo &MRI) const;
329 ComplexRendererFns
330 selectAddrModeShiftedExtendXReg(MachineOperand &Root,
331 unsigned SizeInBytes) const;
332
333 /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
334 /// or not a shift + extend should be folded into an addressing mode. Returns
335 /// None when this is not profitable or possible.
336 ComplexRendererFns
337 selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
338 MachineOperand &Offset, unsigned SizeInBytes,
339 bool WantsExt) const;
340 ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
341 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
342 unsigned SizeInBytes) const;
343 template <int Width>
344 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
345 return selectAddrModeXRO(Root, Width / 8);
346 }
347
348 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
349 unsigned SizeInBytes) const;
350 template <int Width>
351 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
352 return selectAddrModeWRO(Root, Width / 8);
353 }
354
355 ComplexRendererFns selectShiftedRegister(MachineOperand &Root) const;
356
357 ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
358 return selectShiftedRegister(Root);
359 }
360
361 ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
362 // TODO: selectShiftedRegister should allow for rotates on logical shifts.
363 // For now, make them the same. The only difference between the two is that
364 // logical shifts are allowed to fold in rotates. Otherwise, these are
365 // functionally the same.
366 return selectShiftedRegister(Root);
367 }
368
369 /// Given an extend instruction, determine the correct shift-extend type for
370 /// that instruction.
371 ///
372 /// If the instruction is going to be used in a load or store, pass
373 /// \p IsLoadStore = true.
374 AArch64_AM::ShiftExtendType
375 getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
376 bool IsLoadStore = false) const;
377
378 /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
379 ///
380 /// \returns Either \p Reg if no change was necessary, or the new register
381 /// created by moving \p Reg.
382 ///
383 /// Note: This uses emitCopy right now.
384 Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
385 MachineIRBuilder &MIB) const;
386
387 ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
388
389 void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
390 int OpIdx = -1) const;
391 void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
392 int OpIdx = -1) const;
393 void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
394 int OpIdx = -1) const;
395
396 // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
397 void materializeLargeCMVal(MachineInstr &I, const Value *V,
398 unsigned OpFlags) const;
399
400 // Optimization methods.
401 bool tryOptSelect(MachineInstr &MI) const;
402 MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
403 MachineOperand &Predicate,
404 MachineIRBuilder &MIRBuilder) const;
405
406 /// Return true if \p MI is a load or store of \p NumBytes bytes.
407 bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
408
409 /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
410 /// register zeroed out. In other words, the result of MI has been explicitly
411 /// zero extended.
412 bool isDef32(const MachineInstr &MI) const;
413
414 const AArch64TargetMachine &TM;
415 const AArch64Subtarget &STI;
416 const AArch64InstrInfo &TII;
417 const AArch64RegisterInfo &TRI;
418 const AArch64RegisterBankInfo &RBI;
419
420 bool ProduceNonFlagSettingCondBr = false;
421
422 // Some cached values used during selection.
423 // We use LR as a live-in register, and we keep track of it here as it can be
424 // clobbered by calls.
425 Register MFReturnAddr;
426
427#define GET_GLOBALISEL_PREDICATES_DECL
428#include "AArch64GenGlobalISel.inc"
429#undef GET_GLOBALISEL_PREDICATES_DECL
430
431// We declare the temporaries used by selectImpl() in the class to minimize the
432// cost of constructing placeholder values.
433#define GET_GLOBALISEL_TEMPORARIES_DECL
434#include "AArch64GenGlobalISel.inc"
435#undef GET_GLOBALISEL_TEMPORARIES_DECL
436};
437
438} // end anonymous namespace
439
440#define GET_GLOBALISEL_IMPL
441#include "AArch64GenGlobalISel.inc"
442#undef GET_GLOBALISEL_IMPL
443
444AArch64InstructionSelector::AArch64InstructionSelector(
445 const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
446 const AArch64RegisterBankInfo &RBI)
447 : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
448 TRI(*STI.getRegisterInfo()), RBI(RBI),
449#define GET_GLOBALISEL_PREDICATES_INIT
450#include "AArch64GenGlobalISel.inc"
451#undef GET_GLOBALISEL_PREDICATES_INIT
452#define GET_GLOBALISEL_TEMPORARIES_INIT
453#include "AArch64GenGlobalISel.inc"
454#undef GET_GLOBALISEL_TEMPORARIES_INIT
455{
456}
457
458// FIXME: This should be target-independent, inferred from the types declared
459// for each class in the bank.
460static const TargetRegisterClass *
461getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
462 const RegisterBankInfo &RBI,
463 bool GetAllRegSet = false) {
464 if (RB.getID() == AArch64::GPRRegBankID) {
465 if (Ty.getSizeInBits() <= 32)
466 return GetAllRegSet ? &AArch64::GPR32allRegClass
467 : &AArch64::GPR32RegClass;
468 if (Ty.getSizeInBits() == 64)
469 return GetAllRegSet ? &AArch64::GPR64allRegClass
470 : &AArch64::GPR64RegClass;
471 return nullptr;
472 }
473
474 if (RB.getID() == AArch64::FPRRegBankID) {
475 if (Ty.getSizeInBits() <= 16)
476 return &AArch64::FPR16RegClass;
477 if (Ty.getSizeInBits() == 32)
478 return &AArch64::FPR32RegClass;
479 if (Ty.getSizeInBits() == 64)
480 return &AArch64::FPR64RegClass;
481 if (Ty.getSizeInBits() == 128)
482 return &AArch64::FPR128RegClass;
483 return nullptr;
484 }
485
486 return nullptr;
487}
488
489/// Given a register bank, and size in bits, return the smallest register class
490/// that can represent that combination.
491static const TargetRegisterClass *
492getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
493 bool GetAllRegSet = false) {
494 unsigned RegBankID = RB.getID();
495
496 if (RegBankID == AArch64::GPRRegBankID) {
497 if (SizeInBits <= 32)
498 return GetAllRegSet ? &AArch64::GPR32allRegClass
499 : &AArch64::GPR32RegClass;
500 if (SizeInBits == 64)
501 return GetAllRegSet ? &AArch64::GPR64allRegClass
502 : &AArch64::GPR64RegClass;
503 }
504
505 if (RegBankID == AArch64::FPRRegBankID) {
506 switch (SizeInBits) {
507 default:
508 return nullptr;
509 case 8:
510 return &AArch64::FPR8RegClass;
511 case 16:
512 return &AArch64::FPR16RegClass;
513 case 32:
514 return &AArch64::FPR32RegClass;
515 case 64:
516 return &AArch64::FPR64RegClass;
517 case 128:
518 return &AArch64::FPR128RegClass;
519 }
520 }
521
522 return nullptr;
523}
524
525/// Returns the correct subregister to use for a given register class.
526static bool getSubRegForClass(const TargetRegisterClass *RC,
527 const TargetRegisterInfo &TRI, unsigned &SubReg) {
528 switch (TRI.getRegSizeInBits(*RC)) {
529 case 8:
530 SubReg = AArch64::bsub;
531 break;
532 case 16:
533 SubReg = AArch64::hsub;
534 break;
535 case 32:
536 if (RC != &AArch64::FPR32RegClass)
537 SubReg = AArch64::sub_32;
538 else
539 SubReg = AArch64::ssub;
540 break;
541 case 64:
542 SubReg = AArch64::dsub;
543 break;
544 default:
545    LLVM_DEBUG(
546        dbgs() << "Couldn't find appropriate subregister for register class.");
547 return false;
548 }
549
550 return true;
551}
552
553/// Returns the minimum size the given register bank can hold.
554static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
555 switch (RB.getID()) {
556 case AArch64::GPRRegBankID:
557 return 32;
558 case AArch64::FPRRegBankID:
559 return 8;
560 default:
561 llvm_unreachable("Tried to get minimum size for unknown register bank.")::llvm::llvm_unreachable_internal("Tried to get minimum size for unknown register bank."
, "/build/llvm-toolchain-snapshot-12~++20210124100612+2afaf072f5c1/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 561)
;
562 }
563}
564
565static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
566 auto &MI = *Root.getParent();
567 auto &MBB = *MI.getParent();
568 auto &MF = *MBB.getParent();
569 auto &MRI = MF.getRegInfo();
570 uint64_t Immed;
571 if (Root.isImm())
572 Immed = Root.getImm();
573 else if (Root.isCImm())
574 Immed = Root.getCImm()->getZExtValue();
575 else if (Root.isReg()) {
576 auto ValAndVReg =
577 getConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
578 if (!ValAndVReg)
579 return None;
580 Immed = ValAndVReg->Value.getSExtValue();
581 } else
582 return None;
583 return Immed;
584}
585
586/// Check whether \p I is a currently unsupported binary operation:
587/// - it has an unsized type
588/// - an operand is not a vreg
589/// - all operands are not in the same bank
590/// These are checks that should someday live in the verifier, but right now,
591/// these are mostly limitations of the aarch64 selector.
592static bool unsupportedBinOp(const MachineInstr &I,
593 const AArch64RegisterBankInfo &RBI,
594 const MachineRegisterInfo &MRI,
595 const AArch64RegisterInfo &TRI) {
596 LLT Ty = MRI.getType(I.getOperand(0).getReg());
597 if (!Ty.isValid()) {
598 LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Generic binop register should be typed\n"
; } } while (false)
;
599 return true;
600 }
601
602 const RegisterBank *PrevOpBank = nullptr;
603 for (auto &MO : I.operands()) {
604 // FIXME: Support non-register operands.
605 if (!MO.isReg()) {
606 LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Generic inst non-reg operands are unsupported\n"
; } } while (false)
;
607 return true;
608 }
609
610 // FIXME: Can generic operations have physical registers operands? If
611 // so, this will need to be taught about that, and we'll need to get the
612 // bank out of the minimal class for the register.
613 // Either way, this needs to be documented (and possibly verified).
614 if (!Register::isVirtualRegister(MO.getReg())) {
615 LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Generic inst has physical register operand\n"
; } } while (false)
;
616 return true;
617 }
618
619 const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
620 if (!OpBank) {
621 LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Generic register has no bank or class\n"
; } } while (false)
;
622 return true;
623 }
624
625 if (PrevOpBank && OpBank != PrevOpBank) {
626 LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Generic inst operands have different banks\n"
; } } while (false)
;
627 return true;
628 }
629 PrevOpBank = OpBank;
630 }
631 return false;
632}
633
634/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
635/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
636/// and of size \p OpSize.
637/// \returns \p GenericOpc if the combination is unsupported.
638static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
639 unsigned OpSize) {
640 switch (RegBankID) {
641 case AArch64::GPRRegBankID:
642 if (OpSize == 32) {
643 switch (GenericOpc) {
644 case TargetOpcode::G_SHL:
645 return AArch64::LSLVWr;
646 case TargetOpcode::G_LSHR:
647 return AArch64::LSRVWr;
648 case TargetOpcode::G_ASHR:
649 return AArch64::ASRVWr;
650 default:
651 return GenericOpc;
652 }
653 } else if (OpSize == 64) {
654 switch (GenericOpc) {
655 case TargetOpcode::G_PTR_ADD:
656 return AArch64::ADDXrr;
657 case TargetOpcode::G_SHL:
658 return AArch64::LSLVXr;
659 case TargetOpcode::G_LSHR:
660 return AArch64::LSRVXr;
661 case TargetOpcode::G_ASHR:
662 return AArch64::ASRVXr;
663 default:
664 return GenericOpc;
665 }
666 }
667 break;
668 case AArch64::FPRRegBankID:
669 switch (OpSize) {
670 case 32:
671 switch (GenericOpc) {
672 case TargetOpcode::G_FADD:
673 return AArch64::FADDSrr;
674 case TargetOpcode::G_FSUB:
675 return AArch64::FSUBSrr;
676 case TargetOpcode::G_FMUL:
677 return AArch64::FMULSrr;
678 case TargetOpcode::G_FDIV:
679 return AArch64::FDIVSrr;
680 default:
681 return GenericOpc;
682 }
683 case 64:
684 switch (GenericOpc) {
685 case TargetOpcode::G_FADD:
686 return AArch64::FADDDrr;
687 case TargetOpcode::G_FSUB:
688 return AArch64::FSUBDrr;
689 case TargetOpcode::G_FMUL:
690 return AArch64::FMULDrr;
691 case TargetOpcode::G_FDIV:
692 return AArch64::FDIVDrr;
693 case TargetOpcode::G_OR:
694 return AArch64::ORRv8i8;
695 default:
696 return GenericOpc;
697 }
698 }
699 break;
700 }
701 return GenericOpc;
702}
703
704/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
705/// appropriate for the (value) register bank \p RegBankID and of memory access
706/// size \p OpSize. This returns the variant with the base+unsigned-immediate
707/// addressing mode (e.g., LDRXui).
708/// \returns \p GenericOpc if the combination is unsupported.
709static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
710 unsigned OpSize) {
711 const bool isStore = GenericOpc == TargetOpcode::G_STORE;
712 switch (RegBankID) {
713 case AArch64::GPRRegBankID:
714 switch (OpSize) {
715 case 8:
716 return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
717 case 16:
718 return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
719 case 32:
720 return isStore ? AArch64::STRWui : AArch64::LDRWui;
721 case 64:
722 return isStore ? AArch64::STRXui : AArch64::LDRXui;
723 }
724 break;
725 case AArch64::FPRRegBankID:
726 switch (OpSize) {
727 case 8:
728 return isStore ? AArch64::STRBui : AArch64::LDRBui;
729 case 16:
730 return isStore ? AArch64::STRHui : AArch64::LDRHui;
731 case 32:
732 return isStore ? AArch64::STRSui : AArch64::LDRSui;
733 case 64:
734 return isStore ? AArch64::STRDui : AArch64::LDRDui;
735 }
736 break;
737 }
738 return GenericOpc;
739}
740
741#ifndef NDEBUG
742/// Helper function that verifies that we have a valid copy at the end of
743/// selectCopy. Verifies that the source and dest have the expected sizes and
744/// then returns true.
745static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
746 const MachineRegisterInfo &MRI,
747 const TargetRegisterInfo &TRI,
748 const RegisterBankInfo &RBI) {
749 const Register DstReg = I.getOperand(0).getReg();
750 const Register SrcReg = I.getOperand(1).getReg();
751 const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
752 const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
753
754 // Make sure the size of the source and dest line up.
755  assert(
756      (DstSize == SrcSize ||
757       // Copies are a mean to setup initial types, the number of
758       // bits may not exactly match.
759       (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
760       // Copies are a mean to copy bits around, as long as we are
761       // on the same register class, that's fine. Otherwise, that
762       // means we need some SUBREG_TO_REG or AND & co.
763       (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
764      "Copy with different width?!");
765
766 // Check the size of the destination.
767  assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
768         "GPRs cannot get more than 64-bit width values");
769
770 return true;
771}
772#endif
773
774/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
775/// to \p *To.
776///
777/// E.g "To = COPY SrcReg:SubReg"
778static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
779 const RegisterBankInfo &RBI, Register SrcReg,
780 const TargetRegisterClass *To, unsigned SubReg) {
781  assert(SrcReg.isValid() && "Expected a valid source register?");
782  assert(To && "Destination register class cannot be null");
783  assert(SubReg && "Expected a valid subregister");
784
785 MachineIRBuilder MIB(I);
786 auto SubRegCopy =
787 MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
788 MachineOperand &RegOp = I.getOperand(1);
789 RegOp.setReg(SubRegCopy.getReg(0));
790
791 // It's possible that the destination register won't be constrained. Make
792 // sure that happens.
793 if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))
794 RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
795
796 return true;
797}
798
799/// Helper function to get the source and destination register classes for a
800/// copy. Returns a std::pair containing the source register class for the
801/// copy, and the destination register class for the copy. If a register class
802/// cannot be determined, then it will be nullptr.
803static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
804getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
805 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
806 const RegisterBankInfo &RBI) {
807 Register DstReg = I.getOperand(0).getReg();
808 Register SrcReg = I.getOperand(1).getReg();
809 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
810 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
811 unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
812 unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
813
814 // Special casing for cross-bank copies of s1s. We can technically represent
815 // a 1-bit value with any size of register. The minimum size for a GPR is 32
816 // bits. So, we need to put the FPR on 32 bits as well.
817 //
818 // FIXME: I'm not sure if this case holds true outside of copies. If it does,
819 // then we can pull it into the helpers that get the appropriate class for a
820 // register bank. Or make a new helper that carries along some constraint
821 // information.
822 if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
823 SrcSize = DstSize = 32;
824
825 return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
826 getMinClassForRegBank(DstRegBank, DstSize, true)};
827}
828
829static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
830 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
831 const RegisterBankInfo &RBI) {
832 Register DstReg = I.getOperand(0).getReg();
833 Register SrcReg = I.getOperand(1).getReg();
834 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
835 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
836
837 // Find the correct register classes for the source and destination registers.
838 const TargetRegisterClass *SrcRC;
839 const TargetRegisterClass *DstRC;
840 std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
841
842 if (!DstRC) {
843 LLVM_DEBUG(dbgs() << "Unexpected dest size "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unexpected dest size " <<
RBI.getSizeInBits(DstReg, MRI, TRI) << '\n'; } } while
(false)
844 << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Unexpected dest size " <<
RBI.getSizeInBits(DstReg, MRI, TRI) << '\n'; } } while
(false)
;
845 return false;
846 }
847
848 // A couple helpers below, for making sure that the copy we produce is valid.
849
850 // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
851 // to verify that the src and dst are the same size, since that's handled by
852 // the SUBREG_TO_REG.
853 bool KnownValid = false;
854
855 // Returns true, or asserts if something we don't expect happens. Instead of
856 // returning true, we return isValidCopy() to ensure that we verify the
857 // result.
858 auto CheckCopy = [&]() {
859 // If we have a bitcast or something, we can't have physical registers.
860    assert((I.isCopy() ||
861            (!Register::isPhysicalRegister(I.getOperand(0).getReg()) &&
862             !Register::isPhysicalRegister(I.getOperand(1).getReg()))) &&
863           "No phys reg on generic operator!");
864 bool ValidCopy = true;
865#ifndef NDEBUG
866 ValidCopy = KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI);
867    assert(ValidCopy && "Invalid copy.");
868 (void)KnownValid;
869#endif
870 return ValidCopy;
871 };
872
873 // Is this a copy? If so, then we may need to insert a subregister copy.
874 if (I.isCopy()) {
875 // Yes. Check if there's anything to fix up.
876 if (!SrcRC) {
877 LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Couldn't determine source register class\n"
; } } while (false)
;
878 return false;
879 }
880
881 unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
882 unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
883 unsigned SubReg;
884
885 // If the source bank doesn't support a subregister copy small enough,
886 // then we first need to copy to the destination bank.
887 if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
888 const TargetRegisterClass *DstTempRC =
889 getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
890 getSubRegForClass(DstRC, TRI, SubReg);
891
892 MachineIRBuilder MIB(I);
893 auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
894 copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
895 } else if (SrcSize > DstSize) {
896 // If the source register is bigger than the destination we need to
897 // perform a subregister copy.
898 const TargetRegisterClass *SubRegRC =
899 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
900 getSubRegForClass(SubRegRC, TRI, SubReg);
901 copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
902 } else if (DstSize > SrcSize) {
903 // If the destination register is bigger than the source we need to do
904 // a promotion using SUBREG_TO_REG.
905 const TargetRegisterClass *PromotionRC =
906 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
907 getSubRegForClass(SrcRC, TRI, SubReg);
908
909 Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
910 BuildMI(*I.getParent(), I, I.getDebugLoc(),
911 TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
912 .addImm(0)
913 .addUse(SrcReg)
914 .addImm(SubReg);
915 MachineOperand &RegOp = I.getOperand(1);
916 RegOp.setReg(PromoteReg);
917
918 // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
919 KnownValid = true;
920 }
921
922 // If the destination is a physical register, then there's nothing to
923 // change, so we're done.
924 if (Register::isPhysicalRegister(DstReg))
925 return CheckCopy();
926 }
927
928 // No need to constrain SrcReg. It will get constrained when we hit another
929 // of its use or its defs. Copies do not have constraints.
930 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
931 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Failed to constrain " <<
TII.getName(I.getOpcode()) << " operand\n"; } } while (
false)
932 << " operand\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-isel")) { dbgs() << "Failed to constrain " <<
TII.getName(I.getOpcode()) << " operand\n"; } } while (
false)
;
933 return false;
934 }
935 I.setDesc(TII.get(AArch64::COPY));
936 return CheckCopy();
937}
938
939static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
940 if (!DstTy.isScalar() || !SrcTy.isScalar())
941 return GenericOpc;
942
943 const unsigned DstSize = DstTy.getSizeInBits();
944 const unsigned SrcSize = SrcTy.getSizeInBits();
945
946 switch (DstSize) {
947 case 32:
948 switch (SrcSize) {
949 case 32:
950 switch (GenericOpc) {
951 case TargetOpcode::G_SITOFP:
952 return AArch64::SCVTFUWSri;
953 case TargetOpcode::G_UITOFP:
954 return AArch64::UCVTFUWSri;
955 case TargetOpcode::G_FPTOSI:
956 return AArch64::FCVTZSUWSr;
957 case TargetOpcode::G_FPTOUI:
958 return AArch64::FCVTZUUWSr;
959 default:
960 return GenericOpc;
961 }
962 case 64:
963 switch (GenericOpc) {
964 case TargetOpcode::G_SITOFP:
965 return AArch64::SCVTFUXSri;
966 case TargetOpcode::G_UITOFP:
967 return AArch64::UCVTFUXSri;
968 case TargetOpcode::G_FPTOSI:
969 return AArch64::FCVTZSUWDr;
970 case TargetOpcode::G_FPTOUI:
971 return AArch64::FCVTZUUWDr;
972 default:
973 return GenericOpc;
974 }
975 default:
976 return GenericOpc;
977 }
978 case 64:
979 switch (SrcSize) {
980 case 32:
981 switch (GenericOpc) {
982 case TargetOpcode::G_SITOFP:
983 return AArch64::SCVTFUWDri;
984 case TargetOpcode::G_UITOFP:
985 return AArch64::UCVTFUWDri;
986 case TargetOpcode::G_FPTOSI:
987 return AArch64::FCVTZSUXSr;
988 case TargetOpcode::G_FPTOUI:
989 return AArch64::FCVTZUUXSr;
990 default:
991 return GenericOpc;
992 }
993 case 64:
994 switch (GenericOpc) {
995 case TargetOpcode::G_SITOFP:
996 return AArch64::SCVTFUXDri;
997 case TargetOpcode::G_UITOFP:
998 return AArch64::UCVTFUXDri;
999 case TargetOpcode::G_FPTOSI:
1000 return AArch64::FCVTZSUXDr;
1001 case TargetOpcode::G_FPTOUI:
1002 return AArch64::FCVTZUUXDr;
1003 default:
1004 return GenericOpc;
1005 }
1006 default:
1007 return GenericOpc;
1008 }
1009 default:
1010 return GenericOpc;
1011 };
1012 return GenericOpc;
1013}
1014
1015MachineInstr *
1016AArch64InstructionSelector::emitSelect(Register Dst, Register True,
1017 Register False, AArch64CC::CondCode CC,
1018 MachineIRBuilder &MIB) const {
1019 MachineRegisterInfo &MRI = *MIB.getMRI();
1020  assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
1021             RBI.getRegBank(True, MRI, TRI)->getID() &&
1022         "Expected both select operands to have the same regbank?");
1023 LLT Ty = MRI.getType(True);
1024 if (Ty.isVector())
1025 return nullptr;
1026 const unsigned Size = Ty.getSizeInBits();
1027  assert((Size == 32 || Size == 64) &&
1028         "Expected 32 bit or 64 bit select only?");
1029 const bool Is32Bit = Size == 32;
1030 if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
1031 unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1032 auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1033 constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
1034 return &*FCSel;
1035 }
1036
1037 // By default, we'll try and emit a CSEL.
1038 unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1039 bool Optimized = false;
1040 auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
1041 &Optimized](Register &Reg, Register &OtherReg,
1042 bool Invert) {
1043 if (Optimized)
1044 return false;
1045
1046 // Attempt to fold:
1047 //
1048 // %sub = G_SUB 0, %x
1049 // %select = G_SELECT cc, %reg, %sub
1050 //
1051 // Into:
1052 // %select = CSNEG %reg, %x, cc
1053 Register MatchReg;
1054 if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
1055 Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1056 Reg = MatchReg;
1057 if (Invert) {
1058 CC = AArch64CC::getInvertedCondCode(CC);
1059 std::swap(Reg, OtherReg);
1060 }
1061 return true;
1062 }
1063
1064 // Attempt to fold:
1065 //
1066 // %xor = G_XOR %x, -1
1067 // %select = G_SELECT cc, %reg, %xor
1068 //
1069 // Into:
1070 // %select = CSINV %reg, %x, cc
1071 if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
1072 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1073 Reg = MatchReg;
1074 if (Invert) {
1075 CC = AArch64CC::getInvertedCondCode(CC);
1076 std::swap(Reg, OtherReg);
1077 }
1078 return true;
1079 }
1080
1081 // Attempt to fold:
1082 //
1083 // %add = G_ADD %x, 1
1084 // %select = G_SELECT cc, %reg, %add
1085 //
1086 // Into:
1087 // %select = CSINC %reg, %x, cc
1088 if (mi_match(Reg, MRI, m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)))) {
1089 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1090 Reg = MatchReg;
1091 if (Invert) {
1092 CC = AArch64CC::getInvertedCondCode(CC);
1093 std::swap(Reg, OtherReg);
1094 }
1095 return true;
1096 }
1097
1098 return false;
1099 };
1100
1101 // Helper lambda which tries to use CSINC/CSINV for the instruction when its
1102 // true/false values are constants.
1103 // FIXME: All of these patterns already exist in tablegen. We should be
1104 // able to import these.
1105 auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
1106 &Optimized]() {
1107 if (Optimized)
1108 return false;
1109 auto TrueCst = getConstantVRegValWithLookThrough(True, MRI);
1110 auto FalseCst = getConstantVRegValWithLookThrough(False, MRI);
1111 if (!TrueCst && !FalseCst)
1112 return false;
1113
1114 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1115 if (TrueCst && FalseCst) {
1116 int64_t T = TrueCst->Value.getSExtValue();
1117 int64_t F = FalseCst->Value.getSExtValue();
1118
1119 if (T == 0 && F == 1) {
1120 // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
1121 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1122 True = ZReg;
1123 False = ZReg;
1124 return true;
1125 }
1126
1127 if (T == 0 && F == -1) {
1128 // G_SELECT cc, 0, -1 -> CSINV zreg, zreg, cc
1129 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1130 True = ZReg;
1131 False = ZReg;
1132 return true;
1133 }
1134 }
1135
1136 if (TrueCst) {
1137 int64_t T = TrueCst->Value.getSExtValue();
1138 if (T == 1) {
1139 // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
1140 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1141 True = False;
1142 False = ZReg;
1143 CC = AArch64CC::getInvertedCondCode(CC);
1144 return true;
1145 }
1146
1147 if (T == -1) {
1148 // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
1149 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1150 True = False;
1151 False = ZReg;
1152 CC = AArch64CC::getInvertedCondCode(CC);
1153 return true;
1154 }
1155 }
1156
1157 if (FalseCst) {
1158 int64_t F = FalseCst->Value.getSExtValue();
1159 if (F == 1) {
1160 // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
1161 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1162 False = ZReg;
1163 return true;
1164 }
1165
1166 if (F == -1) {
1167 // G_SELECT cc, t, -1 -> CSINV t, zreg, cc
1168 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1169 False = ZReg;
1170 return true;
1171 }
1172 }
1173 return false;
1174 };
1175
1176 Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
1177 Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
1178 Optimized |= TryOptSelectCst();
1179 auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1180 constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
1181 return &*SelectInst;
1182}
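// Illustrative example (a sketch, not part of the source): given a boolean
// materialization such as
//   %sel:gpr(s32) = G_SELECT %cond, 0, 1
// TryOptSelectCst above rewrites it to use only the zero register:
//   %sel = CSINCWr $wzr, $wzr, cc    ; cc ? 0 : wzr + 1
// assuming NZCV was already set by the compare that produced the condition.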
1183
1184static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
1185 switch (P) {
1186 default:
1187 llvm_unreachable("Unknown condition code!");
1188 case CmpInst::ICMP_NE:
1189 return AArch64CC::NE;
1190 case CmpInst::ICMP_EQ:
1191 return AArch64CC::EQ;
1192 case CmpInst::ICMP_SGT:
1193 return AArch64CC::GT;
1194 case CmpInst::ICMP_SGE:
1195 return AArch64CC::GE;
1196 case CmpInst::ICMP_SLT:
1197 return AArch64CC::LT;
1198 case CmpInst::ICMP_SLE:
1199 return AArch64CC::LE;
1200 case CmpInst::ICMP_UGT:
1201 return AArch64CC::HI;
1202 case CmpInst::ICMP_UGE:
1203 return AArch64CC::HS;
1204 case CmpInst::ICMP_ULT:
1205 return AArch64CC::LO;
1206 case CmpInst::ICMP_ULE:
1207 return AArch64CC::LS;
1208 }
1209}
1210
1211static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
1212 AArch64CC::CondCode &CondCode,
1213 AArch64CC::CondCode &CondCode2) {
1214 CondCode2 = AArch64CC::AL;
1215 switch (P) {
1216 default:
1217 llvm_unreachable("Unknown FP condition!");
1218 case CmpInst::FCMP_OEQ:
1219 CondCode = AArch64CC::EQ;
1220 break;
1221 case CmpInst::FCMP_OGT:
1222 CondCode = AArch64CC::GT;
1223 break;
1224 case CmpInst::FCMP_OGE:
1225 CondCode = AArch64CC::GE;
1226 break;
1227 case CmpInst::FCMP_OLT:
1228 CondCode = AArch64CC::MI;
1229 break;
1230 case CmpInst::FCMP_OLE:
1231 CondCode = AArch64CC::LS;
1232 break;
1233 case CmpInst::FCMP_ONE:
1234 CondCode = AArch64CC::MI;
1235 CondCode2 = AArch64CC::GT;
1236 break;
1237 case CmpInst::FCMP_ORD:
1238 CondCode = AArch64CC::VC;
1239 break;
1240 case CmpInst::FCMP_UNO:
1241 CondCode = AArch64CC::VS;
1242 break;
1243 case CmpInst::FCMP_UEQ:
1244 CondCode = AArch64CC::EQ;
1245 CondCode2 = AArch64CC::VS;
1246 break;
1247 case CmpInst::FCMP_UGT:
1248 CondCode = AArch64CC::HI;
1249 break;
1250 case CmpInst::FCMP_UGE:
1251 CondCode = AArch64CC::PL;
1252 break;
1253 case CmpInst::FCMP_ULT:
1254 CondCode = AArch64CC::LT;
1255 break;
1256 case CmpInst::FCMP_ULE:
1257 CondCode = AArch64CC::LE;
1258 break;
1259 case CmpInst::FCMP_UNE:
1260 CondCode = AArch64CC::NE;
1261 break;
1262 }
1263}
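// Illustrative: most predicates map to a single condition code, but e.g.
// FCMP_UEQ (unordered or equal) comes back as the pair {EQ, VS}, since
// "equal" and "unordered" must be tested separately. Callers such as
// selectCompareBranchFedByFCmp below emit one conditional branch per code.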
1264
1265/// Return a register which can be used as a bit to test in a TB(N)Z.
1266static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1267 MachineRegisterInfo &MRI) {
1268 assert(Reg.isValid() && "Expected valid register!");
1269 while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1270 unsigned Opc = MI->getOpcode();
1271
1272 if (!MI->getOperand(0).isReg() ||
1273 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1274 break;
1275
1276 // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
1277 //
1278 // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1279 // on the truncated x is the same as the bit number on x.
1280 if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1281 Opc == TargetOpcode::G_TRUNC) {
1282 Register NextReg = MI->getOperand(1).getReg();
1283 // Did we find something worth folding?
1284 if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
1285 break;
1286
1287 // NextReg is worth folding. Keep looking.
1288 Reg = NextReg;
1289 continue;
1290 }
1291
1292 // Attempt to find a suitable operation with a constant on one side.
1293 Optional<uint64_t> C;
1294 Register TestReg;
1295 switch (Opc) {
1296 default:
1297 break;
1298 case TargetOpcode::G_AND:
1299 case TargetOpcode::G_XOR: {
1300 TestReg = MI->getOperand(1).getReg();
1301 Register ConstantReg = MI->getOperand(2).getReg();
1302 auto VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI);
1303 if (!VRegAndVal) {
1304 // AND commutes, check the other side for a constant.
1305 // FIXME: Can we canonicalize the constant so that it's always on the
1306 // same side at some point earlier?
1307 std::swap(ConstantReg, TestReg);
1308 VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI);
1309 }
1310 if (VRegAndVal)
1311 C = VRegAndVal->Value.getSExtValue();
1312 break;
1313 }
1314 case TargetOpcode::G_ASHR:
1315 case TargetOpcode::G_LSHR:
1316 case TargetOpcode::G_SHL: {
1317 TestReg = MI->getOperand(1).getReg();
1318 auto VRegAndVal =
1319 getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1320 if (VRegAndVal)
1321 C = VRegAndVal->Value.getSExtValue();
1322 break;
1323 }
1324 }
1325
1326 // Didn't find a constant or viable register. Bail out of the loop.
1327 if (!C || !TestReg.isValid())
1328 break;
1329
1330 // We found a suitable instruction with a constant. Check to see if we can
1331 // walk through the instruction.
1332 Register NextReg;
1333 unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
1334 switch (Opc) {
1335 default:
1336 break;
1337 case TargetOpcode::G_AND:
1338 // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1339 if ((*C >> Bit) & 1)
1340 NextReg = TestReg;
1341 break;
1342 case TargetOpcode::G_SHL:
1343 // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
1344 // the type of the register.
1345 if (*C <= Bit && (Bit - *C) < TestRegSize) {
1346 NextReg = TestReg;
1347 Bit = Bit - *C;
1348 }
1349 break;
1350 case TargetOpcode::G_ASHR:
1351 // (tbz (ashr x, c), b) -> (tbz x, b+c), or (tbz x, msb) when b+c is >= the
1352 // number of bits in x
1353 NextReg = TestReg;
1354 Bit = Bit + *C;
1355 if (Bit >= TestRegSize)
1356 Bit = TestRegSize - 1;
1357 break;
1358 case TargetOpcode::G_LSHR:
1359 // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1360 if ((Bit + *C) < TestRegSize) {
1361 NextReg = TestReg;
1362 Bit = Bit + *C;
1363 }
1364 break;
1365 case TargetOpcode::G_XOR:
1366 // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1367 // appropriate.
1368 //
1369 // e.g. If x' = xor x, c, and the b-th bit is set in c then
1370 //
1371 // tbz x', b -> tbnz x, b
1372 //
1373 // Because x' only has the b-th bit set if x does not.
1374 if ((*C >> Bit) & 1)
1375 Invert = !Invert;
1376 NextReg = TestReg;
1377 break;
1378 }
1379
1380 // Check if we found anything worth folding.
1381 if (!NextReg.isValid())
1382 return Reg;
1383 Reg = NextReg;
1384 }
1385
1386 return Reg;
1387}
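// Example walk (a sketch): starting from
//   %shl = G_SHL %x, 2
//   TB(N)Z %shl, 5
// the G_SHL case rewrites the test to bit 5 - 2 = 3 of %x, and walking a
// G_XOR whose mask has the tested bit set additionally flips Invert, so a
// TBZ on the xor'd value becomes a TBNZ on the original register.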
1388
1389MachineInstr *AArch64InstructionSelector::emitTestBit(
1390 Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1391 MachineIRBuilder &MIB) const {
1392 assert(TestReg.isValid());
1393 assert(ProduceNonFlagSettingCondBr &&
1394 "Cannot emit TB(N)Z with speculation tracking!");
1395 MachineRegisterInfo &MRI = *MIB.getMRI();
1396
1397 // Attempt to optimize the test bit by walking over instructions.
1398 TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1399 LLT Ty = MRI.getType(TestReg);
1400 unsigned Size = Ty.getSizeInBits();
1401 assert(!Ty.isVector() && "Expected a scalar!");
1402 assert(Bit < 64 && "Bit is too large!");
1403
1404 // When the test register is a 64-bit register, we have to narrow to make
1405 // TBNZW work.
1406 bool UseWReg = Bit < 32;
1407 unsigned NecessarySize = UseWReg ? 32 : 64;
1408 if (Size != NecessarySize)
1409 TestReg = moveScalarRegClass(
1410 TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1411 MIB);
1412
1413 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1414 {AArch64::TBZW, AArch64::TBNZW}};
1415 unsigned Opc = OpcTable[UseWReg][IsNegative];
1416 auto TestBitMI =
1417 MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1418 constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1419 return &*TestBitMI;
1420}
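// For instance (illustrative): testing bit 3 of an s64 value first narrows
// the operand to a W register, since TBZW/TBNZW can only encode bits 0-31;
// testing bit 40 keeps a 64-bit operand and selects TBZX/TBNZX instead.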
1421
1422bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1423 MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1424 MachineIRBuilder &MIB) const {
1425 assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
1426 // Given something like this:
1427 //
1428 // %x = ...Something...
1429 // %one = G_CONSTANT i64 1
1430 // %zero = G_CONSTANT i64 0
1431 // %and = G_AND %x, %one
1432 // %cmp = G_ICMP intpred(ne), %and, %zero
1433 // %cmp_trunc = G_TRUNC %cmp
1434 // G_BRCOND %cmp_trunc, %bb.3
1435 //
1436 // We want to try and fold the AND into the G_BRCOND and produce either a
1437 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1438 //
1439 // In this case, we'd get
1440 //
1441 // TBNZ %x %bb.3
1442 //
1443
1444 // Check if the AND has a constant on its RHS which we can use as a mask.
1445 // If it's a power of 2, then it's the same as checking a specific bit.
1446 // (e.g., ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1447 auto MaybeBit = getConstantVRegValWithLookThrough(
1448 AndInst.getOperand(2).getReg(), *MIB.getMRI());
1449 if (!MaybeBit)
1450 return false;
1451
1452 int32_t Bit = MaybeBit->Value.exactLogBase2();
1453 if (Bit < 0)
1454 return false;
1455
1456 Register TestReg = AndInst.getOperand(1).getReg();
1457
1458 // Emit a TB(N)Z.
1459 emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1460 return true;
1461}
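// Illustrative fold (not from the source): with a power-of-two mask,
//   %and = G_AND %x, 8
//   %cmp = G_ICMP intpred(ne), %and, 0 ; G_BRCOND ..., %bb.3
// exactLogBase2() yields 3, so the whole chain becomes TBNZ %x, 3, %bb.3.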
1462
1463MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1464 bool IsNegative,
1465 MachineBasicBlock *DestMBB,
1466 MachineIRBuilder &MIB) const {
1467 assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
1468 MachineRegisterInfo &MRI = *MIB.getMRI();
1469 assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
1470 AArch64::GPRRegBankID &&
1471 "Expected GPRs only?");
1472 auto Ty = MRI.getType(CompareReg);
1473 unsigned Width = Ty.getSizeInBits();
1474 assert(!Ty.isVector() && "Expected scalar only?");
1475 assert(Width <= 64 && "Expected width to be at most 64?");
1476 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1477 {AArch64::CBNZW, AArch64::CBNZX}};
1478 unsigned Opc = OpcTable[IsNegative][Width == 64];
1479 auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
1480 constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
1481 return &*BranchMI;
1482}
1483
1484bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1485 MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1486 assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
1487 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1488 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1489 // totally clean. Some of them require two branches to implement.
1490 auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
1491 emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
1492 Pred);
1493 AArch64CC::CondCode CC1, CC2;
1494 changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2);
1495 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1496 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
1497 if (CC2 != AArch64CC::AL)
1498 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
1499 I.eraseFromParent();
1500 return true;
1501}
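// Example (a sketch): FCMP_ONE maps to the pair {MI, GT}, so a branch fed by
// it expands into one floating-point compare followed by two Bcc's aimed at
// the same destination block:
//   FCMP s0, s1
//   Bcc MI, %bb.dest
//   Bcc GT, %bb.dest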
1502
1503bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1504 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1505 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1506 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1507 // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
1508 //
1509 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1510 // instructions will not be produced, as they are conditional branch
1511 // instructions that do not set flags.
1512 if (!ProduceNonFlagSettingCondBr)
1513 return false;
1514
1515 MachineRegisterInfo &MRI = *MIB.getMRI();
1516 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1517 auto Pred =
1518 static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
1519 Register LHS = ICmp.getOperand(2).getReg();
1520 Register RHS = ICmp.getOperand(3).getReg();
1521
1522 // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
1523 auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
1524 MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1525
1526 // When we can emit a TB(N)Z, prefer that.
1527 //
1528 // Handle non-commutative condition codes first.
1529 // Note that we don't want to do this when we have a G_AND because it can
1530 // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1531 if (VRegAndVal && !AndInst) {
1532 int64_t C = VRegAndVal->Value.getSExtValue();
1533
1534 // When we have a greater-than comparison, we can just test if the msb is
1535 // zero.
1536 if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1537 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1538 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1539 I.eraseFromParent();
1540 return true;
1541 }
1542
1543 // When we have a less than comparison, we can just test if the msb is not
1544 // zero.
1545 if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1546 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1547 emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
1548 I.eraseFromParent();
1549 return true;
1550 }
1551 }
1552
1553 // Attempt to handle commutative condition codes. Right now, that's only
1554 // eq/ne.
1555 if (ICmpInst::isEquality(Pred)) {
1556 if (!VRegAndVal) {
1557 std::swap(RHS, LHS);
1558 VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
1559 AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1560 }
1561
1562 if (VRegAndVal && VRegAndVal->Value == 0) {
1563 // If there's a G_AND feeding into this branch, try to fold it away by
1564 // emitting a TB(N)Z instead.
1565 //
1566 // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
1567 // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
1568 // would be redundant.
1569 if (AndInst &&
1570 tryOptAndIntoCompareBranch(
1571 *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
1572 I.eraseFromParent();
1573 return true;
1574 }
1575
1576 // Otherwise, try to emit a CB(N)Z instead.
1577 auto LHSTy = MRI.getType(LHS);
1578 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1579 emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
1580 I.eraseFromParent();
1581 return true;
1582 }
1583 }
1584 }
1585
1586 return false;
1587}
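// Sketch of the sign-bit tricks above: for an s32 %x,
//   G_ICMP intpred(slt), %x, 0 feeding a G_BRCOND selects to
//   TBNZ %x, 31, %bb.dest
// (branch if the msb is set), while intpred(sgt) against -1 selects to
//   TBZ %x, 31, %bb.dest.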
1588
1589bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1590 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1591 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1592 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1593 if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
1594 return true;
1595
1596 // Couldn't optimize. Emit a compare + a Bcc.
1597 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1598 auto PredOp = ICmp.getOperand(1);
1599 emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
1600 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
1601 static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
1602 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1603 I.eraseFromParent();
1604 return true;
1605}
1606
1607bool AArch64InstructionSelector::selectCompareBranch(
1608 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1609 Register CondReg = I.getOperand(0).getReg();
1610 MachineInstr *CCMI = MRI.getVRegDef(CondReg);
1611 if (CCMI->getOpcode() == TargetOpcode::G_TRUNC) {
1612 CondReg = CCMI->getOperand(1).getReg();
1613 CCMI = MRI.getVRegDef(CondReg);
1614 }
1615
1616 // Try to select the G_BRCOND using whatever is feeding the condition if
1617 // possible.
1618 MachineIRBuilder MIB(I);
1619 unsigned CCMIOpc = CCMI->getOpcode();
1620 if (CCMIOpc == TargetOpcode::G_FCMP)
1621 return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
1622 if (CCMIOpc == TargetOpcode::G_ICMP)
1623 return selectCompareBranchFedByICmp(I, *CCMI, MIB);
1624
1625 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1626 // instructions will not be produced, as they are conditional branch
1627 // instructions that do not set flags.
1628 if (ProduceNonFlagSettingCondBr) {
1629 emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
1630 I.getOperand(1).getMBB(), MIB);
1631 I.eraseFromParent();
1632 return true;
1633 }
1634
1635 // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
1636 auto TstMI =
1637 MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
1638 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
1639 auto Bcc = MIB.buildInstr(AArch64::Bcc)
1640 .addImm(AArch64CC::EQ)
1641 .addMBB(I.getOperand(1).getMBB());
1642 I.eraseFromParent();
1643 return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
1644}
1645
1646/// Returns the element immediate value of a vector shift operand if found.
1647/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1648static Optional<int64_t> getVectorShiftImm(Register Reg,
1649 MachineRegisterInfo &MRI) {
1650 assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1651 MachineInstr *OpMI = MRI.getVRegDef(Reg);
1652 assert(OpMI && "Expected to find a vreg def for vector shift operand");
1653 if (OpMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR)
1654 return None;
1655
1656 // Check all operands are identical immediates.
1657 int64_t ImmVal = 0;
1658 for (unsigned Idx = 1; Idx < OpMI->getNumOperands(); ++Idx) {
1659 auto VRegAndVal = getConstantVRegValWithLookThrough(OpMI->getOperand(Idx).getReg(), MRI);
1660 if (!VRegAndVal)
1661 return None;
1662
1663 if (Idx == 1)
1664 ImmVal = VRegAndVal->Value.getSExtValue();
1665 if (ImmVal != VRegAndVal->Value.getSExtValue())
1666 return None;
1667 }
1668
1669 return ImmVal;
1670}
1671
1672/// Matches and returns the shift immediate value for a SHL instruction given
1673/// a shift operand.
1674static Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI) {
1675 Optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1676 if (!ShiftImm)
1677 return None;
1678 // Check the immediate is in range for a SHL.
1679 int64_t Imm = *ShiftImm;
1680 if (Imm < 0)
1681 return None;
1682 switch (SrcTy.getElementType().getSizeInBits()) {
1683 default:
1684 LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
1685 return None;
1686 case 8:
1687 if (Imm > 7)
1688 return None;
1689 break;
1690 case 16:
1691 if (Imm > 15)
1692 return None;
1693 break;
1694 case 32:
1695 if (Imm > 31)
1696 return None;
1697 break;
1698 case 64:
1699 if (Imm > 63)
1700 return None;
1701 break;
1702 }
1703 return Imm;
1704}
1705
1706bool AArch64InstructionSelector::selectVectorSHL(
1707 MachineInstr &I, MachineRegisterInfo &MRI) const {
1708 assert(I.getOpcode() == TargetOpcode::G_SHL);
1709 Register DstReg = I.getOperand(0).getReg();
1710 const LLT Ty = MRI.getType(DstReg);
1711 Register Src1Reg = I.getOperand(1).getReg();
1712 Register Src2Reg = I.getOperand(2).getReg();
1713
1714 if (!Ty.isVector())
1715 return false;
1716
1717 // Check if we have a vector of constants on RHS that we can select as the
1718 // immediate form.
1719 Optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1720
1721 unsigned Opc = 0;
1722 if (Ty == LLT::vector(2, 64)) {
1723 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1724 } else if (Ty == LLT::vector(4, 32)) {
1725 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1726 } else if (Ty == LLT::vector(2, 32)) {
1727 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1728 } else if (Ty == LLT::vector(4, 16)) {
1729 Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1730 } else if (Ty == LLT::vector(8, 16)) {
1731 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1732 } else if (Ty == LLT::vector(16, 8)) {
1733 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1734 } else if (Ty == LLT::vector(8, 8)) {
1735 Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1736 } else {
1737 LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1738 return false;
1739 }
1740
1741 MachineIRBuilder MIB(I);
1742 auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1743 if (ImmVal)
1744 Shl.addImm(*ImmVal);
1745 else
1746 Shl.addUse(Src2Reg);
1747 constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
1748 I.eraseFromParent();
1749 return true;
1750}
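// e.g. (illustrative): for
//   %amt:fpr(<4 x s32>) = G_BUILD_VECTOR 3, 3, 3, 3
//   %res:fpr(<4 x s32>) = G_SHL %v, %amt
// getVectorSHLImm returns 3 and we select SHLv4i32_shift %v, 3; with a
// non-constant amount we fall back to the register form USHLv4i32 %v, %amt.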
1751
1752bool AArch64InstructionSelector::selectVectorAshrLshr(
1753 MachineInstr &I, MachineRegisterInfo &MRI) const {
1754 assert(I.getOpcode() == TargetOpcode::G_ASHR ||
1755 I.getOpcode() == TargetOpcode::G_LSHR);
1756 Register DstReg = I.getOperand(0).getReg();
1757 const LLT Ty = MRI.getType(DstReg);
1758 Register Src1Reg = I.getOperand(1).getReg();
1759 Register Src2Reg = I.getOperand(2).getReg();
1760
1761 if (!Ty.isVector())
1762 return false;
1763
1764 bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1765
1766 // We expect the immediate case to be lowered in the PostLegalCombiner to
1767 // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1768
1769 // There is no shift-right-by-register instruction, but the shift-left-by-
1770 // register instruction takes a signed amount, where a negative value
1771 // specifies a right shift.
1772
1773 unsigned Opc = 0;
1774 unsigned NegOpc = 0;
1775 const TargetRegisterClass *RC =
1776 getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID), RBI);
1777 if (Ty == LLT::vector(2, 64)) {
1778 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1779 NegOpc = AArch64::NEGv2i64;
1780 } else if (Ty == LLT::vector(4, 32)) {
1781 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1782 NegOpc = AArch64::NEGv4i32;
1783 } else if (Ty == LLT::vector(2, 32)) {
1784 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1785 NegOpc = AArch64::NEGv2i32;
1786 } else if (Ty == LLT::vector(4, 16)) {
1787 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1788 NegOpc = AArch64::NEGv4i16;
1789 } else if (Ty == LLT::vector(8, 16)) {
1790 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1791 NegOpc = AArch64::NEGv8i16;
1792 } else if (Ty == LLT::vector(16, 8)) {
1793 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1794 NegOpc = AArch64::NEGv16i8;
1795 } else if (Ty == LLT::vector(8, 8)) {
1796 Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1797 NegOpc = AArch64::NEGv8i8;
1798 } else {
1799 LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1800 return false;
1801 }
1802
1803 MachineIRBuilder MIB(I);
1804 auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1805 constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1806 auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1807 constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1808 I.eraseFromParent();
1809 return true;
1810}
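// Sketch of the negate-then-shift-left trick: a <4 x s32> G_LSHR %v, %amt is
// selected roughly as
//   %neg = NEGv4i32 %amt
//   %res = USHLv4i32 %v, %neg
// with SSHLv4i32 used instead when the opcode is G_ASHR.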
1811
1812bool AArch64InstructionSelector::selectVaStartAAPCS(
1813 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1814 return false;
1815}
1816
1817bool AArch64InstructionSelector::selectVaStartDarwin(
1818 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1819 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1820 Register ListReg = I.getOperand(0).getReg();
1821
1822 Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1823
1824 auto MIB =
1825 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
1826 .addDef(ArgsAddrReg)
1827 .addFrameIndex(FuncInfo->getVarArgsStackIndex())
1828 .addImm(0)
1829 .addImm(0);
1830
1831 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1832
1833 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
1834 .addUse(ArgsAddrReg)
1835 .addUse(ListReg)
1836 .addImm(0)
1837 .addMemOperand(*I.memoperands_begin());
1838
1839 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1840 I.eraseFromParent();
1841 return true;
1842}
1843
1844void AArch64InstructionSelector::materializeLargeCMVal(
1845 MachineInstr &I, const Value *V, unsigned OpFlags) const {
1846 MachineBasicBlock &MBB = *I.getParent();
1847 MachineFunction &MF = *MBB.getParent();
1848 MachineRegisterInfo &MRI = MF.getRegInfo();
1849 MachineIRBuilder MIB(I);
1850
1851 auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
1852 MovZ->addOperand(MF, I.getOperand(1));
1853 MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
1854 AArch64II::MO_NC);
1855 MovZ->addOperand(MF, MachineOperand::CreateImm(0));
1856 constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
1857
1858 auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
1859 Register ForceDstReg) {
1860 Register DstReg = ForceDstReg
1861 ? ForceDstReg
1862 : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1863 auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
1864 if (auto *GV = dyn_cast<GlobalValue>(V)) {
1865 MovI->addOperand(MF, MachineOperand::CreateGA(
1866 GV, MovZ->getOperand(1).getOffset(), Flags));
1867 } else {
1868 MovI->addOperand(
1869 MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
1870 MovZ->getOperand(1).getOffset(), Flags));
1871 }
1872 MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
1873 constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
1874 return DstReg;
1875 };
1876 Register DstReg = BuildMovK(MovZ.getReg(0),
1877 AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
1878 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
1879 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
1880}
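// Resulting sequence (illustrative; the operand rendering here is informal,
// for some global @g): the 64-bit value is built 16 bits at a time,
//   %0 = MOVZXi @g(g0, nc), 0
//   %1 = MOVKXi %0, @g(g1, nc), 16
//   %2 = MOVKXi %1, @g(g2, nc), 32
//   %dst = MOVKXi %2, @g(g3), 48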
1881
1882bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
1883 MachineBasicBlock &MBB = *I.getParent();
1884 MachineFunction &MF = *MBB.getParent();
1885 MachineRegisterInfo &MRI = MF.getRegInfo();
1886
1887 switch (I.getOpcode()) {
1888 case TargetOpcode::G_SHL:
1889 case TargetOpcode::G_ASHR:
1890 case TargetOpcode::G_LSHR: {
1891 // These shifts are legalized to have 64 bit shift amounts because we want
1892 // to take advantage of the existing imported selection patterns that assume
1893 // the immediates are s64s. However, if the shifted type is 32 bits and for
1894 // some reason we receive input GMIR that has an s64 shift amount that's not
1895 // a G_CONSTANT, insert a truncate so that we can still select the s32
1896 // register-register variant.
1897 Register SrcReg = I.getOperand(1).getReg();
1898 Register ShiftReg = I.getOperand(2).getReg();
1899 const LLT ShiftTy = MRI.getType(ShiftReg);
1900 const LLT SrcTy = MRI.getType(SrcReg);
1901 if (SrcTy.isVector())
1902 return false;
1903 assert(!ShiftTy.isVector() && "unexpected vector shift ty");
1904 if (SrcTy.getSizeInBits() != 32 || ShiftTy.getSizeInBits() != 64)
1905 return false;
1906 auto *AmtMI = MRI.getVRegDef(ShiftReg);
1907 assert(AmtMI && "could not find a vreg definition for shift amount");
1908 if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) {
1909 // Insert a subregister copy to implement a 64->32 trunc
1910 MachineIRBuilder MIB(I);
1911 auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
1912 .addReg(ShiftReg, 0, AArch64::sub_32);
1913 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
1914 I.getOperand(2).setReg(Trunc.getReg(0));
1915 }
1916 return true;
1917 }
1918 case TargetOpcode::G_STORE:
1919 return contractCrossBankCopyIntoStore(I, MRI);
1920 case TargetOpcode::G_PTR_ADD:
1921 return convertPtrAddToAdd(I, MRI);
1922 case TargetOpcode::G_LOAD: {
1923 // For scalar loads of pointers, we try to convert the dest type from p0
1924 // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
1925 // conversion, this should be ok because all users should have been
1926 // selected already, so the type doesn't matter for them.
1927 Register DstReg = I.getOperand(0).getReg();
1928 const LLT DstTy = MRI.getType(DstReg);
1929 if (!DstTy.isPointer())
1930 return false;
1931 MRI.setType(DstReg, LLT::scalar(64));
1932 return true;
1933 }
1934 case AArch64::G_DUP: {
1935 // Convert the type from p0 to s64 to help selection.
1936 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1937 if (!DstTy.getElementType().isPointer())
1938 return false;
1939 MachineIRBuilder MIB(I);
1940 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
1941 MRI.setType(I.getOperand(0).getReg(),
1942 DstTy.changeElementType(LLT::scalar(64)));
1943 MRI.setRegBank(NewSrc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
1944 I.getOperand(1).setReg(NewSrc.getReg(0));
1945 return true;
1946 }
1947 case TargetOpcode::G_UITOFP:
1948 case TargetOpcode::G_SITOFP: {
1949 // If both source and destination regbanks are FPR, then convert the opcode
1950 // to G_SITOF/G_UITOF so that the importer can select it to an fpr variant.
1951 // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
1952 // copy.
1953 Register SrcReg = I.getOperand(1).getReg();
1954 LLT SrcTy = MRI.getType(SrcReg);
1955 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1956 if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
1957 return false;
1958
1959 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
1960 if (I.getOpcode() == TargetOpcode::G_SITOFP)
1961 I.setDesc(TII.get(AArch64::G_SITOF));
1962 else
1963 I.setDesc(TII.get(AArch64::G_UITOF));
1964 return true;
1965 }
1966 return false;
1967 }
1968 default:
1969 return false;
1970 }
1971}
1972
1973/// This lowering tries to look for G_PTR_ADD instructions and then converts
1974/// them to a standard G_ADD with a COPY on the source.
1975///
1976/// The motivation behind this is to expose the add semantics to the imported
1977/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
1978/// because the selector works bottom up, uses before defs. By the time we
1979/// end up trying to select a G_PTR_ADD, we should have already attempted to
1980/// fold this into addressing modes and were therefore unsuccessful.
1981bool AArch64InstructionSelector::convertPtrAddToAdd(
1982 MachineInstr &I, MachineRegisterInfo &MRI) {
1983 assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
1984 Register DstReg = I.getOperand(0).getReg();
1985 Register AddOp1Reg = I.getOperand(1).getReg();
1986 const LLT PtrTy = MRI.getType(DstReg);
1987 if (PtrTy.getAddressSpace() != 0)
1988 return false;
1989
1990 MachineIRBuilder MIB(I);
1991 const LLT CastPtrTy = PtrTy.isVector() ? LLT::vector(2, 64) : LLT::scalar(64);
1992 auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
1993 // Set regbanks on the registers.
1994 if (PtrTy.isVector())
1995 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
1996 else
1997 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
1998
1999 // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
2000 // %dst(intty) = G_ADD %intbase, off
2001 I.setDesc(TII.get(TargetOpcode::G_ADD));
2002 MRI.setType(DstReg, CastPtrTy);
2003 I.getOperand(1).setReg(PtrToInt.getReg(0));
2004 if (!select(*PtrToInt)) {
2005 LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
2006 return false;
2007 }
2008
2009 // Also take the opportunity here to try to do some optimization.
2010 // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
2011 Register NegatedReg;
2012 if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
2013 return true;
2014 I.getOperand(2).setReg(NegatedReg);
2015 I.setDesc(TII.get(TargetOpcode::G_SUB));
2016 return true;
2017}
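// Illustrative rewrite: %dst:gpr(p0) = G_PTR_ADD %base, %off becomes
//   %intbase:gpr(s64) = G_PTRTOINT %base
//   %dst:gpr(s64) = G_ADD %intbase, %off
// and when %off was defined as G_SUB 0, %x, the final optimization above
// further turns the G_ADD into %dst = G_SUB %intbase, %x.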
2018
2019bool AArch64InstructionSelector::earlySelectSHL(
2020 MachineInstr &I, MachineRegisterInfo &MRI) const {
2021 // We try to match the immediate variant of LSL, which is actually an alias
2022 // for a special case of UBFM. Otherwise, we fall back to the imported
2023 // selector which will match the register variant.
2024 assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
2025 const auto &MO = I.getOperand(2);
2026 auto VRegAndVal = getConstantVRegVal(MO.getReg(), MRI);
2027 if (!VRegAndVal)
2028 return false;
2029
2030 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2031 if (DstTy.isVector())
2032 return false;
2033 bool Is64Bit = DstTy.getSizeInBits() == 64;
2034 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2035 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2036 MachineIRBuilder MIB(I);
2037
2038 if (!Imm1Fn || !Imm2Fn)
2039 return false;
2040
2041 auto NewI =
2042 MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2043 {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
2044
2045 for (auto &RenderFn : *Imm1Fn)
2046 RenderFn(NewI);
2047 for (auto &RenderFn : *Imm2Fn)
2048 RenderFn(NewI);
2049
2050 I.eraseFromParent();
2051 return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
2052}
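// Example of the alias (a sketch): a 64-bit G_SHL %x, 3 selects to
//   UBFMXri %x, 61, 60
// which is the encoding behind "lsl xd, xn, #3" (immr = 64 - shift,
// imms = 63 - shift).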
2053
2054bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2055 MachineInstr &I, MachineRegisterInfo &MRI) {
2056 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
2057 // If we're storing a scalar, it doesn't matter what register bank that
2058 // scalar is on. All that matters is the size.
2059 //
2060 // So, if we see something like this (with a 32-bit scalar as an example):
2061 //
2062 // %x:gpr(s32) = ... something ...
2063 // %y:fpr(s32) = COPY %x:gpr(s32)
2064 // G_STORE %y:fpr(s32)
2065 //
2066 // We can fix this up into something like this:
2067 //
2068 // G_STORE %x:gpr(s32)
2069 //
2070 // And then continue the selection process normally.
2071 Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
2072 if (!DefDstReg.isValid())
2073 return false;
2074 LLT DefDstTy = MRI.getType(DefDstReg);
2075 Register StoreSrcReg = I.getOperand(0).getReg();
2076 LLT StoreSrcTy = MRI.getType(StoreSrcReg);
2077
2078 // If we get something strange like a physical register, then we shouldn't
2079 // go any further.
2080 if (!DefDstTy.isValid())
2081 return false;
2082
2083 // Are the source and dst types the same size?
2084 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2085 return false;
2086
2087 if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
2088 RBI.getRegBank(DefDstReg, MRI, TRI))
2089 return false;
2090
2091 // We have a cross-bank copy, which is entering a store. Let's fold it.
2092 I.getOperand(0).setReg(DefDstReg);
2093 return true;
2094}
2095
2096bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
2097 assert(I.getParent() && "Instruction should be in a basic block!");
2098 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2099
2100 MachineBasicBlock &MBB = *I.getParent();
2101 MachineFunction &MF = *MBB.getParent();
2102 MachineRegisterInfo &MRI = MF.getRegInfo();
2103
2104 switch (I.getOpcode()) {
2105 case TargetOpcode::G_BR: {
2106 // If the branch jumps to the fallthrough block, don't bother emitting it.
2107 // Only do this for -O0 for a good code size improvement, because when
2108 // optimizations are enabled we want to leave this choice to
2109 // MachineBlockPlacement.
2110 bool EnableOpt = MF.getTarget().getOptLevel() != CodeGenOpt::None;
2111 if (EnableOpt || !MBB.isLayoutSuccessor(I.getOperand(0).getMBB()))
2112 return false;
2113 I.eraseFromParent();
2114 return true;
2115 }
2116 case TargetOpcode::G_SHL:
2117 return earlySelectSHL(I, MRI);
2118 case TargetOpcode::G_CONSTANT: {
2119 bool IsZero = false;
2120 if (I.getOperand(1).isCImm())
2121 IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0;
2122 else if (I.getOperand(1).isImm())
2123 IsZero = I.getOperand(1).getImm() == 0;
2124
2125 if (!IsZero)
2126 return false;
2127
2128 Register DefReg = I.getOperand(0).getReg();
2129 LLT Ty = MRI.getType(DefReg);
2130 if (Ty.getSizeInBits() == 64) {
2131 I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
2132 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
2133 } else if (Ty.getSizeInBits() == 32) {
2134 I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
2135 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
2136 } else
2137 return false;
2138
2139 I.setDesc(TII.get(TargetOpcode::COPY));
2140 return true;
2141 }
2142 default:
2143 return false;
2144 }
2145}
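// e.g. (illustrative): %c:gpr(s64) = G_CONSTANT i64 0 is rewritten in place
// by the G_CONSTANT case above into %c = COPY $xzr, so zero never needs a
// materializing MOV.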
2146
2147bool AArch64InstructionSelector::select(MachineInstr &I) {
2148 assert(I.getParent() && "Instruction should be in a basic block!");
2149 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2150
2151 MachineBasicBlock &MBB = *I.getParent();
2152 MachineFunction &MF = *MBB.getParent();
2153 MachineRegisterInfo &MRI = MF.getRegInfo();
2154
2155 const AArch64Subtarget *Subtarget =
2156 &static_cast<const AArch64Subtarget &>(MF.getSubtarget());
2157 if (Subtarget->requiresStrictAlign()) {
2158 // We don't support this feature yet.
2159 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
2160 return false;
2161 }
2162
2163 unsigned Opcode = I.getOpcode();
2164 // G_PHI requires same handling as PHI
2165 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2166 // Certain non-generic instructions also need some special handling.
2167
2168 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2169 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2170
2171 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2172 const Register DefReg = I.getOperand(0).getReg();
2173 const LLT DefTy = MRI.getType(DefReg);
2174
2175 const RegClassOrRegBank &RegClassOrBank =
2176 MRI.getRegClassOrRegBank(DefReg);
2177
2178 const TargetRegisterClass *DefRC
2179 = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
2180 if (!DefRC) {
2181 if (!DefTy.isValid()) {
2182 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
2183 return false;
2184 }
2185 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
2186 DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
2187 if (!DefRC) {
2188 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
2189 return false;
2190 }
2191 }
2192
2193 I.setDesc(TII.get(TargetOpcode::PHI));
2194
2195 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
2196 }
2197
2198 if (I.isCopy())
2199 return selectCopy(I, TII, MRI, TRI, RBI);
2200
2201 return true;
2202 }
2203
2204
2205 if (I.getNumOperands() != I.getNumExplicitOperands()) {
2206 LLVM_DEBUG(
2207 dbgs() << "Generic instruction has unexpected implicit operands\n");
2208 return false;
2209 }
2210
2211 // Try to do some lowering before we start instruction selecting. These
2212 // lowerings are purely transformations on the input G_MIR and so selection
2213 // must continue after any modification of the instruction.
2214 if (preISelLower(I)) {
2215 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2216 }
2217
2218 // There may be patterns where the importer can't deal with them optimally,
2219 // but does select it to a suboptimal sequence so our custom C++ selection
2220 // code later never has a chance to work on it. Therefore, we have an early
2221 // selection attempt here to give priority to certain selection routines
2222 // over the imported ones.
2223 if (earlySelect(I))
2224 return true;
2225
2226 if (selectImpl(I, *CoverageInfo))
2227 return true;
2228
2229 LLT Ty =
2230 I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
2231
2232 MachineIRBuilder MIB(I);
2233
2234 switch (Opcode) {
2235 case TargetOpcode::G_BRCOND:
2236 return selectCompareBranch(I, MF, MRI);
2237
2238 case TargetOpcode::G_BRINDIRECT: {
2239 I.setDesc(TII.get(AArch64::BR));
2240 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2241 }
2242
2243 case TargetOpcode::G_BRJT:
2244 return selectBrJT(I, MRI);
2245
2246 case AArch64::G_ADD_LOW: {
2247 // This op may have been separated from its ADRP companion by the localizer
2248 // or some other code motion pass. Given that many CPUs will try to
2249 // macro fuse these operations anyway, select this into a MOVaddr pseudo
2250 // which will later be expanded into an ADRP+ADD pair after scheduling.
2251 MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
2252 if (BaseMI->getOpcode() != AArch64::ADRP) {
2253 I.setDesc(TII.get(AArch64::ADDXri));
2254 I.addOperand(MachineOperand::CreateImm(0));
2255 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2256 }
2257 assert(TM.getCodeModel() == CodeModel::Small &&
2258 "Expected small code model");
2259 MachineIRBuilder MIB(I);
2260 auto Op1 = BaseMI->getOperand(1);
2261 auto Op2 = I.getOperand(2);
2262 auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
2263 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2264 Op1.getTargetFlags())
2265 .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
2266 Op2.getTargetFlags());
2267 I.eraseFromParent();
2268 return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
2269 }
2270
2271 case TargetOpcode::G_BSWAP: {
2272 // Handle vector types for G_BSWAP directly.
2273 Register DstReg = I.getOperand(0).getReg();
2274 LLT DstTy = MRI.getType(DstReg);
2275
2276 // We should only get vector types here; everything else is handled by the
2277 // importer right now.
2278 if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
2279 LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
2280 return false;
2281 }
2282
2283 // Only handle 4 and 2 element vectors for now.
2284 // TODO: 16-bit elements.
2285 unsigned NumElts = DstTy.getNumElements();
2286 if (NumElts != 4 && NumElts != 2) {
2287 LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
2288 return false;
2289 }
2290
2291 // Choose the correct opcode for the supported types. Right now, that's
2292 // v2s32, v4s32, and v2s64.
2293 unsigned Opc = 0;
2294 unsigned EltSize = DstTy.getElementType().getSizeInBits();
2295 if (EltSize == 32)
2296 Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
2297 : AArch64::REV32v16i8;
2298 else if (EltSize == 64)
2299 Opc = AArch64::REV64v16i8;
2300
2301 // We should always get something by the time we get here...
2302 assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
2303
2304 I.setDesc(TII.get(Opc));
2305 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2306 }
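The REV32/REV64 choice above works because reversing the bytes inside each 32- or 64-bit chunk of the vector's byte view is exactly a per-element bswap. A minimal standalone C++ sketch of that equivalence (illustrative only, not LLVM code; rev32 is a hypothetical helper):

#include <cassert>
#include <cstdint>
#include <cstring>
#include <utility>

// Reverse the bytes inside every 32-bit chunk, as REV32 does on .8b/.16b.
static void rev32(uint8_t *Bytes, unsigned Len) {
  for (unsigned I = 0; I + 4 <= Len; I += 4) {
    std::swap(Bytes[I], Bytes[I + 3]);
    std::swap(Bytes[I + 1], Bytes[I + 2]);
  }
}

int main() {
  uint32_t Lanes[2] = {0x11223344u, 0xAABBCCDDu}; // a v2s32 value
  uint8_t Bytes[8];
  std::memcpy(Bytes, Lanes, sizeof(Bytes));
  rev32(Bytes, sizeof(Bytes));
  std::memcpy(Lanes, Bytes, sizeof(Bytes));
  assert(Lanes[0] == 0x44332211u && Lanes[1] == 0xDDCCBBAAu); // per-lane bswap
}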
2307
2308 case TargetOpcode::G_FCONSTANT:
2309 case TargetOpcode::G_CONSTANT: {
2310 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2311
2312 const LLT s8 = LLT::scalar(8);
2313 const LLT s16 = LLT::scalar(16);
2314 const LLT s32 = LLT::scalar(32);
2315 const LLT s64 = LLT::scalar(64);
2316 const LLT s128 = LLT::scalar(128);
2317 const LLT p0 = LLT::pointer(0, 64);
2318
2319 const Register DefReg = I.getOperand(0).getReg();
2320 const LLT DefTy = MRI.getType(DefReg);
2321 const unsigned DefSize = DefTy.getSizeInBits();
2322 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2323
2324 // FIXME: Redundant check, but even less readable when factored out.
2325 if (isFP) {
2326 if (Ty != s32 && Ty != s64 && Ty != s128) {
2327 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2328 << " constant, expected: " << s32 << " or " << s64
2329 << " or " << s128 << '\n');
2330 return false;
2331 }
2332
2333 if (RB.getID() != AArch64::FPRRegBankID) {
2334 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2335 << " constant on bank: " << RB
2336 << ", expected: FPR\n");
2337 return false;
2338 }
2339
2340 // The case when we have 0.0 is covered by tablegen. Reject it here so we
2341 // can be sure tablegen works correctly and isn't rescued by this code.
2342 // FP128 0.0, however, is not covered by tablegen, so we handle that
2343 // scenario here.
2344 if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
2345 return false;
2346 } else {
2347 // s32 and s64 are covered by tablegen.
2348 if (Ty != p0 && Ty != s8 && Ty != s16) {
2349 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2350 << " constant, expected: " << s32 << ", " << s64
2351 << ", or " << p0 << '\n');
2352 return false;
2353 }
2354
2355 if (RB.getID() != AArch64::GPRRegBankID) {
2356 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2357 << " constant on bank: " << RB
2358 << ", expected: GPR\n");
2359 return false;
2360 }
2361 }
2362
2363 // We allow G_CONSTANT of types < 32b.
2364 const unsigned MovOpc =
2365 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2366
2367 if (isFP) {
2368 // Either emit a FMOV, or emit a copy to emit a normal mov.
2369 const TargetRegisterClass &GPRRC =
2370 DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
2371 const TargetRegisterClass &FPRRC =
2372 DefSize == 32 ? AArch64::FPR32RegClass
2373 : (DefSize == 64 ? AArch64::FPR64RegClass
2374 : AArch64::FPR128RegClass);
2375
2376 // Can we use a FMOV instruction to represent the immediate?
2377 if (emitFMovForFConstant(I, MRI))
2378 return true;
2379
2380 // For 64b values, emit a constant pool load instead.
2381 if (DefSize == 64 || DefSize == 128) {
2382 auto *FPImm = I.getOperand(1).getFPImm();
2383 MachineIRBuilder MIB(I);
2384 auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2385 if (!LoadMI) {
2386 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2387 return false;
2388 }
2389 MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2390 I.eraseFromParent();
2391 return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2392 }
2393
2394 // Nope. Emit a copy and use a normal mov instead.
2395 const Register DefGPRReg = MRI.createVirtualRegister(&GPRRC);
2396 MachineOperand &RegOp = I.getOperand(0);
2397 RegOp.setReg(DefGPRReg);
2398 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2399 MIB.buildCopy({DefReg}, {DefGPRReg});
2400
2401 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2402 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2403 return false;
2404 }
2405
2406 MachineOperand &ImmOp = I.getOperand(1);
2407 // FIXME: Is going through int64_t always correct?
2408 ImmOp.ChangeToImmediate(
2409 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2410 } else if (I.getOperand(1).isCImm()) {
2411 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
2412 I.getOperand(1).ChangeToImmediate(Val);
2413 } else if (I.getOperand(1).isImm()) {
2414 uint64_t Val = I.getOperand(1).getImm();
2415 I.getOperand(1).ChangeToImmediate(Val);
2416 }
2417
2418 I.setDesc(TII.get(MovOpc));
2419 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2420 return true;
2421 }
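The "copy and use a normal mov" path above routes the FP constant's raw IEEE bits through a GPR, so the ChangeToImmediate call is effectively a bitcast to integer. A minimal standalone C++ sketch of that step (illustrative only; fpBitsToImm is a hypothetical helper):

#include <cassert>
#include <cstdint>
#include <cstring>

// Like getValueAPF().bitcastToAPInt().getZExtValue() for a 64-bit double.
static uint64_t fpBitsToImm(double D) {
  uint64_t Bits;
  std::memcpy(&Bits, &D, sizeof(Bits));
  return Bits;
}

int main() {
  assert(fpBitsToImm(1.0) == 0x3FF0000000000000ull); // the MOVi64imm payload
}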
2422 case TargetOpcode::G_EXTRACT: {
2423 Register DstReg = I.getOperand(0).getReg();
2424 Register SrcReg = I.getOperand(1).getReg();
2425 LLT SrcTy = MRI.getType(SrcReg);
2426 LLT DstTy = MRI.getType(DstReg);
2427 (void)DstTy;
2428 unsigned SrcSize = SrcTy.getSizeInBits();
2429
2430 if (SrcTy.getSizeInBits() > 64) {
2431 // This should be an extract of an s128, which is like a vector extract.
2432 if (SrcTy.getSizeInBits() != 128)
2433 return false;
2434 // Only support extracting 64 bits from an s128 at the moment.
2435 if (DstTy.getSizeInBits() != 64)
2436 return false;
2437
2438 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2439 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2440 // Check we have the right regbank always.
2441 assert(SrcRB.getID() == AArch64::FPRRegBankID &&
2442 DstRB.getID() == AArch64::FPRRegBankID &&
2443 "Wrong extract regbank!");
2444 (void)SrcRB;
2445
2446 // Emit the same code as a vector extract.
2447 // Offset must be a multiple of 64.
2448 unsigned Offset = I.getOperand(2).getImm();
2449 if (Offset % 64 != 0)
2450 return false;
2451 unsigned LaneIdx = Offset / 64;
2452 MachineIRBuilder MIB(I);
2453 MachineInstr *Extract = emitExtractVectorElt(
2454 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2455 if (!Extract)
2456 return false;
2457 I.eraseFromParent();
2458 return true;
2459 }
2460
2461 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2462 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2463 Ty.getSizeInBits() - 1);
2464
2465 if (SrcSize < 64) {
2466 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2467 "unexpected G_EXTRACT types");
2468 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2469 }
2470
2471 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2472 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2473 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2474 .addReg(DstReg, 0, AArch64::sub_32);
2475 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2476 AArch64::GPR32RegClass, MRI);
2477 I.getOperand(0).setReg(DstReg);
2478
2479 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2480 }
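In the sub-64-bit path above, UBFM with immr = Offset and imms = Offset + DstSize - 1 (imms >= immr) behaves as an unsigned bitfield extract. A minimal standalone C++ sketch of those semantics (illustrative only; ubfx64 is a hypothetical helper):

#include <cassert>
#include <cstdint>

// UBFM Xd, Xn, #Immr, #Imms with Imms >= Immr: extract Imms-Immr+1 bits
// starting at bit Immr, zero-extended.
static uint64_t ubfx64(uint64_t Src, unsigned Immr, unsigned Imms) {
  assert(Imms >= Immr && Imms < 64);
  unsigned Width = Imms - Immr + 1;
  uint64_t Mask = Width == 64 ? ~0ULL : ((1ULL << Width) - 1);
  return (Src >> Immr) & Mask;
}

int main() {
  // Extract 16 bits at offset 8, as a G_EXTRACT of s16 from bit 8 would.
  assert(ubfx64(0xABCD1234EF56ull, 8, 8 + 16 - 1) == 0x34EF);
}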
2481
2482 case TargetOpcode::G_INSERT: {
2483 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2484 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2485 unsigned DstSize = DstTy.getSizeInBits();
2486 // Larger inserts are vectors, same-size ones should be something else by
2487 // now (split up or turned into COPYs).
2488 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2489 return false;
2490
2491 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2492 unsigned LSB = I.getOperand(3).getImm();
2493 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
2494 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2495 MachineInstrBuilder(MF, I).addImm(Width - 1);
2496
2497 if (DstSize < 64) {
2498 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2499 "unexpected G_INSERT types");
2500 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2501 }
2502
2503 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2504 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2505 TII.get(AArch64::SUBREG_TO_REG))
2506 .addDef(SrcReg)
2507 .addImm(0)
2508 .addUse(I.getOperand(2).getReg())
2509 .addImm(AArch64::sub_32);
2510 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2511 AArch64::GPR32RegClass, MRI);
2512 I.getOperand(2).setReg(SrcReg);
2513
2514 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2515 }
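The BFM operands computed above, immr = (DstSize - LSB) % DstSize and imms = Width - 1, select the bitfield-insert (BFI) form: copy the low Width bits of the source into the destination starting at bit LSB. A minimal standalone C++ sketch (illustrative only; bfi64 is a hypothetical helper):

#include <cassert>
#include <cstdint>

// BFI semantics: insert the low Width bits of Src at bit LSB of Dst.
static uint64_t bfi64(uint64_t Dst, uint64_t Src, unsigned LSB, unsigned Width) {
  uint64_t Mask = (Width == 64 ? ~0ULL : ((1ULL << Width) - 1)) << LSB;
  return (Dst & ~Mask) | ((Src << LSB) & Mask);
}

int main() {
  // Insert the low 16 bits of 0x1234 at bit 8.
  assert(bfi64(~0ULL, 0x1234ull, 8, 16) == 0xFFFFFFFFFF1234FFull);
}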
2516 case TargetOpcode::G_FRAME_INDEX: {
2517 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2518 if (Ty != LLT::pointer(0, 64)) {
2519 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2520 << ", expected: " << LLT::pointer(0, 64) << '\n');
2521 return false;
2522 }
2523 I.setDesc(TII.get(AArch64::ADDXri));
2524
2525 // MOs for a #0 shifted immediate.
2526 I.addOperand(MachineOperand::CreateImm(0));
2527 I.addOperand(MachineOperand::CreateImm(0));
2528
2529 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2530 }
2531
2532 case TargetOpcode::G_GLOBAL_VALUE: {
2533 auto GV = I.getOperand(1).getGlobal();
2534 if (GV->isThreadLocal())
2535 return selectTLSGlobalValue(I, MRI);
2536
2537 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
2538 if (OpFlags & AArch64II::MO_GOT) {
2539 I.setDesc(TII.get(AArch64::LOADgot));
2540 I.getOperand(1).setTargetFlags(OpFlags);
2541 } else if (TM.getCodeModel() == CodeModel::Large) {
2542 // Materialize the global using movz/movk instructions.
2543 materializeLargeCMVal(I, GV, OpFlags);
2544 I.eraseFromParent();
2545 return true;
2546 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2547 I.setDesc(TII.get(AArch64::ADR));
2548 I.getOperand(1).setTargetFlags(OpFlags);
2549 } else {
2550 I.setDesc(TII.get(AArch64::MOVaddr));
2551 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2552 MachineInstrBuilder MIB(MF, I);
2553 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2554 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2555 }
2556 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2557 }
2558
2559 case TargetOpcode::G_ZEXTLOAD:
2560 case TargetOpcode::G_LOAD:
2561 case TargetOpcode::G_STORE: {
2562 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2563 MachineIRBuilder MIB(I);
2564
2565 LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
2566
2567 if (PtrTy != LLT::pointer(0, 64)) {
2568 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
2569 << ", expected: " << LLT::pointer(0, 64) << '\n');
2570 return false;
2571 }
2572
2573 auto &MemOp = **I.memoperands_begin();
2574 uint64_t MemSizeInBytes = MemOp.getSize();
2575 if (MemOp.isAtomic()) {
2576 // For now we just support s8 acquire loads to be able to compile stack
2577 // protector code.
2578 if (MemOp.getOrdering() == AtomicOrdering::Acquire &&
2579 MemSizeInBytes == 1) {
2580 I.setDesc(TII.get(AArch64::LDARB));
2581 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2582 }
2583 LLVM_DEBUG(dbgs() << "Atomic load/store not fully supported yet\n");
2584 return false;
2585 }
2586 unsigned MemSizeInBits = MemSizeInBytes * 8;
2587
2588#ifndef NDEBUG
2589 const Register PtrReg = I.getOperand(1).getReg();
2590 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2591 // Sanity-check the pointer register.
2592 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
2593 "Load/Store pointer operand isn't a GPR");
2594 assert(MRI.getType(PtrReg).isPointer() &&
2595 "Load/Store pointer operand isn't a pointer");
2596#endif
2597
2598 const Register ValReg = I.getOperand(0).getReg();
2599 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
2600
2601 // Helper lambda for partially selecting I. Either returns the original
2602 // instruction with an updated opcode, or a new instruction.
2603 auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
2604 bool IsStore = I.getOpcode() == TargetOpcode::G_STORE;
1. Assuming the condition is true
2605 const unsigned NewOpc =
2606 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
2607 if (NewOpc == I.getOpcode())
2. Taking false branch
2608 return nullptr;
2609 // Check if we can fold anything into the addressing mode.
2610 auto AddrModeFns =
2611 selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
3. Calling 'AArch64InstructionSelector::selectAddrModeIndexed'
2612 if (!AddrModeFns) {
2613 // Can't fold anything. Use the original instruction.
2614 I.setDesc(TII.get(NewOpc));
2615 I.addOperand(MachineOperand::CreateImm(0));
2616 return &I;
2617 }
2618
2619 // Folded something. Create a new instruction and return it.
2620 auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
2621 IsStore ? NewInst.addUse(ValReg) : NewInst.addDef(ValReg);
2622 NewInst.cloneMemRefs(I);
2623 for (auto &Fn : *AddrModeFns)
2624 Fn(NewInst);
2625 I.eraseFromParent();
2626 return &*NewInst;
2627 };
2628
2629 MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
2630 if (!LoadStore)
2631 return false;
2632
2633 // If we're storing a 0, use WZR/XZR.
2634 if (Opcode == TargetOpcode::G_STORE) {
2635 auto CVal = getConstantVRegValWithLookThrough(
2636 LoadStore->getOperand(0).getReg(), MRI, /*LookThroughInstrs = */ true,
2637 /*HandleFConstants = */ false);
2638 if (CVal && CVal->Value == 0) {
2639 switch (LoadStore->getOpcode()) {
2640 case AArch64::STRWui:
2641 case AArch64::STRHHui:
2642 case AArch64::STRBBui:
2643 LoadStore->getOperand(0).setReg(AArch64::WZR);
2644 break;
2645 case AArch64::STRXui:
2646 LoadStore->getOperand(0).setReg(AArch64::XZR);
2647 break;
2648 }
2649 }
2650 }
2651
2652 if (IsZExtLoad) {
2653 // The zextload from a smaller type to i32 should be handled by the
2654 // importer.
2655 if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
2656 return false;
2657 // If we have a ZEXTLOAD, change the load's result to a narrower 32-bit
2658 // register and zero-extend it with SUBREG_TO_REG.
2659 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2660 Register DstReg = LoadStore->getOperand(0).getReg();
2661 LoadStore->getOperand(0).setReg(LdReg);
2662
2663 MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
2664 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
2665 .addImm(0)
2666 .addUse(LdReg)
2667 .addImm(AArch64::sub_32);
2668 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2669 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
2670 MRI);
2671 }
2672 return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2673 }
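The G_ZEXTLOAD path above leans on AArch64's rule that writing a W register implicitly zeroes bits 63:32, so a 64-bit zext-load is just the narrow load plus a SUBREG_TO_REG. A minimal standalone C++ model of that guarantee (illustrative only):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t Loaded = 0xDEADBEEFu;   // value produced by the 32-bit load (LdReg)
  uint64_t Ext = (uint64_t)Loaded; // SUBREG_TO_REG %dst, 0, %ld, sub_32
  assert(Ext == 0x00000000DEADBEEFull); // the high half is guaranteed zero
}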
2674
2675 case TargetOpcode::G_SMULH:
2676 case TargetOpcode::G_UMULH: {
2677 // Reject the various things we don't support yet.
2678 if (unsupportedBinOp(I, RBI, MRI, TRI))
2679 return false;
2680
2681 const Register DefReg = I.getOperand(0).getReg();
2682 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2683
2684 if (RB.getID() != AArch64::GPRRegBankID) {
2685 LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
2686 return false;
2687 }
2688
2689 if (Ty != LLT::scalar(64)) {
2690 LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
2691 << ", expected: " << LLT::scalar(64) << '\n');
2692 return false;
2693 }
2694
2695 unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
2696 : AArch64::UMULHrr;
2697 I.setDesc(TII.get(NewOpc));
2698
2699 // Now that we selected an opcode, we need to constrain the register
2700 // operands to use appropriate classes.
2701 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2702 }
2703 case TargetOpcode::G_LSHR:
2704 case TargetOpcode::G_ASHR:
2705 if (MRI.getType(I.getOperand(0).getReg()).isVector())
2706 return selectVectorAshrLshr(I, MRI);
2707 LLVM_FALLTHROUGH;
2708 case TargetOpcode::G_SHL:
2709 if (Opcode == TargetOpcode::G_SHL &&
2710 MRI.getType(I.getOperand(0).getReg()).isVector())
2711 return selectVectorSHL(I, MRI);
2712 LLVM_FALLTHROUGH;
2713 case TargetOpcode::G_FADD:
2714 case TargetOpcode::G_FSUB:
2715 case TargetOpcode::G_FMUL:
2716 case TargetOpcode::G_FDIV:
2717 case TargetOpcode::G_OR: {
2718 // Reject the various things we don't support yet.
2719 if (unsupportedBinOp(I, RBI, MRI, TRI))
2720 return false;
2721
2722 const unsigned OpSize = Ty.getSizeInBits();
2723
2724 const Register DefReg = I.getOperand(0).getReg();
2725 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2726
2727 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
2728 if (NewOpc == I.getOpcode())
2729 return false;
2730
2731 I.setDesc(TII.get(NewOpc));
2732 // FIXME: Should the type be always reset in setDesc?
2733
2734 // Now that we selected an opcode, we need to constrain the register
2735 // operands to use appropriate classes.
2736 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2737 }
2738
2739 case TargetOpcode::G_PTR_ADD: {
2740 MachineIRBuilder MIRBuilder(I);
2741 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2),
2742 MIRBuilder);
2743 I.eraseFromParent();
2744 return true;
2745 }
2746 case TargetOpcode::G_SADDO:
2747 case TargetOpcode::G_UADDO:
2748 case TargetOpcode::G_SSUBO:
2749 case TargetOpcode::G_USUBO: {
2750 // Emit the operation and get the correct condition code.
2751 MachineIRBuilder MIRBuilder(I);
2752 auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(),
2753 I.getOperand(2), I.getOperand(3), MIRBuilder);
2754
2755 // Now, put the overflow result in the register given by the second result
2756 // operand of the overflow op. CSINC increments the result when the
2757 // predicate is false, so to get the increment when it's true, we need
2758 // to use the inverse. In this case, we want to increment when carry is set.
2759 Register ZReg = AArch64::WZR;
2760 auto CsetMI = MIRBuilder
2761 .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
2762 {ZReg, ZReg})
2763 .addImm(getInvertedCondCode(OpAndCC.second));
2764 constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
2765 I.eraseFromParent();
2766 return true;
2767 }
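CSINC Wd, Wn, Wm, cc computes Wd = cc ? Wn : Wm + 1, which is why the code above passes the inverted condition: with both sources WZR, the result is 1 exactly when the original overflow condition holds. A minimal standalone C++ model (illustrative only; csinc/cset are hypothetical helpers):

#include <cassert>
#include <cstdint>

static uint32_t csinc(uint32_t N, uint32_t M, bool Cond) {
  return Cond ? N : M + 1; // CSINC Wd, Wn, Wm, cc
}

static uint32_t cset(bool OverflowCond) {
  // CSINC Wd, WZR, WZR, invert(cc): 1 iff the overflow condition is true.
  return csinc(0, 0, /*Cond=*/!OverflowCond);
}

int main() {
  assert(cset(true) == 1);
  assert(cset(false) == 0);
}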
2768
2769 case TargetOpcode::G_PTRMASK: {
2770 Register MaskReg = I.getOperand(2).getReg();
2771 Optional<int64_t> MaskVal = getConstantVRegSExtVal(MaskReg, MRI);
2772 // TODO: Implement arbitrary cases
2773 if (!MaskVal || !isShiftedMask_64(*MaskVal))
2774 return false;
2775
2776 uint64_t Mask = *MaskVal;
2777 I.setDesc(TII.get(AArch64::ANDXri));
2778 I.getOperand(2).ChangeToImmediate(
2779 AArch64_AM::encodeLogicalImmediate(Mask, 64));
2780
2781 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2782 }
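isShiftedMask_64 above accepts exactly one contiguous run of ones; masks of that shape are what this lowering encodes as an ANDXri logical immediate, and anything else falls through to the TODO. A minimal standalone C++ sketch of the check (illustrative reimplementation; __builtin_ctzll assumes GCC/Clang):

#include <cassert>
#include <cstdint>

static bool isShiftedMask64(uint64_t V) {
  if (V == 0)
    return false;
  V >>= __builtin_ctzll(V);  // strip trailing zeros
  return (V & (V + 1)) == 0; // what remains must be 0b11...1
}

int main() {
  assert(isShiftedMask64(0x0000FFF0ull));  // one run of ones: handled above
  assert(!isShiftedMask64(0x0000FF0Full)); // two runs: rejected above
}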
2783 case TargetOpcode::G_PTRTOINT:
2784 case TargetOpcode::G_TRUNC: {
2785 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2786 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
2787
2788 const Register DstReg = I.getOperand(0).getReg();
2789 const Register SrcReg = I.getOperand(1).getReg();
2790
2791 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2792 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2793
2794 if (DstRB.getID() != SrcRB.getID()) {
2795 LLVM_DEBUG(
2796 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
2797 return false;
2798 }
2799
2800 if (DstRB.getID() == AArch64::GPRRegBankID) {
2801 const TargetRegisterClass *DstRC =
2802 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
2803 if (!DstRC)
2804 return false;
2805
2806 const TargetRegisterClass *SrcRC =
2807 getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
2808 if (!SrcRC)
2809 return false;
2810
2811 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
2812 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
2813 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
2814 return false;
2815 }
2816
2817 if (DstRC == SrcRC) {
2818 // Nothing to be done
2819 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
2820 SrcTy == LLT::scalar(64)) {
2821 llvm_unreachable("TableGen can import this case")::llvm::llvm_unreachable_internal("TableGen can import this case"
, "/build/llvm-toolchain-snapshot-12~++20210124100612+2afaf072f5c1/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp"
, 2821)
;
2822 return false;
2823 } else if (DstRC == &AArch64::GPR32RegClass &&
2824 SrcRC == &AArch64::GPR64RegClass) {
2825 I.getOperand(1).setSubReg(AArch64::sub_32);
2826 } else {
2827 LLVM_DEBUG(
2828 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
2829 return false;
2830 }
2831
2832 I.setDesc(TII.get(TargetOpcode::COPY));
2833 return true;
2834 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
2835 if (DstTy == LLT::vector(4, 16) && SrcTy == LLT::vector(4, 32)) {
2836 I.setDesc(TII.get(AArch64::XTNv4i16));
2837 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2838 return true;
2839 }
2840
2841 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
2842 MachineIRBuilder MIB(I);
2843 MachineInstr *Extract = emitExtractVectorElt(
2844 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
2845 if (!Extract)
2846 return false;
2847 I.eraseFromParent();
2848 return true;
2849 }
2850
2851 // We might have a vector G_PTRTOINT, in which case just emit a COPY.
2852 if (Opcode == TargetOpcode::G_PTRTOINT) {
2853 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
2854 I.setDesc(TII.get(TargetOpcode::COPY));
2855 return true;
2856 }
2857 }
2858
2859 return false;
2860 }
2861
2862 case TargetOpcode::G_ANYEXT: {
2863 const Register DstReg = I.getOperand(0).getReg();
2864 const Register SrcReg = I.getOperand(1).getReg();
2865
2866 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
2867 if (RBDst.getID() != AArch64::GPRRegBankID) {
2868 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
2869 << ", expected: GPR\n");
2870 return false;
2871 }
2872
2873 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
2874 if (RBSrc.getID() != AArch64::GPRRegBankID) {
2875 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
2876 << ", expected: GPR\n");
2877 return false;
2878 }
2879
2880 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
2881
2882 if (DstSize == 0) {
2883 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
2884 return false;
2885 }
2886
2887 if (DstSize != 64 && DstSize > 32) {
2888 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
2889 << ", expected: 32 or 64\n");
2890 return false;
2891 }
2892 // At this point G_ANYEXT is just like a plain COPY, but we need
2893 // to explicitly form the 64-bit value if any.
2894 if (DstSize > 32) {
2895 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
2896 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
2897 .addDef(ExtSrc)
2898 .addImm(0)
2899 .addUse(SrcReg)
2900 .addImm(AArch64::sub_32);
2901 I.getOperand(1).setReg(ExtSrc);
2902 }
2903 return selectCopy(I, TII, MRI, TRI, RBI);
2904 }
2905
2906 case TargetOpcode::G_ZEXT:
2907 case TargetOpcode::G_SEXT_INREG:
2908 case TargetOpcode::G_SEXT: {
2909 unsigned Opcode = I.getOpcode();
2910 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
2911 const Register DefReg = I.getOperand(0).getReg();
2912 Register SrcReg = I.getOperand(1).getReg();
2913 const LLT DstTy = MRI.getType(DefReg);
2914 const LLT SrcTy = MRI.getType(SrcReg);
2915 unsigned DstSize = DstTy.getSizeInBits();
2916 unsigned SrcSize = SrcTy.getSizeInBits();
2917
2918 // SEXT_INREG has the same src reg size as dst, the size of the value to be
2919 // extended is encoded in the imm.
2920 if (Opcode == TargetOpcode::G_SEXT_INREG)
2921 SrcSize = I.getOperand(2).getImm();
2922
2923 if (DstTy.isVector())
2924 return false; // Should be handled by imported patterns.
2925
2926 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
2927 AArch64::GPRRegBankID &&
2928 "Unexpected ext regbank");
2929
2930 MachineIRBuilder MIB(I);
2931 MachineInstr *ExtI;
2932
2933 // First check if we're extending the result of a load whose dest type is
2934 // smaller than 32 bits; in that case this zext is redundant. GPR32 is the
2935 // smallest GPR register on AArch64, and all smaller loads automatically
2936 // zero-extend the upper bits. E.g.
2937 // %v(s8) = G_LOAD %p, :: (load 1)
2938 // %v2(s32) = G_ZEXT %v(s8)
2939 if (!IsSigned) {
2940 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
2941 bool IsGPR =
2942 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
2943 if (LoadMI && IsGPR) {
2944 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
2945 unsigned BytesLoaded = MemOp->getSize();
2946 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
2947 return selectCopy(I, TII, MRI, TRI, RBI);
2948 }
2949
2950 // If we are zero extending from 32 bits to 64 bits, it's possible that
2951 // the instruction implicitly does the zero extend for us. In that case,
2952 // we can just emit a SUBREG_TO_REG.
2953 if (IsGPR && SrcSize == 32 && DstSize == 64) {
2954 // Unlike with the G_LOAD case, we don't want to look through copies
2955 // here.
2956 MachineInstr *Def = MRI.getVRegDef(SrcReg);
2957 if (Def && isDef32(*Def)) {
2958 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
2959 .addImm(0)
2960 .addUse(SrcReg)
2961 .addImm(AArch64::sub_32);
2962
2963 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
2964 MRI)) {
2965 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
2966 return false;
2967 }
2968
2969 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
2970 MRI)) {
2971 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
2972 return false;
2973 }
2974
2975 I.eraseFromParent();
2976 return true;
2977 }
2978 }
2979 }
2980
2981 if (DstSize == 64) {
2982 if (Opcode != TargetOpcode::G_SEXT_INREG) {
2983 // FIXME: Can we avoid manually doing this?
2984 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
2985 MRI)) {
2986 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
2987 << " operand\n");
2988 return false;
2989 }
2990 SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
2991 {&AArch64::GPR64RegClass}, {})
2992 .addImm(0)
2993 .addUse(SrcReg)
2994 .addImm(AArch64::sub_32)
2995 .getReg(0);
2996 }
2997
2998 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
2999 {DefReg}, {SrcReg})
3000 .addImm(0)
3001 .addImm(SrcSize - 1);
3002 } else if (DstSize <= 32) {
3003 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3004 {DefReg}, {SrcReg})
3005 .addImm(0)
3006 .addImm(SrcSize - 1);
3007 } else {
3008 return false;
3009 }
3010
3011 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
3012 I.eraseFromParent();
3013 return true;
3014 }
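The SBFM/UBFM forms emitted above, with immr = 0 and imms = SrcSize - 1, are plain sign/zero extension of the low SrcSize bits. A minimal standalone C++ sketch (illustrative only; ubfm0/sbfm0 are hypothetical helpers):

#include <cassert>
#include <cstdint>

static uint64_t ubfm0(uint64_t Src, unsigned SrcSize) { // UBFMXri ..., 0, SrcSize-1
  return SrcSize == 64 ? Src : Src & ((1ULL << SrcSize) - 1);
}

static int64_t sbfm0(uint64_t Src, unsigned SrcSize) { // SBFMXri ..., 0, SrcSize-1
  uint64_t SignBit = 1ULL << (SrcSize - 1);
  uint64_t Low = ubfm0(Src, SrcSize);
  return (int64_t)((Low ^ SignBit) - SignBit); // standard sign-extension trick
}

int main() {
  assert(ubfm0(0xFFull, 8) == 255); // G_ZEXT s8 -> s64
  assert(sbfm0(0xFFull, 8) == -1);  // G_SEXT s8 -> s64
  assert(sbfm0(0x7Full, 8) == 127);
}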
3015
3016 case TargetOpcode::G_SITOFP:
3017 case TargetOpcode::G_UITOFP:
3018 case TargetOpcode::G_FPTOSI:
3019 case TargetOpcode::G_FPTOUI: {
3020 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
3021 SrcTy = MRI.getType(I.getOperand(1).getReg());
3022 const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
3023 if (NewOpc == Opcode)
3024 return false;
3025
3026 I.setDesc(TII.get(NewOpc));
3027 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3028
3029 return true;
3030 }
3031
3032 case TargetOpcode::G_FREEZE:
3033 return selectCopy(I, TII, MRI, TRI, RBI);
3034
3035 case TargetOpcode::G_INTTOPTR:
3036 // The importer is currently unable to import pointer types since they
3037 // didn't exist in SelectionDAG.
3038 return selectCopy(I, TII, MRI, TRI, RBI);
3039
3040 case TargetOpcode::G_BITCAST:
3041 // Imported SelectionDAG rules can handle every bitcast except those that
3042 // bitcast from a type to the same type. Ideally, these shouldn't occur
3043 // but we might not run an optimizer that deletes them. The other exception
3044 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3045 // of them.
3046 return selectCopy(I, TII, MRI, TRI, RBI);
3047
3048 case TargetOpcode::G_SELECT: {
3049 if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
3050 LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
3051 << ", expected: " << LLT::scalar(1) << '\n');
3052 return false;
3053 }
3054
3055 const Register CondReg = I.getOperand(1).getReg();
3056 const Register TReg = I.getOperand(2).getReg();
3057 const Register FReg = I.getOperand(3).getReg();
3058
3059 if (tryOptSelect(I))
3060 return true;
3061
3062 // Make sure to use an unused vreg instead of wzr, so that the peephole
3063 // optimizations will be able to optimize these.
3064 MachineIRBuilder MIB(I);
3065 Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3066 auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3067 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
3068 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3069 if (!emitSelect(I.getOperand(0).getReg(), TReg, FReg, AArch64CC::NE, MIB))
3070 return false;
3071 I.eraseFromParent();
3072 return true;
3073 }
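The ANDSWri with logical immediate 1 above is a TST of bit 0 of the s1 condition; emitSelect on NE then picks the "true" operand exactly when that bit is set. A minimal standalone C++ model of the pattern (illustrative only):

#include <cassert>
#include <cstdint>

static uint32_t selectViaTst(uint32_t Cond, uint32_t T, uint32_t F) {
  bool Z = (Cond & 1u) == 0; // ANDS dead, Cond, #1 (only NZCV matters)
  return !Z ? T : F;         // CSEL result, T, F, NE
}

int main() {
  assert(selectViaTst(1, 10, 20) == 10);
  assert(selectViaTst(0, 10, 20) == 20);
  assert(selectViaTst(2, 10, 20) == 20); // only bit 0 of the s1 cond is tested
}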
3074 case TargetOpcode::G_ICMP: {
3075 if (Ty.isVector())
3076 return selectVectorICmp(I, MRI);
3077
3078 if (Ty != LLT::scalar(32)) {
3079 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
3080 << ", expected: " << LLT::scalar(32) << '\n');
3081 return false;
3082 }
3083
3084 MachineIRBuilder MIRBuilder(I);
3085 auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3086 emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1),
3087 MIRBuilder);
3088 emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIRBuilder);
3089 I.eraseFromParent();
3090 return true;
3091 }
3092
3093 case TargetOpcode::G_FCMP: {
3094 MachineIRBuilder MIRBuilder(I);
3095 CmpInst::Predicate Pred =
3096 static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3097 if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(),
3098 MIRBuilder, Pred) ||
3099 !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIRBuilder))
3100 return false;
3101 I.eraseFromParent();
3102 return true;
3103 }
3104 case TargetOpcode::G_VASTART:
3105 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3106 : selectVaStartAAPCS(I, MF, MRI);
3107 case TargetOpcode::G_INTRINSIC:
3108 return selectIntrinsic(I, MRI);
3109 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3110 return selectIntrinsicWithSideEffects(I, MRI);
3111 case TargetOpcode::G_IMPLICIT_DEF: {
3112 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
3113 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3114 const Register DstReg = I.getOperand(0).getReg();
3115 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3116 const TargetRegisterClass *DstRC =
3117 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
3118 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
3119 return true;
3120 }
3121 case TargetOpcode::G_BLOCK_ADDR: {
3122 if (TM.getCodeModel() == CodeModel::Large) {
3123 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
3124 I.eraseFromParent();
3125 return true;
3126 } else {
3127 I.setDesc(TII.get(AArch64::MOVaddrBA));
3128 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
3129 I.getOperand(0).getReg())
3130 .addBlockAddress(I.getOperand(1).getBlockAddress(),
3131 /* Offset */ 0, AArch64II::MO_PAGE)
3132 .addBlockAddress(
3133 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
3134 AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3135 I.eraseFromParent();
3136 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3137 }
3138 }
3139 case AArch64::G_DUP: {
3140 // When the scalar of G_DUP is an s8/s16 gpr, it can't be selected by
3141 // imported patterns. Do it manually here. Avoiding generating an s16 gpr is
3142 // difficult because at RBS we may end up pessimizing the fpr case if we
3143 // decide to add an anyextend to fix this. Manual selection is the most
3144 // robust solution for now.
3145 Register SrcReg = I.getOperand(1).getReg();
3146 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::GPRRegBankID)
3147 return false; // We expect the fpr regbank case to be imported.
3148 LLT SrcTy = MRI.getType(SrcReg);
3149 if (SrcTy.getSizeInBits() == 16)
3150 I.setDesc(TII.get(AArch64::DUPv8i16gpr));
3151 else if (SrcTy.getSizeInBits() == 8)
3152 I.setDesc(TII.get(AArch64::DUPv16i8gpr));
3153 else
3154 return false;
3155 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3156 }
3157 case TargetOpcode::G_INTRINSIC_TRUNC:
3158 return selectIntrinsicTrunc(I, MRI);
3159 case TargetOpcode::G_INTRINSIC_ROUND:
3160 return selectIntrinsicRound(I, MRI);
3161 case TargetOpcode::G_BUILD_VECTOR:
3162 return selectBuildVector(I, MRI);
3163 case TargetOpcode::G_MERGE_VALUES:
3164 return selectMergeValues(I, MRI);
3165 case TargetOpcode::G_UNMERGE_VALUES:
3166 return selectUnmergeValues(I, MRI);
3167 case TargetOpcode::G_SHUFFLE_VECTOR:
3168 return selectShuffleVector(I, MRI);
3169 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3170 return selectExtractElt(I, MRI);
3171 case TargetOpcode::G_INSERT_VECTOR_ELT:
3172 return selectInsertElt(I, MRI);
3173 case TargetOpcode::G_CONCAT_VECTORS:
3174 return selectConcatVectors(I, MRI);
3175 case TargetOpcode::G_JUMP_TABLE:
3176 return selectJumpTable(I, MRI);
3177 case TargetOpcode::G_VECREDUCE_FADD:
3178 case TargetOpcode::G_VECREDUCE_ADD:
3179 return selectReduction(I, MRI);
3180 }
3181
3182 return false;
3183}
3184
3185bool AArch64InstructionSelector::selectReduction(
3186 MachineInstr &I, MachineRegisterInfo &MRI) const {
3187 Register VecReg = I.getOperand(1).getReg();
3188 LLT VecTy = MRI.getType(VecReg);
3189 if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) {
3190 unsigned Opc = 0;
3191 if (VecTy == LLT::vector(16, 8))
3192 Opc = AArch64::ADDVv16i8v;
3193 else if (VecTy == LLT::vector(8, 16))
3194 Opc = AArch64::ADDVv8i16v;
3195 else if (VecTy == LLT::vector(4, 32))
3196 Opc = AArch64::ADDVv4i32v;
3197 else if (VecTy == LLT::vector(2, 64))
3198 Opc = AArch64::ADDPv2i64p;
3199 else {
3200 LLVM_DEBUG(dbgs() << "Unhandled type for add reduction");
3201 return false;
3202 }
3203 I.setDesc(TII.get(Opc));
3204 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3205 }
3206
3207 if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) {
3208 unsigned Opc = 0;
3209 if (VecTy == LLT::vector(2, 32))
3210 Opc = AArch64::FADDPv2i32p;
3211 else if (VecTy == LLT::vector(2, 64))
3212 Opc = AArch64::FADDPv2i64p;
3213 else {
3214 LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction");
3215 return false;
3216 }
3217 I.setDesc(TII.get(Opc));
3218 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3219 }
3220 return false;
3221}
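Both reductions selected above are across-lanes sums: ADDV adds every lane of the vector, and ADDP/FADDP on a two-lane vector adds the pair. A minimal standalone C++ model of the integer case (illustrative only; addv is a hypothetical helper):

#include <cassert>
#include <cstdint>

static uint32_t addv(const uint32_t *Lanes, unsigned N) { // e.g. ADDVv4i32v
  uint32_t Sum = 0;
  for (unsigned I = 0; I != N; ++I)
    Sum += Lanes[I]; // wrap-around lane sum, matching G_VECREDUCE_ADD
  return Sum;
}

int main() {
  uint32_t V[4] = {1, 2, 3, 4};
  assert(addv(V, 4) == 10);
}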
3222
3223bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3224 MachineRegisterInfo &MRI) const {
3225 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
3226 Register JTAddr = I.getOperand(0).getReg();
3227 unsigned JTI = I.getOperand(1).getIndex();
3228 Register Index = I.getOperand(2).getReg();
3229 MachineIRBuilder MIB(I);
3230
3231 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3232 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3233
3234 MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
3235 auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
3236 {TargetReg, ScratchReg}, {JTAddr, Index})
3237 .addJumpTableIndex(JTI);
3238 // Build the indirect branch.
3239 MIB.buildInstr(AArch64::BR, {}, {TargetReg});
3240 I.eraseFromParent();
3241 return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
3242}
3243
3244bool AArch64InstructionSelector::selectJumpTable(
3245 MachineInstr &I, MachineRegisterInfo &MRI) const {
3246 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
3247 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
3248
3249 Register DstReg = I.getOperand(0).getReg();
3250 unsigned JTI = I.getOperand(1).getIndex();
3251 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
3252 MachineIRBuilder MIB(I);
3253 auto MovMI =
3254 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3255 .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
3256 .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3257 I.eraseFromParent();
3258 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3259}
3260
3261bool AArch64InstructionSelector::selectTLSGlobalValue(
3262 MachineInstr &I, MachineRegisterInfo &MRI) const {
3263 if (!STI.isTargetMachO())
3264 return false;
3265 MachineFunction &MF = *I.getParent()->getParent();
3266 MF.getFrameInfo().setAdjustsStack(true);
3267
3268 const GlobalValue &GV = *I.getOperand(1).getGlobal();
3269 MachineIRBuilder MIB(I);
3270
3271 auto LoadGOT =
3272 MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3273 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
3274
3275 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3276 {LoadGOT.getReg(0)})
3277 .addImm(0);
3278
3279 MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
3280 // TLS calls preserve all registers except those that absolutely must be
3281 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3282 // silly).
3283 MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load})
3284 .addUse(AArch64::X0, RegState::Implicit)
3285 .addDef(AArch64::X0, RegState::Implicit)
3286 .addRegMask(TRI.getTLSCallPreservedMask());
3287
3288 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
3289 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3290 MRI);
3291 I.eraseFromParent();
3292 return true;
3293}
3294
3295bool AArch64InstructionSelector::selectIntrinsicTrunc(
3296 MachineInstr &I, MachineRegisterInfo &MRI) const {
3297 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3298
3299 // Select the correct opcode.
3300 unsigned Opc = 0;
3301 if (!SrcTy.isVector()) {
3302 switch (SrcTy.getSizeInBits()) {
3303 default:
3304 case 16:
3305 Opc = AArch64::FRINTZHr;
3306 break;
3307 case 32:
3308 Opc = AArch64::FRINTZSr;
3309 break;
3310 case 64:
3311 Opc = AArch64::FRINTZDr;
3312 break;
3313 }
3314 } else {
3315 unsigned NumElts = SrcTy.getNumElements();
3316 switch (SrcTy.getElementType().getSizeInBits()) {
3317 default:
3318 break;
3319 case 16:
3320 if (NumElts == 4)
3321 Opc = AArch64::FRINTZv4f16;
3322 else if (NumElts == 8)
3323 Opc = AArch64::FRINTZv8f16;
3324 break;
3325 case 32:
3326 if (NumElts == 2)
3327 Opc = AArch64::FRINTZv2f32;
3328 else if (NumElts == 4)
3329 Opc = AArch64::FRINTZv4f32;
3330 break;
3331 case 64:
3332 if (NumElts == 2)
3333 Opc = AArch64::FRINTZv2f64;
3334 break;
3335 }
3336 }
3337
3338 if (!Opc) {
3339 // Didn't get an opcode above, bail.
3340 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
3341 return false;
3342 }
3343
3344 // Legalization would have set us up perfectly for this; we just need to
3345 // set the opcode and move on.
3346 I.setDesc(TII.get(Opc));
3347 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3348}
3349
3350bool AArch64InstructionSelector::selectIntrinsicRound(
3351 MachineInstr &I, MachineRegisterInfo &MRI) const {
3352 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3353
3354 // Select the correct opcode.
3355 unsigned Opc = 0;
3356 if (!SrcTy.isVector()) {
3357 switch (SrcTy.getSizeInBits()) {
3358 default:
3359 case 16:
3360 Opc = AArch64::FRINTAHr;
3361 break;
3362 case 32:
3363 Opc = AArch64::FRINTASr;
3364 break;
3365 case 64:
3366 Opc = AArch64::FRINTADr;
3367 break;
3368 }
3369 } else {
3370 unsigned NumElts = SrcTy.getNumElements();
3371 switch (SrcTy.getElementType().getSizeInBits()) {
3372 default:
3373 break;
3374 case 16:
3375 if (NumElts == 4)
3376 Opc = AArch64::FRINTAv4f16;
3377 else if (NumElts == 8)
3378 Opc = AArch64::FRINTAv8f16;
3379 break;
3380 case 32:
3381 if (NumElts == 2)
3382 Opc = AArch64::FRINTAv2f32;
3383 else if (NumElts == 4)
3384 Opc = AArch64::FRINTAv4f32;
3385 break;
3386 case 64:
3387 if (NumElts == 2)
3388 Opc = AArch64::FRINTAv2f64;
3389 break;
3390 }
3391 }
3392
3393 if (!Opc) {
3394 // Didn't get an opcode above, bail.
3395 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
3396 return false;
3397 }
3398
3399 // Legalization would have set us up perfectly for this; we just need to
3400 // set the opcode and move on.
3401 I.setDesc(TII.get(Opc));
3402 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3403}
3404
3405bool AArch64InstructionSelector::selectVectorICmp(
3406 MachineInstr &I, MachineRegisterInfo &MRI) const {
3407 Register DstReg = I.getOperand(0).getReg();
3408 LLT DstTy = MRI.getType(DstReg);
3409 Register SrcReg = I.getOperand(2).getReg();
3410 Register Src2Reg = I.getOperand(3).getReg();
3411 LLT SrcTy = MRI.getType(SrcReg);
3412
3413 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
3414 unsigned NumElts = DstTy.getNumElements();
3415
3416 // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
3417 // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
3418 // Third index is cc opcode:
3419 // 0 == eq
3420 // 1 == ugt
3421 // 2 == uge
3422 // 3 == ult
3423 // 4 == ule
3424 // 5 == sgt
3425 // 6 == sge
3426 // 7 == slt
3427 // 8 == sle
3428 // ne is done by negating 'eq' result.
3429
3430 // The table below assumes that for some comparisons the operands will be
3431 // commuted.
3432 // ult op == commute + ugt op
3433 // ule op == commute + uge op
3434 // slt op == commute + sgt op
3435 // sle op == commute + sge op
3436 unsigned PredIdx = 0;
3437 bool SwapOperands = false;
3438 CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
3439 switch (Pred) {
3440 case CmpInst::ICMP_NE:
3441 case CmpInst::ICMP_EQ:
3442 PredIdx = 0;
3443 break;
3444 case CmpInst::ICMP_UGT:
3445 PredIdx = 1;
3446 break;
3447 case CmpInst::ICMP_UGE:
3448 PredIdx = 2;
3449 break;
3450 case CmpInst::ICMP_ULT:
3451 PredIdx = 3;
3452 SwapOperands = true;
3453 break;
3454 case CmpInst::ICMP_ULE:
3455 PredIdx = 4;
3456 SwapOperands = true;
3457 break;
3458 case CmpInst::ICMP_SGT:
3459 PredIdx = 5;
3460 break;
3461 case CmpInst::ICMP_SGE:
3462 PredIdx = 6;
3463 break;
3464 case CmpInst::ICMP_SLT:
3465 PredIdx = 7;
3466 SwapOperands = true;
3467 break;
3468 case CmpInst::ICMP_SLE:
3469 PredIdx = 8;
3470 SwapOperands = true;
3471 break;
3472 default:
3473 llvm_unreachable("Unhandled icmp predicate");
3474 return false;
3475 }
3476
3477 // This table obviously should be tablegen'd when we have our GISel native
3478 // tablegen selector.
3479
3480 static const unsigned OpcTable[4][4][9] = {
3481 {
3482 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3483 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3484 0 /* invalid */},
3485 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3486 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3487 0 /* invalid */},
3488 {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
3489 AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
3490 AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
3491 {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
3492 AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
3493 AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
3494 },
3495 {
3496 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3497 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3498 0 /* invalid */},
3499 {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
3500 AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
3501 AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
3502 {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
3503 AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
3504 AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
3505 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3506 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3507 0 /* invalid */}
3508 },
3509 {
3510 {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
3511 AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
3512 AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
3513 {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
3514 AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
3515 AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
3516 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3517 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3518 0 /* invalid */},
3519 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3520 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3521 0 /* invalid */}
3522 },
3523 {
3524 {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
3525 AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
3526 AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
3527 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3528 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3529 0 /* invalid */},
3530 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3531 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3532 0 /* invalid */},
3533 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3534 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3535 0 /* invalid */}
3536 },
3537 };
3538 unsigned EltIdx = Log2_32(SrcEltSize / 8);
3539 unsigned NumEltsIdx = Log2_32(NumElts / 2);
3540 unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
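// For example: a <4 x s32> 'ugt' compare has SrcEltSize = 32 and NumElts = 4,
// giving EltIdx = Log2_32(32 / 8) = 2 and NumEltsIdx = Log2_32(4 / 2) = 1, so
// OpcTable[2][1][1] selects AArch64::CMHIv4i32. A 'ult' compare hits the same
// CMHI entry (PredIdx = 3) but with SwapOperands set, commuting the operands.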
3541 if (!Opc) {
3542 LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
3543 return false;
3544 }
3545
3546 const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3547 const TargetRegisterClass *SrcRC =
3548 getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
3549 if (!SrcRC) {
3550 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3551 return false;
3552 }
3553
3554 unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
3555 if (SrcTy.getSizeInBits() == 128)
3556 NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
3557
3558 if (SwapOperands)
3559 std::swap(SrcReg, Src2Reg);
3560
3561 MachineIRBuilder MIB(I);
3562 auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
3563 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3564
3565 // Invert if we had a 'ne' cc.
3566 if (NotOpc) {
3567 Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
3568 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3569 } else {
3570 MIB.buildCopy(DstReg, Cmp.getReg(0));
3571 }
3572 RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
3573 I.eraseFromParent();
3574 return true;
3575}
3576
3577MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3578 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3579 MachineIRBuilder &MIRBuilder) const {
3580 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3581
3582 auto BuildFn = [&](unsigned SubregIndex) {
3583 auto Ins =
3584 MIRBuilder
3585 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
3586 .addImm(SubregIndex);
3587 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
3588 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
3589 return &*Ins;
3590 };
3591
3592 switch (EltSize) {
3593 case 16:
3594 return BuildFn(AArch64::hsub);
3595 case 32:
3596 return BuildFn(AArch64::ssub);
3597 case 64:
3598 return BuildFn(AArch64::dsub);
3599 default:
3600 return nullptr;
3601 }
3602}
3603
3604bool AArch64InstructionSelector::selectMergeValues(
3605 MachineInstr &I, MachineRegisterInfo &MRI) const {
3606 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
3607 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3608 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3609 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
3610 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3611
3612 if (I.getNumOperands() != 3)
3613 return false;
3614
3615 // Merging 2 s64s into an s128.
3616 if (DstTy == LLT::scalar(128)) {
3617 if (SrcTy.getSizeInBits() != 64)
3618 return false;
3619 MachineIRBuilder MIB(I);
3620 Register DstReg = I.getOperand(0).getReg();
3621 Register Src1Reg = I.getOperand(1).getReg();
3622 Register Src2Reg = I.getOperand(2).getReg();
3623 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
3624 MachineInstr *InsMI =
3625 emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB);
3626 if (!InsMI)
3627 return false;
3628 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
3629 Src2Reg, /* LaneIdx */ 1, RB, MIB);
3630 if (!Ins2MI)
3631 return false;
3632 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3633 constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
3634 I.eraseFromParent();
3635 return true;
3636 }
3637
3638 if (RB.getID() != AArch64::GPRRegBankID)
3639 return false;
3640
3641 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
3642 return false;
3643
3644 auto *DstRC = &AArch64::GPR64RegClass;
3645 Register SubToRegDef = MRI.createVirtualRegister(DstRC);
3646 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3647 TII.get(TargetOpcode::SUBREG_TO_REG))
3648 .addDef(SubToRegDef)
3649 .addImm(0)
3650 .addUse(I.getOperand(1).getReg())
3651 .addImm(AArch64::sub_32);
3652 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
3653 // Need to anyext the second scalar before we can use bfm
3654 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3655 TII.get(TargetOpcode::SUBREG_TO_REG))
3656 .addDef(SubToRegDef2)
3657 .addImm(0)
3658 .addUse(I.getOperand(2).getReg())
3659 .addImm(AArch64::sub_32);
3660 MachineInstr &BFM =
3661 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
3662 .addDef(I.getOperand(0).getReg())
3663 .addUse(SubToRegDef)
3664 .addUse(SubToRegDef2)
3665 .addImm(32)
3666 .addImm(31);
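// BFMXri with immr = 32 and imms = 31 inserts bits [31:0] of the second
// source operand at bit 32 of the destination, producing
// (Op2[31:0] << 32) | Op1[31:0], i.e. operand 1 becomes the low half.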
3667 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
3668 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
3669 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
3670 I.eraseFromParent();
3671 return true;
3672}
3673
3674static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
3675 const unsigned EltSize) {
3676 // Choose a lane copy opcode and subregister based off of the size of the
3677 // vector's elements.
3678 switch (EltSize) {
3679 case 16:
3680 CopyOpc = AArch64::CPYi16;
3681 ExtractSubReg = AArch64::hsub;
3682 break;
3683 case 32:
3684 CopyOpc = AArch64::CPYi32;
3685 ExtractSubReg = AArch64::ssub;
3686 break;
3687 case 64:
3688 CopyOpc = AArch64::CPYi64;
3689 ExtractSubReg = AArch64::dsub;
3690 break;
3691 default:
3692 // Unknown size, bail out.
3693 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
3694 return false;
3695 }
3696 return true;
3697}
3698
3699MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
3700 Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
3701 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
3702 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3703 unsigned CopyOpc = 0;
3704 unsigned ExtractSubReg = 0;
3705 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
3706 LLVM_DEBUG(
3707 dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
3708 return nullptr;
3709 }
3710
3711 const TargetRegisterClass *DstRC =
3712 getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
3713 if (!DstRC) {
3714 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
3715 return nullptr;
3716 }
3717
3718 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
3719 const LLT &VecTy = MRI.getType(VecReg);
3720 const TargetRegisterClass *VecRC =
3721 getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
3722 if (!VecRC) {
3723 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3724 return nullptr;
3725 }
3726
3727 // The register that we're going to copy into.
3728 Register InsertReg = VecReg;
3729 if (!DstReg)
3730 DstReg = MRI.createVirtualRegister(DstRC);
3731 // If the lane index is 0, we just use a subregister COPY.
3732 if (LaneIdx == 0) {
3733 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
3734 .addReg(VecReg, 0, ExtractSubReg);
3735 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3736 return &*Copy;
3737 }
3738
3739 // Lane copies require 128-bit wide registers. If we're dealing with an
3740 // unpacked vector, then we need to move up to that width. Insert an implicit
3741 // def and a subregister insert to get us there.
3742 if (VecTy.getSizeInBits() != 128) {
3743 MachineInstr *ScalarToVector = emitScalarToVector(
3744 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
3745 if (!ScalarToVector)
3746 return nullptr;
3747 InsertReg = ScalarToVector->getOperand(0).getReg();
3748 }
3749
3750 MachineInstr *LaneCopyMI =
3751 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
3752 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
3753
3754 // Make sure that we actually constrain the initial copy.
3755 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3756 return LaneCopyMI;
3757}
3758
3759bool AArch64InstructionSelector::selectExtractElt(
3760 MachineInstr &I, MachineRegisterInfo &MRI) const {
3761 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
3762 "unexpected opcode!");
3763 Register DstReg = I.getOperand(0).getReg();
3764 const LLT NarrowTy = MRI.getType(DstReg);
3765 const Register SrcReg = I.getOperand(1).getReg();
3766 const LLT WideTy = MRI.getType(SrcReg);
3767 (void)WideTy;
3768 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
3769 "source register size too small!");
3770 assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
3771
3772 // Need the lane index to determine the correct copy opcode.
3773 MachineOperand &LaneIdxOp = I.getOperand(2);
3774 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
3775
3776 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
3777 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
3778 return false;
3779 }
3780
3781 // Find the index to extract from.
3782 auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
3783 if (!VRegAndVal)
3784 return false;
3785 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
3786
3787 MachineIRBuilder MIRBuilder(I);
3788
3789 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3790 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
3791 LaneIdx, MIRBuilder);
3792 if (!Extract)
3793 return false;
3794
3795 I.eraseFromParent();
3796 return true;
3797}
3798
3799bool AArch64InstructionSelector::selectSplitVectorUnmerge(
3800 MachineInstr &I, MachineRegisterInfo &MRI) const {
3801 unsigned NumElts = I.getNumOperands() - 1;
3802 Register SrcReg = I.getOperand(NumElts).getReg();
3803 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
3804 const LLT SrcTy = MRI.getType(SrcReg);
3805
3806 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
3807 if (SrcTy.getSizeInBits() > 128) {
3808 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
3809 return false;
3810 }
3811
3812 MachineIRBuilder MIB(I);
3813
3814 // We implement a split vector operation by treating the sub-vectors as
3815 // scalars and extracting them.
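// For example, unmerging a <4 x s32> into two <2 x s32> halves treats each
// half as one 64-bit element: half 0 is extracted with a dsub subregister
// copy and half 1 with a CPYi64 lane copy (both via emitExtractVectorElt).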
3816 const RegisterBank &DstRB =
3817 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
3818 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
3819 Register Dst = I.getOperand(OpIdx).getReg();
3820 MachineInstr *Extract =
3821 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
3822 if (!Extract)
3823 return false;
3824 }
3825 I.eraseFromParent();
3826 return true;
3827}
3828
3829bool AArch64InstructionSelector::selectUnmergeValues(
3830 MachineInstr &I, MachineRegisterInfo &MRI) const {
3831 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
3832 "unexpected opcode");
3833
3834 // TODO: Handle unmerging into GPRs and from scalars to scalars.
3835 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
3836 AArch64::FPRRegBankID ||
3837 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3838 AArch64::FPRRegBankID) {
3839 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
3840 "currently unsupported.\n");
3841 return false;
3842 }
3843
3844 // The last operand is the vector source register, and every other operand is
3845 // a register to unpack into.
3846 unsigned NumElts = I.getNumOperands() - 1;
3847 Register SrcReg = I.getOperand(NumElts).getReg();
3848 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
3849 const LLT WideTy = MRI.getType(SrcReg);
3850 (void)WideTy;
3851 assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
3852 "can only unmerge from vector or s128 types!");
3853 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
3854 "source register size too small!");
3855
3856 if (!NarrowTy.isScalar())
3857 return selectSplitVectorUnmerge(I, MRI);
3858
3859 MachineIRBuilder MIB(I);
3860
3861 // Choose a lane copy opcode and subregister based off of the size of the
3862 // vector's elements.
3863 unsigned CopyOpc = 0;
3864 unsigned ExtractSubReg = 0;
3865 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
3866 return false;
3867
3868 // Set up for the lane copies.
3869 MachineBasicBlock &MBB = *I.getParent();
3870
3871 // Stores the registers we'll be copying from.
3872 SmallVector<Register, 4> InsertRegs;
3873
3874 // We'll use the first register twice, so we only need NumElts-1 registers.
3875 unsigned NumInsertRegs = NumElts - 1;
3876
3877 // If our elements fit into exactly 128 bits, then we can copy from the source
3878 // directly. Otherwise, we need to do a bit of setup with some subregister
3879 // inserts.
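// For example, unmerging a <4 x s32> into four s32 values copies element 0
// out with an ssub subregister COPY and elements 1-3 with CPYi32 lane
// copies. A 64-bit source like <2 x s32> does not cover 128 bits, so it is
// first widened with the IMPLICIT_DEF + INSERT_SUBREG (dsub) sequence below.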
3880 if (NarrowTy.getSizeInBits() * NumElts == 128) {
3881 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
3882 } else {
3883 // No. We have to perform subregister inserts. For each insert, create an
3884 // implicit def and a subregister insert, and save the register we create.
3885 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
3886 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
3887 MachineInstr &ImpDefMI =
3888 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
3889 ImpDefReg);
3890
3891 // Now, create the subregister insert from SrcReg.
3892 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
3893 MachineInstr &InsMI =
3894 *BuildMI(MBB, I, I.getDebugLoc(),
3895 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
3896 .addUse(ImpDefReg)
3897 .addUse(SrcReg)
3898 .addImm(AArch64::dsub);
3899
3900 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
3901 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
3902
3903 // Save the register so that we can copy from it after.
3904 InsertRegs.push_back(InsertReg);
3905 }
3906 }
3907
3908 // Now that we've created any necessary subregister inserts, we can
3909 // create the copies.
3910 //
3911 // Perform the first copy separately as a subregister copy.
3912 Register CopyTo = I.getOperand(0).getReg();
3913 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
3914 .addReg(InsertRegs[0], 0, ExtractSubReg);
3915 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
3916
3917 // Now, perform the remaining copies as vector lane copies.
3918 unsigned LaneIdx = 1;
3919 for (Register InsReg : InsertRegs) {
3920 Register CopyTo = I.getOperand(LaneIdx).getReg();
3921 MachineInstr &CopyInst =
3922 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
3923 .addUse(InsReg)
3924 .addImm(LaneIdx);
3925 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
3926 ++LaneIdx;
3927 }
3928
3929 // Separately constrain the first copy's destination. Because of the
3930 // limitation in constrainOperandRegClass, we can't guarantee that this will
3931 // actually be constrained. So, do it ourselves using the second operand.
3932 const TargetRegisterClass *RC =
3933 MRI.getRegClassOrNull(I.getOperand(1).getReg());
3934 if (!RC) {
3935 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
3936 return false;
3937 }
3938
3939 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
3940 I.eraseFromParent();
3941 return true;
3942}
3943
3944bool AArch64InstructionSelector::selectConcatVectors(
3945 MachineInstr &I, MachineRegisterInfo &MRI) const {
3946 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
3947 "Unexpected opcode");
3948 Register Dst = I.getOperand(0).getReg();
3949 Register Op1 = I.getOperand(1).getReg();
3950 Register Op2 = I.getOperand(2).getReg();
3951 MachineIRBuilder MIRBuilder(I);
3952 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder);
3953 if (!ConcatMI)
3954 return false;
3955 I.eraseFromParent();
3956 return true;
3957}
3958
3959unsigned
3960AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
3961 MachineFunction &MF) const {
3962 Type *CPTy = CPVal->getType();
3963 Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
3964
3965 MachineConstantPool *MCP = MF.getConstantPool();
3966 return MCP->getConstantPoolIndex(CPVal, Alignment);
3967}
3968
3969MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
3970 const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
3971 unsigned CPIdx = emitConstantPoolEntry(CPVal, MIRBuilder.getMF());
3972
3973 auto Adrp =
3974 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
3975 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
3976
3977 MachineInstr *LoadMI = nullptr;
3978 switch (MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType())) {
3979 case 16:
3980 LoadMI =
3981 &*MIRBuilder
3982 .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
3983 .addConstantPoolIndex(CPIdx, 0,
3984 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3985 break;
3986 case 8:
3987 LoadMI = &*MIRBuilder
3988 .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
3989 .addConstantPoolIndex(
3990 CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3991 break;
3992 default:
3993 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
3994 << *CPVal->getType());
3995 return nullptr;
3996 }
3997 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
3998 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
3999 return LoadMI;
4000}
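// Illustratively (the label here is hypothetical), a 16-byte constant becomes
// an ADRP of the constant pool entry's page plus an LDRQui at its low 12-bit
// page offset:
//   adrp x8, .LCPI0_0              ; MO_PAGE
//   ldr q0, [x8, :lo12:.LCPI0_0]   ; MO_PAGEOFF | MO_NC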
4001
4002/// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
4003/// size and RB.
4004static std::pair<unsigned, unsigned>
4005getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
4006 unsigned Opc, SubregIdx;
4007 if (RB.getID() == AArch64::GPRRegBankID) {
4008 if (EltSize == 16) {
4009 Opc = AArch64::INSvi16gpr;
4010 SubregIdx = AArch64::ssub;
4011 } else if (EltSize == 32) {
4012 Opc = AArch64::INSvi32gpr;
4013 SubregIdx = AArch64::ssub;
4014 } else if (EltSize == 64) {
4015 Opc = AArch64::INSvi64gpr;
4016 SubregIdx = AArch64::dsub;
4017 } else {
4018 llvm_unreachable("invalid elt size!");
4019 }
4020 } else {
4021 if (EltSize == 8) {
4022 Opc = AArch64::INSvi8lane;
4023 SubregIdx = AArch64::bsub;
4024 } else if (EltSize == 16) {
4025 Opc = AArch64::INSvi16lane;
4026 SubregIdx = AArch64::hsub;
4027 } else if (EltSize == 32) {
4028 Opc = AArch64::INSvi32lane;
4029 SubregIdx = AArch64::ssub;
4030 } else if (EltSize == 64) {
4031 Opc = AArch64::INSvi64lane;
4032 SubregIdx = AArch64::dsub;
4033 } else {
4034 llvm_unreachable("invalid elt size!");
4035 }
4036 }
4037 return std::make_pair(Opc, SubregIdx);
4038}
4039
4040MachineInstr *AArch64InstructionSelector::emitInstr(
4041 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4042 std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4043 const ComplexRendererFns &RenderFns) const {
4044 assert(Opcode && "Expected an opcode?");
4045 assert(!isPreISelGenericOpcode(Opcode) &&
4046 "Function should only be used to produce selected instructions!");
4047 auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
4048 if (RenderFns)
4049 for (auto &Fn : *RenderFns)
4050 Fn(MI);
4051 constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
4052 return &*MI;
4053}
4054
4055MachineInstr *AArch64InstructionSelector::emitAddSub(
4056 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4057 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
4058 MachineIRBuilder &MIRBuilder) const {
4059 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4060 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4061 auto Ty = MRI.getType(LHS.getReg());
4062 assert(!Ty.isVector() && "Expected a scalar or pointer?");
4063 unsigned Size = Ty.getSizeInBits();
4064 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
4065 bool Is32Bit = Size == 32;
4066
4067 // INSTRri form with positive arithmetic immediate.
4068 if (auto Fns = selectArithImmed(RHS))
4069 return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
4070 MIRBuilder, Fns);
4071
4072 // INSTRri form with negative arithmetic immediate.
4073 if (auto Fns = selectNegArithImmed(RHS))
4074 return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
4075 MIRBuilder, Fns);
4076
4077 // INSTRrx form.
4078 if (auto Fns = selectArithExtendedRegister(RHS))
4079 return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
4080 MIRBuilder, Fns);
4081
4082 // INSTRrs form.
4083 if (auto Fns = selectShiftedRegister(RHS))
4084 return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
4085 MIRBuilder, Fns);
4086 return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
4087 MIRBuilder);
4088}
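// For an ADD, the forms above correspond roughly to:
//   add x0, x1, #42         ; INSTRri, positive arithmetic immediate
//   sub x0, x1, #42         ; INSTRri, RHS was the constant -42
//   add x0, x1, w2, sxtw    ; INSTRrx, extended register
//   add x0, x1, x2, lsl #3  ; INSTRrs, shifted register
//   add x0, x1, x2          ; INSTRrr, register-register fallback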
4089
4090MachineInstr *
4091AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4092 MachineOperand &RHS,
4093 MachineIRBuilder &MIRBuilder) const {
4094 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4095 {{AArch64::ADDXri, AArch64::ADDWri},
4096 {AArch64::ADDXrs, AArch64::ADDWrs},
4097 {AArch64::ADDXrr, AArch64::ADDWrr},
4098 {AArch64::SUBXri, AArch64::SUBWri},
4099 {AArch64::ADDXrx, AArch64::ADDWrx}}};
4100 return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
4101}
4102
4103MachineInstr *
4104AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4105 MachineOperand &RHS,
4106 MachineIRBuilder &MIRBuilder) const {
4107 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4108 {{AArch64::ADDSXri, AArch64::ADDSWri},
4109 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4110 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4111 {AArch64::SUBSXri, AArch64::SUBSWri},
4112 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4113 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4114}
4115
4116MachineInstr *
4117AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4118 MachineOperand &RHS,
4119 MachineIRBuilder &MIRBuilder) const {
4120 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4121 {{AArch64::SUBSXri, AArch64::SUBSWri},
4122 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4123 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4124 {AArch64::ADDSXri, AArch64::ADDSWri},
4125 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4126 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4127}
4128
4129MachineInstr *
4130AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4131 MachineIRBuilder &MIRBuilder) const {
4132 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4133 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
4134 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4135 return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4136}
4137
4138MachineInstr *
4139AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4140 MachineIRBuilder &MIRBuilder) const {
4141 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4142 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4143 LLT Ty = MRI.getType(LHS.getReg());
4144 unsigned RegSize = Ty.getSizeInBits();
4145 bool Is32Bit = (RegSize == 32);
4146 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4147 {AArch64::ANDSXrs, AArch64::ANDSWrs},
4148 {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4149 // ANDS needs a logical immediate for its immediate form. Check if we can
4150 // fold one in.
4151 if (auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
4152 int64_t Imm = ValAndVReg->Value.getSExtValue();
4153
4154 if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
4155 auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
4156 TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
4157 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
4158 return &*TstMI;
4159 }
4160 }
4161
4162 if (auto Fns = selectLogicalShiftedRegister(RHS))
4163 return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
4164 return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
4165}
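// TST is ANDS with the result unused; e.g. a test against 0xff becomes
// ANDSWri/ANDSXri with encodeLogicalImmediate(0xff, RegSize), and only the
// NZCV flags the ANDS produces are consumed.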
4166
4167MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4168 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4169 MachineIRBuilder &MIRBuilder) const {
4170 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4171 assert(Predicate.isPredicate() && "Expected predicate?");
4172 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4173 LLT CmpTy = MRI.getType(LHS.getReg());
4174 assert(!CmpTy.isVector() && "Expected scalar or pointer");
4175 unsigned Size = CmpTy.getSizeInBits();
4176 (void)Size;
4177 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
4178 // Fold the compare into a cmn or tst if possible.
4179 if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4180 return FoldCmp;
4181 auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
4182 return emitSUBS(Dst, LHS, RHS, MIRBuilder);
4183}
4184
4185MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4186 Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
4187 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4188#ifndef NDEBUG
4189 LLT Ty = MRI.getType(Dst);
4190 assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
4191 "Expected a 32-bit scalar register?");
4192#endif
4193 const Register ZeroReg = AArch64::WZR;
4194 auto EmitCSet = [&](Register CsetDst, AArch64CC::CondCode CC) {
4195 auto CSet =
4196 MIRBuilder.buildInstr(AArch64::CSINCWr, {CsetDst}, {ZeroReg, ZeroReg})
4197 .addImm(getInvertedCondCode(CC));
4198 constrainSelectedInstRegOperands(*CSet, TII, TRI, RBI);
4199 return &*CSet;
4200 };
4201
4202 AArch64CC::CondCode CC1, CC2;
4203 changeFCMPPredToAArch64CC(Pred, CC1, CC2);
4204 if (CC2 == AArch64CC::AL)
4205 return EmitCSet(Dst, CC1);
4206
4207 const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
4208 Register Def1Reg = MRI.createVirtualRegister(RC);
4209 Register Def2Reg = MRI.createVirtualRegister(RC);
4210 EmitCSet(Def1Reg, CC1);
4211 EmitCSet(Def2Reg, CC2);
4212 auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4213 constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
4214 return &*OrMI;
4215}
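// For example, FCMP_ONE ("ordered and not equal") has no single AArch64
// condition code: changeFCMPPredToAArch64CC returns MI and GT, and the
// result is assembled as cset(MI) | cset(GT) by the ORRWrr above.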
4216
4217MachineInstr *
4218AArch64InstructionSelector::emitFPCompare(Register LHS, Register RHS,
4219 MachineIRBuilder &MIRBuilder,
4220 Optional<CmpInst::Predicate> Pred) const {
4221 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4222 LLT Ty = MRI.getType(LHS);
4223 if (Ty.isVector())
4224 return nullptr;
4225 unsigned OpSize = Ty.getSizeInBits();
4226 if (OpSize != 32 && OpSize != 64)
4227 return nullptr;
4228
4229 // If this is a compare against +0.0, then we don't have
4230 // to explicitly materialize a constant.
4231 const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
4232 bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4233
4234 auto IsEqualityPred = [](CmpInst::Predicate P) {
4235 return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
4236 P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
4237 };
4238 if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4239 // Try commutating the operands.
4240 const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
4241 if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
4242 ShouldUseImm = true;
4243 std::swap(LHS, RHS);
4244 }
4245 }
4246 unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
4247 {AArch64::FCMPSri, AArch64::FCMPDri}};
4248 unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64];
4249
4250 // Partially build the compare. Decide if we need to add a use for the
4251 // third operand based off whether or not we're comparing against 0.0.
4252 auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
4253 if (!ShouldUseImm)
4254 CmpMI.addUse(RHS);
4255 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4256 return &*CmpMI;
4257}
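// E.g. a compare against +0.0 becomes "fcmp s0, #0.0" (FCMPSri) with no
// constant materialization. This applies only to positive zero; -0.0 is not
// matched above and still requires a register operand.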
4258
4259MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4260 Optional<Register> Dst, Register Op1, Register Op2,
4261 MachineIRBuilder &MIRBuilder) const {
4262 // We implement a vector concat by:
4263 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
4264 // 2. Insert the upper vector into the destination's upper element
4265 // TODO: some of this code is common with G_BUILD_VECTOR handling.
4266 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4267
4268 const LLT Op1Ty = MRI.getType(Op1);
4269 const LLT Op2Ty = MRI.getType(Op2);
4270
4271 if (Op1Ty != Op2Ty) {
4272 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4273 return nullptr;
4274 }
4275 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4276
4277 if (Op1Ty.getSizeInBits() >= 128) {
4278 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4279 return nullptr;
4280 }
4281
4282 // At the moment we just support 64 bit vector concats.
4283 if (Op1Ty.getSizeInBits() != 64) {
4284 LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
4285 return nullptr;
4286 }
4287
4288 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
4289 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
4290 const TargetRegisterClass *DstRC =
4291 getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);
4292
4293 MachineInstr *WidenedOp1 =
4294 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
4295 MachineInstr *WidenedOp2 =
4296 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
4297 if (!WidenedOp1 || !WidenedOp2) {
4298 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4299 return nullptr;
4300 }
4301
4302 // Now do the insert of the upper element.
4303 unsigned InsertOpc, InsSubRegIdx;
4304 std::tie(InsertOpc, InsSubRegIdx) =
4305 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
4306
4307 if (!Dst)
4308 Dst = MRI.createVirtualRegister(DstRC);
4309 auto InsElt =
4310 MIRBuilder
4311 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
4312 .addImm(1) /* Lane index */
4313 .addUse(WidenedOp2->getOperand(0).getReg())
4314 .addImm(0);
4315 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4316 return &*InsElt;
4317}
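// E.g. concatenating two <2 x s32> values into a <4 x s32>: both 64-bit
// operands are widened to FPR128 via emitScalarToVector, then INSvi64lane
// moves element 0 of the widened second operand into lane 1 of the result.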
4318
4319MachineInstr *AArch64InstructionSelector::emitFMovForFConstant(
4320 MachineInstr &I, MachineRegisterInfo &MRI) const {
4321 assert(I.getOpcode() == TargetOpcode::G_FCONSTANT &&
4322 "Expected a G_FCONSTANT!");
4323 MachineOperand &ImmOp = I.getOperand(1);
4324 unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();
4325
4326 // Only handle 32 and 64 bit defs for now.
4327 if (DefSize != 32 && DefSize != 64)
4328 return nullptr;
4329
4330 // Don't handle null values using FMOV.
4331 if (ImmOp.getFPImm()->isNullValue())
4332 return nullptr;
4333
4334 // Get the immediate representation for the FMOV.
4335 const APFloat &ImmValAPF = ImmOp.getFPImm()->getValueAPF();
4336 int Imm = DefSize == 32 ? AArch64_AM::getFP32Imm(ImmValAPF)
4337 : AArch64_AM::getFP64Imm(ImmValAPF);
4338
4339 // If this is -1, it means the immediate can't be represented as the requested
4340 // floating point value. Bail.
4341 if (Imm == -1)
4342 return nullptr;
4343
4344 // Update MI to represent the new FMOV instruction, constrain it, and return.
4345 ImmOp.ChangeToImmediate(Imm);
4346 unsigned MovOpc = DefSize == 32 ? AArch64::FMOVSi : AArch64::FMOVDi;
4347 I.setDesc(TII.get(MovOpc));
4348 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
4349 return &I;
4350}
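// The FMOV immediate encoding covers +/-(1 + m/16) * 2^e for m in [0, 15]
// and e in [-3, 4], so constants such as 1.0, -0.5 and 3.75 are handled
// here, while anything else (e.g. 0.1) is materialized some other way.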
4351
4352MachineInstr *
4353AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
4354 MachineIRBuilder &MIRBuilder) const {
4355 // CSINC increments the result when the predicate is false. Invert it.
4356 const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
4357 CmpInst::getInversePredicate((CmpInst::Predicate)Pred));
4358 auto I =
4359 MIRBuilder
4360 .buildInstr(AArch64::CSINCWr, {DefReg}, {Register(AArch64::WZR), Register(AArch64::WZR)})
4361 .addImm(InvCC);
4362 constrainSelectedInstRegOperands(*I, TII, TRI, RBI);
4363 return &*I;
4364}
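// This is the expansion of the "cset" alias: cset Wd, cc is defined as
// csinc Wd, wzr, wzr, invert(cc), yielding 1 when cc holds and 0 otherwise.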
4365
4366std::pair<MachineInstr *, AArch64CC::CondCode>
4367AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4368 MachineOperand &LHS,
4369 MachineOperand &RHS,
4370 MachineIRBuilder &MIRBuilder) const {
4371 switch (Opcode) {
4372 default:
4373 llvm_unreachable("Unexpected opcode!");
4374 case TargetOpcode::G_SADDO:
4375 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4376 case TargetOpcode::G_UADDO:
4377 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4378 case TargetOpcode::G_SSUBO:
4379 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4380 case TargetOpcode::G_USUBO:
4381 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4382 }
4383}
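// The returned condition codes read the flags set by ADDS/SUBS: signed
// overflow sets V (hence VS), unsigned add overflow sets the carry (HS),
// and unsigned subtract overflow clears it (LO).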
4384
4385bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
4386 MachineIRBuilder MIB(I);
4387 MachineRegisterInfo &MRI = *MIB.getMRI();
4388 // We want to recognize this pattern:
4389 //
4390 // $z = G_FCMP pred, $x, $y
4391 // ...
4392 // $w = G_SELECT $z, $a, $b
4393 //
4394 // Where the value of $z is *only* ever used by the G_SELECT (possibly with
4395 // some copies/truncs in between.)
4396 //
4397 // If we see this, then we can emit something like this:
4398 //
4399 // fcmp $x, $y
4400 // fcsel $w, $a, $b, pred
4401 //
4402 // Rather than emitting both of the rather long sequences in the standard
4403 // G_FCMP/G_SELECT select methods.
4404
4405 // First, check if the condition is defined by a compare.
4406 MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
4407 while (CondDef) {
4408 // We can only fold if all of the defs have one use.
4409 Register CondDefReg = CondDef->getOperand(0).getReg();
4410 if (!MRI.hasOneNonDBGUse(CondDefReg)) {
4411 // Unless it's another select.
4412 for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
4413 if (CondDef == &UI)
4414 continue;
4415 if (UI.getOpcode() != TargetOpcode::G_SELECT)
4416 return false;
4417 }
4418 }
4419
4420 // We can skip over G_TRUNC since the condition is 1-bit.
4421 // Truncating/extending can have no impact on the value.
4422 unsigned Opc = CondDef->getOpcode();
4423 if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
4424 break;
4425
4426 // Can't see past copies from physregs.
4427 if (Opc == TargetOpcode::COPY &&
4428 Register::isPhysicalRegister(CondDef->getOperand(1).getReg()))
4429 return false;
4430
4431 CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
4432 }
4433
4434 // Is the condition defined by a compare?
4435 if (!CondDef)
4436 return false;
4437
4438 unsigned CondOpc = CondDef->getOpcode();
4439 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP)
4440 return false;
4441
4442 AArch64CC::CondCode CondCode;
4443 if (CondOpc == TargetOpcode::G_ICMP) {
4444 auto Pred =
4445 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
4446 CondCode = changeICMPPredToAArch64CC(Pred);
4447 emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
4448 CondDef->getOperand(1), MIB);
4449 } else {
4450 // Get the condition code for the select.
4451 auto Pred =
4452 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
4453 AArch64CC::CondCode CondCode2;
4454 changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);
4455
4456 // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
4457 // instructions to emit the comparison.
4458 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
4459 // unnecessary.
4460 if (CondCode2 != AArch64CC::AL)
4461 return false;
4462
4463 if (!emitFPCompare(CondDef->getOperand(2).getReg(),
4464 CondDef->getOperand(3).getReg(), MIB)) {
4465 LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
4466 return false;
4467 }
4468 }
4469
4470 // Emit the select.
4471 emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
4472 I.getOperand(3).getReg(), CondCode, MIB);
4473 I.eraseFromParent();
4474 return true;
4475}
4476
4477MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
4478 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4479 MachineIRBuilder &MIRBuilder) const {
4480 assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
4481 "Unexpected MachineOperand");
4482 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4483 // We want to find this sort of thing:
4484 // x = G_SUB 0, y
4485 // G_ICMP z, x
4486 //
4487 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
4488 // e.g:
4489 //
4490 // cmn z, y
4491
4492 // Helper lambda to detect the subtract followed by the compare.
4493 // Takes in the def of the LHS or RHS, and checks if it's a subtract from 0.
4494 auto IsCMN = [&](MachineInstr *DefMI, const AArch64CC::CondCode &CC) {
4495 if (!DefMI || DefMI->getOpcode() != TargetOpcode::G_SUB)
4496 return false;
4497
4498 // Need to make sure NZCV is the same at the end of the transformation.
4499 if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
4500 return false;
4501
4502 // We want to match against SUBs.
4503 if (DefMI->getOpcode() != TargetOpcode::G_SUB)
4504 return false;
4505
4506 // Make sure that we're getting
4507 // x = G_SUB 0, y
4508 auto ValAndVReg =
4509 getConstantVRegValWithLookThrough(DefMI->getOperand(1).getReg(), MRI);
4510 if (!ValAndVReg || ValAndVReg->Value != 0)
4511 return false;
4512
4513 // This can safely be represented as a CMN.
4514 return true;
4515 };
4516
4517 // Check if the RHS or LHS of the G_ICMP is defined by a SUB
4518 MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
4519 MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
4520 CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate();
4521 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(P);
4522
4523 // Given this:
4524 //
4525 // x = G_SUB 0, y
4526 // G_ICMP x, z
4527 //
4528 // Produce this:
4529 //
4530 // cmn y, z
4531 if (IsCMN(LHSDef, CC))
4532 return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
4533
4534 // Same idea here, but with the RHS of the compare instead:
4535 //
4536 // Given this:
4537 //
4538 // x = G_SUB 0, y
4539 // G_ICMP z, x
4540 //
4541 // Produce this:
4542 //
4543 // cmn z, y
4544 if (IsCMN(RHSDef, CC))
4545 return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
4546
4547 // Given this:
4548 //
4549 // z = G_AND x, y
4550 // G_ICMP z, 0
4551 //
4552 // Produce this if the compare is signed:
4553 //
4554 // tst x, y
4555 if (!CmpInst::isUnsigned(P) && LHSDef &&
4556 LHSDef->getOpcode() == TargetOpcode::G_AND) {
4557 // Make sure that the RHS is 0.
4558 auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI);
4559 if (!ValAndVReg || ValAndVReg->Value != 0)
4560 return nullptr;
4561
4562 return emitTST(LHSDef->getOperand(1),
4563 LHSDef->getOperand(2), MIRBuilder);
4564 }
4565
4566 return nullptr;
4567}
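
The EQ/NE restriction in IsCMN above can be checked exhaustively at a small width. The following standalone program (illustrative only, not part of this file) verifies over all 8-bit pairs that the Z flag of "cmp a, (0 - b)" always matches the Z flag of "cmn a, b", while the C flag does not, which is why only the Z-flag conditions survive the fold.

  #include <cstdint>
  #include <cstdio>

  int main() {
    bool ZAlwaysEqual = true, CAlwaysEqual = true;
    for (unsigned A = 0; A < 256; ++A) {
      for (unsigned B = 0; B < 256; ++B) {
        uint8_t NegB = uint8_t(0 - B);      // x = G_SUB 0, y
        bool ZSub = uint8_t(A - NegB) == 0; // Z of cmp a, x
        bool ZAdd = uint8_t(A + B) == 0;    // Z of cmn a, y
        bool CSub = A >= NegB;              // C of the subtract (no borrow)
        bool CAdd = A + B > 0xFF;           // C of the add (carry out)
        ZAlwaysEqual &= ZSub == ZAdd;
        CAlwaysEqual &= CSub == CAdd;
      }
    }
    printf("Z always equal: %d, C always equal: %d\n", ZAlwaysEqual,
           CAlwaysEqual);
    return 0;
  }
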
4568
4569bool AArch64InstructionSelector::selectShuffleVector(
4570 MachineInstr &I, MachineRegisterInfo &MRI) const {
4571 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
4572 Register Src1Reg = I.getOperand(1).getReg();
4573 const LLT Src1Ty = MRI.getType(Src1Reg);
4574 Register Src2Reg = I.getOperand(2).getReg();
4575 const LLT Src2Ty = MRI.getType(Src2Reg);
4576 ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
4577
4578 MachineBasicBlock &MBB = *I.getParent();
4579 MachineFunction &MF = *MBB.getParent();
4580 LLVMContext &Ctx = MF.getFunction().getContext();
4581
4582 // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
4583 // it's originated from a <1 x T> type. Those should have been lowered into
4584 // G_BUILD_VECTOR earlier.
4585 if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
4586 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
4587 return false;
4588 }
4589
4590 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
4591
4592 SmallVector<Constant *, 64> CstIdxs;
4593 for (int Val : Mask) {
4594 // For now, any undef indexes we'll just assume to be 0. This should be
4595 // optimized in the future, e.g. to select DUP etc.
4596 Val = Val < 0 ? 0 : Val;
4597 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
4598 unsigned Offset = Byte + Val * BytesPerElt;
4599 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
4600 }
4601 }
4602
4603 MachineIRBuilder MIRBuilder(I);
4604
4605 // Use a constant pool to load the index vector for TBL.
4606 Constant *CPVal = ConstantVector::get(CstIdxs);
4607 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder);
4608 if (!IndexLoad) {
4609 LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
4610 return false;
4611 }
4612
4613 if (DstTy.getSizeInBits() != 128) {
4614 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
4615 // This case can be done with TBL1.
4616 MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIRBuilder);
4617 if (!Concat) {
4618 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
4619 return false;
4620 }
4621
4622 // The constant pool load will be 64 bits, so we need to convert it to FPR128.
4623 IndexLoad =
4624 emitScalarToVector(64, &AArch64::FPR128RegClass,
4625 IndexLoad->getOperand(0).getReg(), MIRBuilder);
4626
4627 auto TBL1 = MIRBuilder.buildInstr(
4628 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
4629 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
4630 constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
4631
4632 auto Copy =
4633 MIRBuilder
4634 .buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
4635 .addReg(TBL1.getReg(0), 0, AArch64::dsub);
4636 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
4637 I.eraseFromParent();
4638 return true;
4639 }
4640
4641 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
4642 // Q registers for regalloc.
4643 auto RegSeq = MIRBuilder
4644 .buildInstr(TargetOpcode::REG_SEQUENCE,
4645 {&AArch64::QQRegClass}, {Src1Reg})
4646 .addImm(AArch64::qsub0)
4647 .addUse(Src2Reg)
4648 .addImm(AArch64::qsub1);
4649
4650 auto TBL2 = MIRBuilder.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
4651 {RegSeq, IndexLoad->getOperand(0)});
4652 constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI);
4653 constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
4654 I.eraseFromParent();
4655 return true;
4656}
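
As a standalone illustration of the index-vector construction above (the mask and element size are assumed values; this is not LLVM code), the sketch below runs the same CstIdxs loop for a <4 x s32> shuffle with mask <0, 4, 1, 5>. Lane 4 maps to bytes 16..19, i.e. into the second register of the concatenated TBL source pair.

  #include <cstdio>
  #include <vector>

  int main() {
    const unsigned BytesPerElt = 4;       // s32 elements
    const int Mask[] = {0, 4, 1, 5};      // a <4 x s32> zip-like shuffle
    std::vector<unsigned> Idxs;
    for (int Val : Mask) {
      Val = Val < 0 ? 0 : Val;            // undef lanes -> lane 0
      for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte)
        Idxs.push_back(Byte + Val * BytesPerElt);
    }
    for (unsigned I : Idxs)
      printf("%u ", I);                   // 0 1 2 3 16 17 18 19 4 5 6 7 ...
    printf("\n");
    return 0;
  }
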
4657
4658MachineInstr *AArch64InstructionSelector::emitLaneInsert(
4659 Optional<Register> DstReg, Register SrcReg, Register EltReg,
4660 unsigned LaneIdx, const RegisterBank &RB,
4661 MachineIRBuilder &MIRBuilder) const {
4662 MachineInstr *InsElt = nullptr;
4663 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
4664 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4665
4666 // Create a register to define with the insert if one wasn't passed in.
4667 if (!DstReg)
4668 DstReg = MRI.createVirtualRegister(DstRC);
4669
4670 unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
4671 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
4672
4673 if (RB.getID() == AArch64::FPRRegBankID) {
4674 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
4675 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
4676 .addImm(LaneIdx)
4677 .addUse(InsSub->getOperand(0).getReg())
4678 .addImm(0);
4679 } else {
4680 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
4681 .addImm(LaneIdx)
4682 .addUse(EltReg);
4683 }
4684
4685 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4686 return InsElt;
4687}
4688
4689bool AArch64InstructionSelector::selectInsertElt(
4690 MachineInstr &I, MachineRegisterInfo &MRI) const {
4691 assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
4692
4693 // Get information on the destination.
4694 Register DstReg = I.getOperand(0).getReg();
4695 const LLT DstTy = MRI.getType(DstReg);
4696 unsigned VecSize = DstTy.getSizeInBits();
4697
4698 // Get information on the element we want to insert into the destination.
4699 Register EltReg = I.getOperand(2).getReg();
4700 const LLT EltTy = MRI.getType(EltReg);
4701 unsigned EltSize = EltTy.getSizeInBits();
4702 if (EltSize < 16 || EltSize > 64)
4703 return false; // Don't support all element types yet.
4704
4705 // Find the definition of the index. Bail out if it's not defined by a
4706 // G_CONSTANT.
4707 Register IdxReg = I.getOperand(3).getReg();
4708 auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI);
4709 if (!VRegAndVal)
4710 return false;
4711 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
4712
4713 // Perform the lane insert.
4714 Register SrcReg = I.getOperand(1).getReg();
4715 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
4716 MachineIRBuilder MIRBuilder(I);
4717
4718 if (VecSize < 128) {
4719 // If the vector we're inserting into is smaller than 128 bits, widen it
4720 // to 128 to do the insert.
4721 MachineInstr *ScalarToVec = emitScalarToVector(
4722 VecSize, &AArch64::FPR128RegClass, SrcReg, MIRBuilder);
4723 if (!ScalarToVec)
4724 return false;
4725 SrcReg = ScalarToVec->getOperand(0).getReg();
4726 }
4727
4728 // Create an insert into a new FPR128 register.
4729 // Note that if our vector is already 128 bits, we end up emitting an extra
4730 // register.
4731 MachineInstr *InsMI =
4732 emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIRBuilder);
4733
4734 if (VecSize < 128) {
4735 // If we had to widen to perform the insert, then we have to demote back to
4736 // the original size to get the result we want.
4737 Register DemoteVec = InsMI->getOperand(0).getReg();
4738 const TargetRegisterClass *RC =
4739 getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
4740 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
4741 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
4742 return false;
4743 }
4744 unsigned SubReg = 0;
4745 if (!getSubRegForClass(RC, TRI, SubReg))
4746 return false;
4747 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
4748 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
4749 << "\n");
4750 return false;
4751 }
4752 MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
4753 .addReg(DemoteVec, 0, SubReg);
4754 RBI.constrainGenericRegister(DstReg, *RC, MRI);
4755 } else {
4756 // No widening needed.
4757 InsMI->getOperand(0).setReg(DstReg);
4758 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
4759 }
4760
4761 I.eraseFromParent();
4762 return true;
4763}
4764
4765bool AArch64InstructionSelector::tryOptConstantBuildVec(
4766 MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) const {
4767 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4768 unsigned DstSize = DstTy.getSizeInBits();
4769 assert(DstSize <= 128 && "Unexpected build_vec type!");
4770 if (DstSize < 32)
4771 return false;
4772 // Check if we're building a constant vector, in which case we want to
4773 // generate a constant pool load instead of a vector insert sequence.
4774 SmallVector<Constant *, 16> Csts;
4775 for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
4776 // Try to find G_CONSTANT or G_FCONSTANT
4777 auto *OpMI =
4778 getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI);
4779 if (OpMI)
4780 Csts.emplace_back(
4781 const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm()));
4782 else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT,
4783 I.getOperand(Idx).getReg(), MRI)))
4784 Csts.emplace_back(
4785 const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm()));
4786 else
4787 return false;
4788 }
4789 Constant *CV = ConstantVector::get(Csts);
4790 MachineIRBuilder MIB(I);
4791 if (CV->isNullValue()) {
4792 // Until the importer can support immAllZerosV in pattern leaf nodes,
4793 // select a zero move manually here.
4794 Register DstReg = I.getOperand(0).getReg();
4795 if (DstSize == 128) {
4796 auto Mov = MIB.buildInstr(AArch64::MOVIv2d_ns, {DstReg}, {}).addImm(0);
4797 I.eraseFromParent();
4798 return constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
4799 } else if (DstSize == 64) {
4800 auto Mov =
4801 MIB.buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
4802 .addImm(0);
4803 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
4804 .addReg(Mov.getReg(0), 0, AArch64::dsub);
4805 I.eraseFromParent();
4806 return RBI.constrainGenericRegister(DstReg, AArch64::FPR64RegClass, MRI);
4807 }
4808 }
4809 auto *CPLoad = emitLoadFromConstantPool(CV, MIB);
4810 if (!CPLoad) {
4811 LLVM_DEBUG(dbgs() << "Could not generate cp load for build_vector");
4812 return false;
4813 }
4814 MIB.buildCopy(I.getOperand(0), CPLoad->getOperand(0));
4815 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
4816 *MRI.getRegClass(CPLoad->getOperand(0).getReg()),
4817 MRI);
4818 I.eraseFromParent();
4819 return true;
4820}
4821
4822bool AArch64InstructionSelector::selectBuildVector(
4823 MachineInstr &I, MachineRegisterInfo &MRI) const {
4824 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4825 // Until we port more of the optimized selections, for now just use a vector
4826 // insert sequence.
4827 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
4828 const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
4829 unsigned EltSize = EltTy.getSizeInBits();
4830
4831 if (tryOptConstantBuildVec(I, DstTy, MRI))
4832 return true;
4833 if (EltSize < 16 || EltSize > 64)
4834 return false; // Don't support all element types yet.
4835 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
4836 MachineIRBuilder MIRBuilder(I);
4837
4838 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
4839 MachineInstr *ScalarToVec =
4840 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
4841 I.getOperand(1).getReg(), MIRBuilder);
4842 if (!ScalarToVec)
4843 return false;
4844
4845 Register DstVec = ScalarToVec->getOperand(0).getReg();
4846 unsigned DstSize = DstTy.getSizeInBits();
4847
4848 // Keep track of the last MI we inserted. Later on, we might be able to save
4849 // a copy using it.
4850 MachineInstr *PrevMI = nullptr;
4851 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
4852 // Note that if we don't do a subregister copy, we can end up making an
4853 // extra register.
4854 PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
4855 MIRBuilder);
4856 DstVec = PrevMI->getOperand(0).getReg();
4857 }
4858
4859 // If DstTy's size in bits is less than 128, then emit a subregister copy
4860 // from DstVec to the last register we've defined.
4861 if (DstSize < 128) {
4862 // Force this to be FPR using the destination vector.
4863 const TargetRegisterClass *RC =
4864 getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
4865 if (!RC)
4866 return false;
4867 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
4868 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
4869 return false;
4870 }
4871
4872 unsigned SubReg = 0;
4873 if (!getSubRegForClass(RC, TRI, SubReg))
4874 return false;
4875 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
4876 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
4877 << "\n");
4878 return false;
4879 }
4880
4881 Register Reg = MRI.createVirtualRegister(RC);
4882 Register DstReg = I.getOperand(0).getReg();
4883
4884 MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
4885 .addReg(DstVec, 0, SubReg);
4886 MachineOperand &RegOp = I.getOperand(1);
4887 RegOp.setReg(Reg);
4888 RBI.constrainGenericRegister(DstReg, *RC, MRI);
4889 } else {
4890 // We don't need a subregister copy. Save a copy by re-using the
4891 // destination register on the final insert.
4892 assert(PrevMI && "PrevMI was null?");
4893 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
4894 constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
4895 }
4896
4897 I.eraseFromParent();
4898 return true;
4899}
4900
4901/// Helper function to find an intrinsic ID on a MachineInstr. Returns the
4902/// ID if it exists, and 0 otherwise.
4903static unsigned findIntrinsicID(MachineInstr &I) {
4904 auto IntrinOp = find_if(I.operands(), [&](const MachineOperand &Op) {
4905 return Op.isIntrinsicID();
4906 });
4907 if (IntrinOp == I.operands_end())
4908 return 0;
4909 return IntrinOp->getIntrinsicID();
4910}
4911
4912bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
4913 MachineInstr &I, MachineRegisterInfo &MRI) const {
4914 // Find the intrinsic ID.
4915 unsigned IntrinID = findIntrinsicID(I);
4916 if (!IntrinID)
4917 return false;
4918 MachineIRBuilder MIRBuilder(I);
4919
4920 // Select the instruction.
4921 switch (IntrinID) {
4922 default:
4923 return false;
4924 case Intrinsic::trap:
4925 MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(1);
4926 break;
4927 case Intrinsic::debugtrap:
4928 MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
4929 break;
4930 case Intrinsic::ubsantrap:
4931 MIRBuilder.buildInstr(AArch64::BRK, {}, {})
4932 .addImm(I.getOperand(1).getImm() | ('U' << 8));
4933 break;
4934 }
4935
4936 I.eraseFromParent();
4937 return true;
4938}
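
A quick standalone check of the BRK immediates selected above ('U' is 0x55; the UBSan check kind 3 is made up for the example):

  #include <cstdio>

  int main() {
    printf("trap:      brk #%#x\n", 1);
    printf("debugtrap: brk #%#x\n", 0xF000);
    printf("ubsantrap: brk #%#x\n", 3 | ('U' << 8)); // kind 3 is assumed
    return 0;
  }
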
4939
4940bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
4941 MachineRegisterInfo &MRI) {
4942 unsigned IntrinID = findIntrinsicID(I);
4943 if (!IntrinID)
4944 return false;
4945 MachineIRBuilder MIRBuilder(I);
4946
4947 switch (IntrinID) {
4948 default:
4949 break;
4950 case Intrinsic::aarch64_crypto_sha1h: {
4951 Register DstReg = I.getOperand(0).getReg();
4952 Register SrcReg = I.getOperand(2).getReg();
4953
4954 // FIXME: Should this be an assert?
4955 if (MRI.getType(DstReg).getSizeInBits() != 32 ||
4956 MRI.getType(SrcReg).getSizeInBits() != 32)
4957 return false;
4958
4959 // The operation has to happen on FPRs. Set up some new FPR registers for
4960 // the source and destination if they are on GPRs.
4961 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
4962 SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
4963 MIRBuilder.buildCopy({SrcReg}, {I.getOperand(2)});
4964
4965 // Make sure the copy ends up getting constrained properly.
4966 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
4967 AArch64::GPR32RegClass, MRI);
4968 }
4969
4970 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
4971 DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
4972
4973 // Actually insert the instruction.
4974 auto SHA1Inst = MIRBuilder.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
4975 constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
4976
4977 // Did we create a new register for the destination?
4978 if (DstReg != I.getOperand(0).getReg()) {
4979 // Yep. Copy the result of the instruction back into the original
4980 // destination.
4981 MIRBuilder.buildCopy({I.getOperand(0)}, {DstReg});
4982 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
4983 AArch64::GPR32RegClass, MRI);
4984 }
4985
4986 I.eraseFromParent();
4987 return true;
4988 }
4989 case Intrinsic::frameaddress:
4990 case Intrinsic::returnaddress: {
4991 MachineFunction &MF = *I.getParent()->getParent();
4992 MachineFrameInfo &MFI = MF.getFrameInfo();
4993
4994 unsigned Depth = I.getOperand(2).getImm();
4995 Register DstReg = I.getOperand(0).getReg();
4996 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
4997
4998 if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
4999 if (!MFReturnAddr) {
5000 // Insert the copy from LR/X30 into the entry block, before it can be
5001 // clobbered by anything.
5002 MFI.setReturnAddressIsTaken(true);
5003 MFReturnAddr = getFunctionLiveInPhysReg(MF, TII, AArch64::LR,
5004 AArch64::GPR64RegClass);
5005 }
5006
5007 if (STI.hasPAuth()) {
5008 MIRBuilder.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
5009 } else {
5010 MIRBuilder.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
5011 MIRBuilder.buildInstr(AArch64::XPACLRI);
5012 MIRBuilder.buildCopy({DstReg}, {Register(AArch64::LR)});
5013 }
5014
5015 I.eraseFromParent();
5016 return true;
5017 }
5018
5019 MFI.setFrameAddressIsTaken(true);
5020 Register FrameAddr(AArch64::FP);
5021 while (Depth--) {
5022 Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
5023 auto Ldr =
5024 MIRBuilder.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr})
5025 .addImm(0);
5026 constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI);
5027 FrameAddr = NextFrame;
5028 }
5029
5030 if (IntrinID == Intrinsic::frameaddress)
5031 MIRBuilder.buildCopy({DstReg}, {FrameAddr});
5032 else {
5033 MFI.setReturnAddressIsTaken(true);
5034
5035 if (STI.hasPAuth()) {
5036 Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
5037 MIRBuilder.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
5038 MIRBuilder.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
5039 } else {
5040 MIRBuilder.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr}).addImm(1);
5041 MIRBuilder.buildInstr(AArch64::XPACLRI);
5042 MIRBuilder.buildCopy({DstReg}, {Register(AArch64::LR)});
5043 }
5044 }
5045
5046 I.eraseFromParent();
5047 return true;
5048 }
5049 }
5050 return false;
5051}
5052
5053InstructionSelector::ComplexRendererFns
5054AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
5055 auto MaybeImmed = getImmedFromMO(Root);
5056 if (MaybeImmed == None || *MaybeImmed > 31)
5057 return None;
5058 uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
5059 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
5060}
5061
5062InstructionSelector::ComplexRendererFns
5063AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
5064 auto MaybeImmed = getImmedFromMO(Root);
5065 if (MaybeImmed == None || *MaybeImmed > 31)
5066 return None;
5067 uint64_t Enc = 31 - *MaybeImmed;
5068 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
5069}
5070
5071InstructionSelector::ComplexRendererFns
5072AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
5073 auto MaybeImmed = getImmedFromMO(Root);
5074 if (MaybeImmed == None || *MaybeImmed > 63)
5075 return None;
5076 uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
5077 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
5078}
5079
5080InstructionSelector::ComplexRendererFns
5081AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
5082 auto MaybeImmed = getImmedFromMO(Root);
5083 if (MaybeImmed == None || *MaybeImmed > 63)
5084 return None;
5085 uint64_t Enc = 63 - *MaybeImmed;
5086 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
5087}
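
Worked example for the four shift renderers above (standalone, mirrors only the arithmetic): for a 32-bit left shift these produce the immr and imms fields of the UBFM form that lsl is an alias of; the 64-bit variants are analogous with 64/63 and mask 0x3f.

  #include <cstdio>

  int main() {
    for (unsigned Imm : {1u, 5u, 31u}) {
      unsigned EncA = (32 - Imm) & 0x1f; // selectShiftA_32
      unsigned EncB = 31 - Imm;          // selectShiftB_32
      printf("lsl #%u -> immr = %u, imms = %u\n", Imm, EncA, EncB);
    }
    return 0;
  }
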
5088
5089/// Helper to select an immediate value that can be represented as a 12-bit
5090/// value shifted left by either 0 or 12. If it is possible to do so, return
5091/// the immediate and shift value. If not, return None.
5092///
5093/// Used by selectArithImmed and selectNegArithImmed.
5094InstructionSelector::ComplexRendererFns
5095AArch64InstructionSelector::select12BitValueWithLeftShift(
5096 uint64_t Immed) const {
5097 unsigned ShiftAmt;
5098 if (Immed >> 12 == 0) {
5099 ShiftAmt = 0;
5100 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
5101 ShiftAmt = 12;
5102 Immed = Immed >> 12;
5103 } else
5104 return None;
5105
5106 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
5107 return {{
5108 [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
5109 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
5110 }};
5111}
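
A minimal standalone sketch of the predicate above: an immediate is selectable iff it fits in 12 bits, optionally shifted left by 12 (the helper name split12Bit is invented for the example).

  #include <cstdint>
  #include <cstdio>

  static bool split12Bit(uint64_t Immed, uint64_t &Val, unsigned &ShiftAmt) {
    if (Immed >> 12 == 0) {
      Val = Immed;
      ShiftAmt = 0;
      return true;
    }
    if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
      Val = Immed >> 12;
      ShiftAmt = 12;
      return true;
    }
    return false; // corresponds to returning None above
  }

  int main() {
    for (uint64_t V : {0xabcULL, 0xabc000ULL, 0xabc001ULL}) {
      uint64_t Val;
      unsigned Shift;
      if (split12Bit(V, Val, Shift))
        printf("%#llx -> imm = %#llx, lsl #%u\n", (unsigned long long)V,
               (unsigned long long)Val, Shift);
      else
        printf("%#llx -> not encodable\n", (unsigned long long)V);
    }
    return 0;
  }
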
5112
5113/// SelectArithImmed - Select an immediate value that can be represented as
5114/// a 12-bit value shifted left by either 0 or 12. If so, return true with
5115/// Val set to the 12-bit value and Shift set to the shifter operand.
5116InstructionSelector::ComplexRendererFns
5117AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
5118 // This function is called from the addsub_shifted_imm ComplexPattern,
5119 // which lists [imm] as the list of opcodes it's interested in; however,
5120 // we still need to check whether the operand is actually an immediate
5121 // here because the ComplexPattern opcode list is only used in
5122 // root-level opcode matching.
5123 auto MaybeImmed = getImmedFromMO(Root);
5124 if (MaybeImmed == None)
5125 return None;
5126 return select12BitValueWithLeftShift(*MaybeImmed);
5127}
5128
5129/// SelectNegArithImmed - As above, but negates the value before trying to
5130/// select it.
5131InstructionSelector::ComplexRendererFns
5132AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
5133 // We need a register here, because we need to know if we have a 64 or 32
5134 // bit immediate.
5135 if (!Root.isReg())
5136 return None;
5137 auto MaybeImmed = getImmedFromMO(Root);
5138 if (MaybeImmed == None)
5139 return None;
5140 uint64_t Immed = *MaybeImmed;
5141
5142 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
5143 // have the opposite effect on the C flag, so this pattern mustn't match under
5144 // those circumstances.
5145 if (Immed == 0)
5146 return None;
5147
5148 // Check if we're dealing with a 32-bit type on the root or a 64-bit type on
5149 // the root.
5150 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5151 if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
5152 Immed = ~((uint32_t)Immed) + 1;
5153 else
5154 Immed = ~Immed + 1ULL;
5155
5156 if (Immed & 0xFFFFFFFFFF000000ULL)
5157 return None;
5158
5159 Immed &= 0xFFFFFFULL;
5160 return select12BitValueWithLeftShift(Immed);
5161}
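
Standalone illustration of the wrap-around negation above: a 32-bit cmp with immediate -4096 negates to 0x1000, which select12BitValueWithLeftShift then encodes as #1, lsl #12, turning the cmp into a cmn.

  #include <cstdint>
  #include <cstdio>

  int main() {
    uint64_t Immed = 0xFFFFF000ULL;     // -4096 as a 32-bit immediate
    Immed = ~((uint32_t)Immed) + 1;     // 32-bit negate -> 0x1000
    bool Encodable = (Immed & 0xFFFFFFFFFF000000ULL) == 0;
    printf("negated = %#llx, encodable = %d\n", (unsigned long long)Immed,
           Encodable);
    return 0;
  }
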
5162
5163/// Return true if it is worth folding MI into an extended register. That is,
5164/// if it's safe to pull it into the addressing mode of a load or store as a
5165/// shift.
5166bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
5167 MachineInstr &MI, const MachineRegisterInfo &MRI) const {
5168 // Always fold if there is one use, or if we're optimizing for size.
5169 Register DefReg = MI.getOperand(0).getReg();
5170 if (MRI.hasOneNonDBGUse(DefReg) ||
5171 MI.getParent()->getParent()->getFunction().hasMinSize())
5172 return true;
5173
5174 // It's better to avoid folding and recomputing shifts when we don't have a
5175 // fastpath.
5176 if (!STI.hasLSLFast())
5177 return false;
5178
5179 // We have a fastpath, so folding a shift in and potentially computing it
5180 // many times may be beneficial. Check if this is only used in memory ops.
5181 // If it is, then we should fold.
5182 return all_of(MRI.use_nodbg_instructions(DefReg),
5183 [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
5184}
5185
5186static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) {
5187 switch (Type) {
5188 case AArch64_AM::SXTB:
5189 case AArch64_AM::SXTH:
5190 case AArch64_AM::SXTW:
5191 return true;
5192 default:
5193 return false;
5194 }
5195}
5196
5197InstructionSelector::ComplexRendererFns
5198AArch64InstructionSelector::selectExtendedSHL(
5199 MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
5200 unsigned SizeInBytes, bool WantsExt) const {
5201 assert(Base.isReg() && "Expected base to be a register operand");
5202 assert(Offset.isReg() && "Expected offset to be a register operand");
5203
5204 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5205 MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
5206 if (!OffsetInst)
5207 return None;
5208
5209 unsigned OffsetOpc = OffsetInst->getOpcode();
5210 bool LookedThroughZExt = false;
5211 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
5212 // Try to look through a ZEXT.
5213 if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
5214 return None;
5215
5216 OffsetInst = MRI.getVRegDef(OffsetInst->getOperand(1).getReg());
5217 OffsetOpc = OffsetInst->getOpcode();
5218 LookedThroughZExt = true;
5219
5220 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
5221 return None;
5222 }
5223 // Make sure that the memory op is a valid size.
5224 int64_t LegalShiftVal = Log2_32(SizeInBytes);
5225 if (LegalShiftVal == 0)
5226 return None;
5227 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
5228 return None;
5229
5230 // Now, try to find the specific G_CONSTANT. Start by assuming that the
5231 // register we will offset is the LHS, and the register containing the
5232 // constant is the RHS.
5233 Register OffsetReg = OffsetInst->getOperand(1).getReg();
5234 Register ConstantReg = OffsetInst->getOperand(2).getReg();
5235 auto ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
5236 if (!ValAndVReg) {
5237 // We didn't get a constant on the RHS. If the opcode is a shift, then
5238 // we're done.
5239 if (OffsetOpc == TargetOpcode::G_SHL)
5240 return None;
5241
5242 // If we have a G_MUL, we can use either register. Try looking at the RHS.
5243 std::swap(OffsetReg, ConstantReg);
5244 ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
5245 if (!ValAndVReg)
5246 return None;
5247 }
5248
5249 // The value must fit into 3 bits, and must be positive. Make sure that is
5250 // true.
5251 int64_t ImmVal = ValAndVReg->Value.getSExtValue();
5252
5253 // Since we're going to pull this into a shift, the constant value must be
5254 // a power of 2. If we got a multiply, then we need to check this.
5255 if (OffsetOpc == TargetOpcode::G_MUL) {
5256 if (!isPowerOf2_32(ImmVal))
5257 return None;
5258
5259 // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
5260 ImmVal = Log2_32(ImmVal);
5261 }
5262
5263 if ((ImmVal & 0x7) != ImmVal)
5264 return None;
5265
5266 // We are only allowed to shift by LegalShiftVal. This shift value is built
5267 // into the instruction, so we can't just use whatever we want.
5268 if (ImmVal != LegalShiftVal)
5269 return None;
5270
5271 unsigned SignExtend = 0;
5272 if (WantsExt) {
5273 // Check if the offset is defined by an extend, unless we looked through a
5274 // G_ZEXT earlier.
5275 if (!LookedThroughZExt) {
5276 MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
5277 auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
5278 if (Ext == AArch64_AM::InvalidShiftExtend)
5279 return None;
5280
5281 SignExtend = isSignExtendShiftType(Ext) ? 1 : 0;
5282 // We only support SXTW for signed extension here.
5283 if (SignExtend && Ext != AArch64_AM::SXTW)
5284 return None;
5285 OffsetReg = ExtInst->getOperand(1).getReg();
5286 }
5287
5288 // Need a 32-bit wide register here.
5289 MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
5290 OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
5291 }
5292
5293 // We can use the LHS of the GEP as the base, and the LHS of the shift as an
5294 // offset. Signify that we are shifting by setting the shift flag to 1.
5295 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
5296 [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
5297 [=](MachineInstrBuilder &MIB) {
5298 // Need to add both immediates here to make sure that they are both
5299 // added to the instruction.
5300 MIB.addImm(SignExtend);
5301 MIB.addImm(1);
5302 }}};
5303}
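
Standalone illustration of the ImmVal/LegalShiftVal constraint above (the access size is assumed; this is not LLVM code): for an 8-byte access the only foldable amount is log2(8) = 3, whether it arrives as G_SHL 3 or as G_MUL 8.

  #include <cstdio>

  int main() {
    const unsigned SizeInBytes = 8;      // an 8-byte (x-register) access
    int LegalShiftVal = 0;
    for (unsigned S = SizeInBytes; S > 1; S >>= 1)
      ++LegalShiftVal;                   // Log2_32(8) == 3
    for (int MulImm : {4, 8, 16}) {
      int ImmVal = 0;
      for (int M = MulImm; M > 1; M >>= 1)
        ++ImmVal;                        // G_MUL constant -> shift amount
      bool Folds = (ImmVal & 0x7) == ImmVal && ImmVal == LegalShiftVal;
      printf("G_MUL by %d -> shift %d, folds = %d\n", MulImm, ImmVal, Folds);
    }
    return 0;
  }
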
5304
5305/// This is used for computing addresses like this:
5306///
5307/// ldr x1, [x2, x3, lsl #3]
5308///
5309/// Where x2 is the base register, and x3 is an offset register. The shift-left
5310/// is a constant value specific to this load instruction. That is, we'll never
5311/// see anything other than a 3 here (which corresponds to the size of the
5312/// element being loaded.)
5313InstructionSelector::ComplexRendererFns
5314AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
5315 MachineOperand &Root, unsigned SizeInBytes) const {
5316 if (!Root.isReg())
5317 return None;
5318 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5319
5320 // We want to find something like this:
5321 //
5322 // val = G_CONSTANT LegalShiftVal
5323 // shift = G_SHL off_reg val
5324 // ptr = G_PTR_ADD base_reg shift
5325 // x = G_LOAD ptr
5326 //
5327 // And fold it into this addressing mode:
5328 //
5329 // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
5330
5331 // Check if we can find the G_PTR_ADD.
5332 MachineInstr *PtrAdd =
5333 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
5334 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
5335 return None;
5336
5337 // Now, try to match an opcode which will match our specific offset.
5338 // We want a G_SHL or a G_MUL.
5339 MachineInstr *OffsetInst =
5340 getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI);
5341 return selectExtendedSHL(Root, PtrAdd->getOperand(1),
5342 OffsetInst->getOperand(0), SizeInBytes,
5343 /*WantsExt=*/false);
5344}
5345
5346/// This is used for computing addresses like this:
5347///
5348/// ldr x1, [x2, x3]
5349///
5350/// Where x2 is the base register, and x3 is an offset register.
5351///
5352/// When it is possible (or profitable) to fold a G_PTR_ADD into the address
5353/// calculation, this will do so. Otherwise, it will return None.
5354InstructionSelector::ComplexRendererFns
5355AArch64InstructionSelector::selectAddrModeRegisterOffset(
5356 MachineOperand &Root) const {
5357 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5358
5359 // We need a GEP.
5360 MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
5361 if (!Gep || Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
5362 return None;
5363
5364 // If this is used more than once, let's not bother folding.
5365 // TODO: Check if they are memory ops. If they are, then we can still fold
5366 // without having to recompute anything.
5367 if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg()))
5368 return None;
5369
5370 // Base is the GEP's LHS, offset is its RHS.
5371 return {{[=](MachineInstrBuilder &MIB) {
5372 MIB.addUse(Gep->getOperand(1).getReg());
5373 },
5374 [=](MachineInstrBuilder &MIB) {
5375 MIB.addUse(Gep->getOperand(2).getReg());
5376 },
5377 [=](MachineInstrBuilder &MIB) {
5378 // Need to add both immediates here to make sure that they are both
5379 // added to the instruction.
5380 MIB.addImm(0);
5381 MIB.addImm(0);
5382 }}};
5383}
5384
5385/// This is intended to be equivalent to selectAddrModeXRO in
5386/// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
5387InstructionSelector::ComplexRendererFns
5388AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
5389 unsigned SizeInBytes) const {
5390 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5391 if (!Root.isReg())
5392 return None;
5393 MachineInstr *PtrAdd =
5394 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
5395 if (!PtrAdd)
5396 return None;
5397
5398 // Check for immediates which cannot be encoded in the [base + imm]
5399 // addressing mode, and can't be encoded in an add/sub. If this happens, we'll
5400 // end up with code like:
5401 //
5402 // mov x0, wide
5403 // add x1 base, x0
5404 // ldr x2, [x1, x0]
5405 //
5406 // In this situation, we can use the [base, xreg] addressing mode to save an
5407 // add/sub:
5408 //
5409 // mov x0, wide
5410 // ldr x2, [base, x0]
5411 auto ValAndVReg =
5412 getConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI);
5413 if (ValAndVReg) {
5414 unsigned Scale = Log2_32(SizeInBytes);
5415 int64_t ImmOff = ValAndVReg->Value.getSExtValue();
5416
5417 // Skip immediates that can be selected in the load/store addressing
5418 // mode.
5419 if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
5420 ImmOff < (0x1000 << Scale))
5421 return None;
5422
5423 // Helper lambda to decide whether or not it is preferable to emit an add.
5424 auto isPreferredADD = [](int64_t ImmOff) {
5425 // Constants in [0x0, 0xfff] can be encoded in an add.
5426 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
5427 return true;
5428
5429 // Can it be encoded in an add lsl #12?
5430 if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
5431 return false;
5432
5433 // It can be encoded in an add lsl #12, but we may not want to. If it is
5434 // possible to select this as a single movz, then prefer that. A single
5435 // movz is faster than an add with a shift.
5436 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
5437 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
5438 };
5439
5440 // If the immediate can be encoded in a single add/sub, then bail out.
5441 if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
5442 return None;
5443 }
5444
5445 // Try to fold shifts into the addressing mode.
5446 auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
5447 if (AddrModeFns)
5448 return AddrModeFns;
5449
5450 // If that doesn't work, see if it's possible to fold in registers from
5451 // a GEP.
5452 return selectAddrModeRegisterOffset(Root);
5453}
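
The isPreferredADD heuristic above can be exercised standalone (its body is copied verbatim; the sample offsets are assumptions for the example):

  #include <cstdint>
  #include <cstdio>

  static bool isPreferredADD(int64_t ImmOff) {
    if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
      return true;                           // fits a plain add
    if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
      return false;                          // not an "add ..., lsl #12"
    return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
           (ImmOff & 0xffffffffffff0fffLL) != 0x0LL; // no single movz either
  }

  int main() {
    // 0x800 fits a plain add; 0x801000 needs add lsl #12 and spans a 16-bit
    // boundary, so no single movz; 0x10000 is exactly "movz #1, lsl #16".
    for (long long V : {0x800LL, 0x801000LL, 0x10000LL})
      printf("%#llx -> isPreferredADD = %d\n", (unsigned long long)V,
             isPreferredADD(V));
    return 0;
  }
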
5454
5455/// This is used for computing addresses like this:
5456///
5457/// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
5458///
5459/// Where we have a 64-bit base register, a 32-bit offset register, and an
5460/// extend (which may or may not be signed).
5461InstructionSelector::ComplexRendererFns
5462AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
5463 unsigned SizeInBytes) const {
5464 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5465
5466 MachineInstr *PtrAdd =
5467 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
5468 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
5469 return None;
5470
5471 MachineOperand &LHS = PtrAdd->getOperand(1);
5472 MachineOperand &RHS = PtrAdd->getOperand(2);
5473 MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);
5474
5475 // The first case is the same as selectAddrModeXRO, except we need an extend.
5476 // In this case, we try to find a shift and extend, and fold them into the
5477 // addressing mode.
5478 //
5479 // E.g.
5480 //
5481 // off_reg = G_Z/S/ANYEXT ext_reg
5482 // val = G_CONSTANT LegalShiftVal
5483 // shift = G_SHL off_reg val
5484 // ptr = G_PTR_ADD base_reg shift
5485 // x = G_LOAD ptr
5486 //
5487 // In this case we can get a load like this:
5488 //
5489 // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
5490 auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
5491 SizeInBytes, /*WantsExt=*/true);
5492 if (ExtendedShl)
5493 return ExtendedShl;
5494
5495 // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
5496 //
5497 // e.g.
5498 // ldr something, [base_reg, ext_reg, sxtw]
5499 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
5500 return None;
5501
5502 // Check if this is an extend. We'll get an extend type if it is.
5503 AArch64_AM::ShiftExtendType Ext =
5504 getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
5505 if (Ext == AArch64_AM::InvalidShiftExtend)
5506 return None;
5507
5508 // Need a 32-bit wide register.
5509 MachineIRBuilder MIB(*PtrAdd);
5510 Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(),
5511 AArch64::GPR32RegClass, MIB);
5512 unsigned SignExtend = Ext == AArch64_AM::SXTW;
5513
5514 // Base is LHS, offset is ExtReg.
5515 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
5516 [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
5517 [=](MachineInstrBuilder &MIB) {
5518 MIB.addImm(SignExtend);
5519 MIB.addImm(0);
5520 }}};
5521}
5522
5523/// Select a "register plus unscaled signed 9-bit immediate" address. This
5524/// should only match when there is an offset that is not valid for a scaled
5525/// immediate addressing mode. The "Size" argument is the size in bytes of the
5526/// memory reference, which is needed here to know what is valid for a scaled
5527/// immediate.
5528InstructionSelector::ComplexRendererFns
5529AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
5530 unsigned Size) const {
5531 MachineRegisterInfo &MRI =
5532 Root.getParent()->getParent()->getParent()->getRegInfo();
5533
5534 if (!Root.isReg())
5535 return None;
5536
5537 if (!isBaseWithConstantOffset(Root, MRI))
5538 return None;
5539
5540 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
5541 if (!RootDef)
5542 return None;
5543
5544 MachineOperand &OffImm = RootDef->getOperand(2);
5545 if (!OffImm.isReg())
5546 return None;
5547 MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
5548 if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT)
5549 return None;
5550 int64_t RHSC;
5551 MachineOperand &RHSOp1 = RHS->getOperand(1);
5552 if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
5553 return None;
5554 RHSC = RHSOp1.getCImm()->getSExtValue();
5555
5556 // If the offset is valid as a scaled immediate, don't match here.
5557 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size)))
5558 return None;
5559 if (RHSC >= -256 && RHSC < 256) {
5560 MachineOperand &Base = RootDef->getOperand(1);
5561 return {{
5562 [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
5563 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
5564 }};
5565 }
5566 return None;
5567}
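
Standalone sketch of the two range checks above, for a hypothetical 4-byte access: aligned in-range offsets are rejected here because the scaled form is better, while misaligned or negative offsets within [-256, 256) match the unscaled (LDUR-style) form.

  #include <cstdio>

  static bool takesUnscaledPath(long long RHSC, unsigned Size) {
    unsigned Scale = 0;
    for (unsigned S = Size; S > 1; S >>= 1)
      ++Scale;                              // Log2_32 for a power-of-2 Size
    bool ScaledOK = (RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
                    RHSC < (0x1000LL << Scale);
    return !ScaledOK && RHSC >= -256 && RHSC < 256;
  }

  int main() {
    // 8 is aligned and in scaled range, so the scaled form wins; 3 is
    // misaligned and -16 is negative, so both fit the imm9 unscaled form;
    // -512 fits neither and falls through to None.
    for (long long V : {8LL, 3LL, -16LL, -512LL})
      printf("%lld -> unscaled = %d\n", V, takesUnscaledPath(V, 4));
    return 0;
  }
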
5568
5569InstructionSelector::ComplexRendererFns
5570AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
5571 unsigned Size,
5572 MachineRegisterInfo &MRI) const {
5573 if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
5574 return None;
5575 MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg());
5576 if (Adrp.getOpcode() != AArch64::ADRP)
5577 return None;
5578
5579 // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
5580 // TODO: Need to check GV's offset % size if doing offset folding into globals.
5581 assert(Adrp.getOperand(1).getOffset() == 0 && "Unexpected offset in global");
5582 auto GV = Adrp.getOperand(1).getGlobal();
5583 if (GV->isThreadLocal())
5584 return None;
5585
5586 auto &MF = *RootDef.getParent()->getParent();
5587 if (GV->getPointerAlignment(MF.getDataLayout()) < Size)
5588 return None;
5589
5590 unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget());
5591 MachineIRBuilder MIRBuilder(RootDef);
5592 Register AdrpReg = Adrp.getOperand(0).getReg();
5593 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },
5594 [=](MachineInstrBuilder &MIB) {
5595 MIB.addGlobalAddress(GV, /* Offset */ 0,
5596 OpFlags | AArch64II::MO_PAGEOFF |
5597 AArch64II::MO_NC);
5598 }}};
5599}
5600
5601/// Select a "register plus scaled unsigned 12-bit immediate" address. The
5602/// "Size" argument is the size in bytes of the memory reference, which
5603/// determines the scale.
5604InstructionSelector::ComplexRendererFns
5605AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
5606 unsigned Size) const {
5607 MachineFunction &MF = *Root.getParent()->getParent()->getParent();
5608 MachineRegisterInfo &MRI = MF.getRegInfo();
5609
5610 if (!Root.isReg())
 4: Calling 'MachineOperand::isReg'
 7: Returning from 'MachineOperand::isReg'
 8: Taking false branch
5611 return None;
5612
5613 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
5614 if (!RootDef)
 9: Assuming 'RootDef' is non-null
 10: Taking false branch
5615 return None;
5616
5617 if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
 11: Assuming the condition is false
 12: Taking false branch
5618 return {{
5619 [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
5620 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
5621 }};
5622 }
5623
5624 CodeModel::Model CM = MF.getTarget().getCodeModel();
5625 // Check if we can fold in the ADD of small code model ADRP + ADD address.
5626 if (CM == CodeModel::Small) {
 13: Assuming 'CM' is not equal to Small
 14: Taking false branch
5627 auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI);
5628 if (OpFns)
5629 return OpFns;
5630 }
5631
5632 if (isBaseWithConstantOffset(Root, MRI)) {
 15: Assuming the condition is true
 16: Taking true branch
5633 MachineOperand &LHS = RootDef->getOperand(1);
5634 MachineOperand &RHS = RootDef->getOperand(2);
5635 MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
5636 MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
5637 if (LHSDef && RHSDef) {
 17: Assuming 'LHSDef' is non-null
 18: Assuming 'RHSDef' is non-null
 19: Taking true branch
5638 int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
5639 unsigned Scale = Log2_32(Size);
 20: Calling 'Log2_32'
 22: Returning from 'Log2_32'
 23: 'Scale' initialized to 4294967295
5640 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
 24: Assuming the condition is true
 24.1: 'RHSC' is >= 0
 25: The result of the left shift is undefined due to shifting by '4294967295', which is greater or equal to the width of type 'int'
5641 if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
5642 return {{
5643 [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
5644 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
5645 }};
5646
5647 return {{
5648 [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
5649 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
5650 }};
5651 }
5652 }
5653 }
5654
5655 // Before falling back to our general case, check if the unscaled
5656 // instructions can handle this. If so, that's preferable.
5657 if (selectAddrModeUnscaled(Root, Size).hasValue())
5658 return None;
5659
5660 return {{
5661 [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
5662 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
5663 }};
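
On the path flagged above, llvm::Log2_32 is 31 - countLeadingZeros(Value), and countLeadingZeros(0) returns the bit width, so Log2_32(0) wraps to 4294967295 and the shift at line 5640 is undefined. The standalone demonstration below reproduces the wrap; a guard such as bailing out when Size == 0 before computing Scale would break the path (a sketch only, not the actual upstream fix).

  #include <cstdio>

  // clz with the "zero input returns the bit width" convention that
  // llvm::countLeadingZeros uses by default.
  static unsigned countLeadingZeros32(unsigned Value) {
    unsigned Zeros = 0;
    for (unsigned Bit = 0x80000000u; Bit != 0 && !(Value & Bit); Bit >>= 1)
      ++Zeros;
    return Zeros;                       // 32 when Value == 0
  }

  int main() {
    unsigned Scale = 31 - countLeadingZeros32(0); // 31 - 32 wraps
    printf("Scale = %u\n", Scale);                // prints 4294967295
    // 0x1000 << Scale would be the line 5640 shift: undefined behaviour,
    // since the shift amount is >= the width of int.
    return 0;
  }
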