Bug Summary

File: llvm/include/llvm/CodeGen/SelectionDAGNodes.h
Warning: line 1114, column 10
Called C++ object pointer is null
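
This warning (typically emitted by the core.CallAndMessage checker) marks the point where a member function is invoked through a pointer that the analyzer believes can still be null on at least one feasible path. The sketch below is a minimal, hypothetical C++ reduction of that pattern; the names Node, Handle, findNode and getValueType are illustrative only and are not the actual code at SelectionDAGNodes.h:1114.

    // Hypothetical reduction of the reported pattern: a wrapper holds a
    // possibly-null pointer and a method dereferences it unconditionally.
    struct Node {
      int getValueType() const { return VT; }
      int VT = 0;
    };

    struct Handle {
      Node *N = nullptr;          // may legitimately stay null
      int getValueType() const {
        return N->getValueType(); // <-- "Called C++ object pointer is null" fires here
      }
    };

    static Node G;

    Handle findNode(bool Found) {
      Handle H;
      if (Found)
        H.N = &G;                 // only the 'Found' path initializes N
      return H;
    }

    int use(bool Found) {
      Handle H = findNode(Found);
      // Missing guard, e.g. "if (!H.N) return -1;", so the analyzer can
      // follow the Found == false path into the dereference above.
      return H.getValueType();
    }

The usual remedy is to guard or assert the pointer on every path that can reach the call, or to restructure the callers so the null path is provably infeasible.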

Annotated Source Code

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name ARMISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/build-llvm/lib/Target/ARM -resource-dir /usr/lib/llvm-13/lib/clang/13.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/build-llvm/lib/Target/ARM -I /build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/Target/ARM -I /build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/build-llvm/include -I /build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/lib/llvm-13/lib/clang/13.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/build-llvm/lib/Target/ARM -fdebug-prefix-map=/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f=. -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-04-14-063029-18377-1 -x c++ /build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/Target/ARM/ARMISelLowering.cpp

/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/Target/ARM/ARMISelLowering.cpp

1//===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that ARM uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "ARMISelLowering.h"
15#include "ARMBaseInstrInfo.h"
16#include "ARMBaseRegisterInfo.h"
17#include "ARMCallingConv.h"
18#include "ARMConstantPoolValue.h"
19#include "ARMMachineFunctionInfo.h"
20#include "ARMPerfectShuffle.h"
21#include "ARMRegisterInfo.h"
22#include "ARMSelectionDAGInfo.h"
23#include "ARMSubtarget.h"
24#include "ARMTargetTransformInfo.h"
25#include "MCTargetDesc/ARMAddressingModes.h"
26#include "MCTargetDesc/ARMBaseInfo.h"
27#include "Utils/ARMBaseInfo.h"
28#include "llvm/ADT/APFloat.h"
29#include "llvm/ADT/APInt.h"
30#include "llvm/ADT/ArrayRef.h"
31#include "llvm/ADT/BitVector.h"
32#include "llvm/ADT/DenseMap.h"
33#include "llvm/ADT/STLExtras.h"
34#include "llvm/ADT/SmallPtrSet.h"
35#include "llvm/ADT/SmallVector.h"
36#include "llvm/ADT/Statistic.h"
37#include "llvm/ADT/StringExtras.h"
38#include "llvm/ADT/StringRef.h"
39#include "llvm/ADT/StringSwitch.h"
40#include "llvm/ADT/Triple.h"
41#include "llvm/ADT/Twine.h"
42#include "llvm/Analysis/VectorUtils.h"
43#include "llvm/CodeGen/CallingConvLower.h"
44#include "llvm/CodeGen/ISDOpcodes.h"
45#include "llvm/CodeGen/IntrinsicLowering.h"
46#include "llvm/CodeGen/MachineBasicBlock.h"
47#include "llvm/CodeGen/MachineConstantPool.h"
48#include "llvm/CodeGen/MachineFrameInfo.h"
49#include "llvm/CodeGen/MachineFunction.h"
50#include "llvm/CodeGen/MachineInstr.h"
51#include "llvm/CodeGen/MachineInstrBuilder.h"
52#include "llvm/CodeGen/MachineJumpTableInfo.h"
53#include "llvm/CodeGen/MachineMemOperand.h"
54#include "llvm/CodeGen/MachineOperand.h"
55#include "llvm/CodeGen/MachineRegisterInfo.h"
56#include "llvm/CodeGen/RuntimeLibcalls.h"
57#include "llvm/CodeGen/SelectionDAG.h"
58#include "llvm/CodeGen/SelectionDAGNodes.h"
59#include "llvm/CodeGen/TargetInstrInfo.h"
60#include "llvm/CodeGen/TargetLowering.h"
61#include "llvm/CodeGen/TargetOpcodes.h"
62#include "llvm/CodeGen/TargetRegisterInfo.h"
63#include "llvm/CodeGen/TargetSubtargetInfo.h"
64#include "llvm/CodeGen/ValueTypes.h"
65#include "llvm/IR/Attributes.h"
66#include "llvm/IR/CallingConv.h"
67#include "llvm/IR/Constant.h"
68#include "llvm/IR/Constants.h"
69#include "llvm/IR/DataLayout.h"
70#include "llvm/IR/DebugLoc.h"
71#include "llvm/IR/DerivedTypes.h"
72#include "llvm/IR/Function.h"
73#include "llvm/IR/GlobalAlias.h"
74#include "llvm/IR/GlobalValue.h"
75#include "llvm/IR/GlobalVariable.h"
76#include "llvm/IR/IRBuilder.h"
77#include "llvm/IR/InlineAsm.h"
78#include "llvm/IR/Instruction.h"
79#include "llvm/IR/Instructions.h"
80#include "llvm/IR/IntrinsicInst.h"
81#include "llvm/IR/Intrinsics.h"
82#include "llvm/IR/IntrinsicsARM.h"
83#include "llvm/IR/Module.h"
84#include "llvm/IR/PatternMatch.h"
85#include "llvm/IR/Type.h"
86#include "llvm/IR/User.h"
87#include "llvm/IR/Value.h"
88#include "llvm/MC/MCInstrDesc.h"
89#include "llvm/MC/MCInstrItineraries.h"
90#include "llvm/MC/MCRegisterInfo.h"
91#include "llvm/MC/MCSchedule.h"
92#include "llvm/Support/AtomicOrdering.h"
93#include "llvm/Support/BranchProbability.h"
94#include "llvm/Support/Casting.h"
95#include "llvm/Support/CodeGen.h"
96#include "llvm/Support/CommandLine.h"
97#include "llvm/Support/Compiler.h"
98#include "llvm/Support/Debug.h"
99#include "llvm/Support/ErrorHandling.h"
100#include "llvm/Support/KnownBits.h"
101#include "llvm/Support/MachineValueType.h"
102#include "llvm/Support/MathExtras.h"
103#include "llvm/Support/raw_ostream.h"
104#include "llvm/Target/TargetMachine.h"
105#include "llvm/Target/TargetOptions.h"
106#include <algorithm>
107#include <cassert>
108#include <cstdint>
109#include <cstdlib>
110#include <iterator>
111#include <limits>
112#include <string>
113#include <tuple>
114#include <utility>
115#include <vector>
116
117using namespace llvm;
118using namespace llvm::PatternMatch;
119
120#define DEBUG_TYPE "arm-isel"
121
122STATISTIC(NumTailCalls, "Number of tail calls");
123STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
124STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
125STATISTIC(NumConstpoolPromoted,
126 "Number of constants with their storage promoted into constant pools");
127
128static cl::opt<bool>
129ARMInterworking("arm-interworking", cl::Hidden,
130 cl::desc("Enable / disable ARM interworking (for debugging only)"),
131 cl::init(true));
132
133static cl::opt<bool> EnableConstpoolPromotion(
134 "arm-promote-constant", cl::Hidden,
135 cl::desc("Enable / disable promotion of unnamed_addr constants into "
136 "constant pools"),
137 cl::init(false)); // FIXME: set to true by default once PR32780 is fixed
138static cl::opt<unsigned> ConstpoolPromotionMaxSize(
139 "arm-promote-constant-max-size", cl::Hidden,
140 cl::desc("Maximum size of constant to promote into a constant pool"),
141 cl::init(64));
142static cl::opt<unsigned> ConstpoolPromotionMaxTotal(
143 "arm-promote-constant-max-total", cl::Hidden,
144 cl::desc("Maximum size of ALL constants to promote into a constant pool"),
145 cl::init(128));
146
147cl::opt<unsigned>
148MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
149 cl::desc("Maximum interleave factor for MVE VLDn to generate."),
150 cl::init(2));
151
152// The APCS parameter registers.
153static const MCPhysReg GPRArgRegs[] = {
154 ARM::R0, ARM::R1, ARM::R2, ARM::R3
155};
156
157void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
158 MVT PromotedBitwiseVT) {
159 if (VT != PromotedLdStVT) {
160 setOperationAction(ISD::LOAD, VT, Promote);
161 AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
162
163 setOperationAction(ISD::STORE, VT, Promote);
164 AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
165 }
166
167 MVT ElemTy = VT.getVectorElementType();
168 if (ElemTy != MVT::f64)
169 setOperationAction(ISD::SETCC, VT, Custom);
170 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
171 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
172 if (ElemTy == MVT::i32) {
173 setOperationAction(ISD::SINT_TO_FP, VT, Custom);
174 setOperationAction(ISD::UINT_TO_FP, VT, Custom);
175 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
176 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
177 } else {
178 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
179 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
180 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
181 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
182 }
183 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
184 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
185 setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
186 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
187 setOperationAction(ISD::SELECT, VT, Expand);
188 setOperationAction(ISD::SELECT_CC, VT, Expand);
189 setOperationAction(ISD::VSELECT, VT, Expand);
190 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
191 if (VT.isInteger()) {
192 setOperationAction(ISD::SHL, VT, Custom);
193 setOperationAction(ISD::SRA, VT, Custom);
194 setOperationAction(ISD::SRL, VT, Custom);
195 }
196
197 // Promote all bit-wise operations.
198 if (VT.isInteger() && VT != PromotedBitwiseVT) {
199 setOperationAction(ISD::AND, VT, Promote);
200 AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
201 setOperationAction(ISD::OR, VT, Promote);
202 AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT);
203 setOperationAction(ISD::XOR, VT, Promote);
204 AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
205 }
206
207 // Neon does not support vector divide/remainder operations.
208 setOperationAction(ISD::SDIV, VT, Expand);
209 setOperationAction(ISD::UDIV, VT, Expand);
210 setOperationAction(ISD::FDIV, VT, Expand);
211 setOperationAction(ISD::SREM, VT, Expand);
212 setOperationAction(ISD::UREM, VT, Expand);
213 setOperationAction(ISD::FREM, VT, Expand);
214 setOperationAction(ISD::SDIVREM, VT, Expand);
215 setOperationAction(ISD::UDIVREM, VT, Expand);
216
217 if (!VT.isFloatingPoint() &&
218 VT != MVT::v2i64 && VT != MVT::v1i64)
219 for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
220 setOperationAction(Opcode, VT, Legal);
221 if (!VT.isFloatingPoint())
222 for (auto Opcode : {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT})
223 setOperationAction(Opcode, VT, Legal);
224}
225
226void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
227 addRegisterClass(VT, &ARM::DPRRegClass);
228 addTypeForNEON(VT, MVT::f64, MVT::v2i32);
229}
230
231void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
232 addRegisterClass(VT, &ARM::DPairRegClass);
233 addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
234}
235
236void ARMTargetLowering::setAllExpand(MVT VT) {
237 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
238 setOperationAction(Opc, VT, Expand);
239
240 // We support these really simple operations even on types where all
241 // the actual arithmetic has to be broken down into simpler
242 // operations or turned into library calls.
243 setOperationAction(ISD::BITCAST, VT, Legal);
244 setOperationAction(ISD::LOAD, VT, Legal);
245 setOperationAction(ISD::STORE, VT, Legal);
246 setOperationAction(ISD::UNDEF, VT, Legal);
247}
248
249void ARMTargetLowering::addAllExtLoads(const MVT From, const MVT To,
250 LegalizeAction Action) {
251 setLoadExtAction(ISD::EXTLOAD, From, To, Action);
252 setLoadExtAction(ISD::ZEXTLOAD, From, To, Action);
253 setLoadExtAction(ISD::SEXTLOAD, From, To, Action);
254}
255
256void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
257 const MVT IntTypes[] = { MVT::v16i8, MVT::v8i16, MVT::v4i32 };
258
259 for (auto VT : IntTypes) {
260 addRegisterClass(VT, &ARM::MQPRRegClass);
261 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
262 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
263 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
264 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
265 setOperationAction(ISD::SHL, VT, Custom);
266 setOperationAction(ISD::SRA, VT, Custom);
267 setOperationAction(ISD::SRL, VT, Custom);
268 setOperationAction(ISD::SMIN, VT, Legal);
269 setOperationAction(ISD::SMAX, VT, Legal);
270 setOperationAction(ISD::UMIN, VT, Legal);
271 setOperationAction(ISD::UMAX, VT, Legal);
272 setOperationAction(ISD::ABS, VT, Legal);
273 setOperationAction(ISD::SETCC, VT, Custom);
274 setOperationAction(ISD::MLOAD, VT, Custom);
275 setOperationAction(ISD::MSTORE, VT, Legal);
276 setOperationAction(ISD::CTLZ, VT, Legal);
277 setOperationAction(ISD::CTTZ, VT, Custom);
278 setOperationAction(ISD::BITREVERSE, VT, Legal);
279 setOperationAction(ISD::BSWAP, VT, Legal);
280 setOperationAction(ISD::SADDSAT, VT, Legal);
281 setOperationAction(ISD::UADDSAT, VT, Legal);
282 setOperationAction(ISD::SSUBSAT, VT, Legal);
283 setOperationAction(ISD::USUBSAT, VT, Legal);
284
285 // No native support for these.
286 setOperationAction(ISD::UDIV, VT, Expand);
287 setOperationAction(ISD::SDIV, VT, Expand);
288 setOperationAction(ISD::UREM, VT, Expand);
289 setOperationAction(ISD::SREM, VT, Expand);
290 setOperationAction(ISD::UDIVREM, VT, Expand);
291 setOperationAction(ISD::SDIVREM, VT, Expand);
292 setOperationAction(ISD::CTPOP, VT, Expand);
293 setOperationAction(ISD::SELECT, VT, Expand);
294 setOperationAction(ISD::SELECT_CC, VT, Expand);
295
296 // Vector reductions
297 setOperationAction(ISD::VECREDUCE_ADD, VT, Legal);
298 setOperationAction(ISD::VECREDUCE_SMAX, VT, Legal);
299 setOperationAction(ISD::VECREDUCE_UMAX, VT, Legal);
300 setOperationAction(ISD::VECREDUCE_SMIN, VT, Legal);
301 setOperationAction(ISD::VECREDUCE_UMIN, VT, Legal);
302 setOperationAction(ISD::VECREDUCE_MUL, VT, Custom);
303 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
304 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
305 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
306
307 if (!HasMVEFP) {
308 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
309 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
310 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
311 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
312 }
313
314 // Pre and Post inc are supported on loads and stores
315 for (unsigned im = (unsigned)ISD::PRE_INC;
316 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
317 setIndexedLoadAction(im, VT, Legal);
318 setIndexedStoreAction(im, VT, Legal);
319 setIndexedMaskedLoadAction(im, VT, Legal);
320 setIndexedMaskedStoreAction(im, VT, Legal);
321 }
322 }
323
324 const MVT FloatTypes[] = { MVT::v8f16, MVT::v4f32 };
325 for (auto VT : FloatTypes) {
326 addRegisterClass(VT, &ARM::MQPRRegClass);
327 if (!HasMVEFP)
328 setAllExpand(VT);
329
330 // These are legal or custom whether we have MVE.fp or not
331 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
332 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
333 setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getVectorElementType(), Custom);
334 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
335 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
336 setOperationAction(ISD::BUILD_VECTOR, VT.getVectorElementType(), Custom);
337 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
338 setOperationAction(ISD::SETCC, VT, Custom);
339 setOperationAction(ISD::MLOAD, VT, Custom);
340 setOperationAction(ISD::MSTORE, VT, Legal);
341 setOperationAction(ISD::SELECT, VT, Expand);
342 setOperationAction(ISD::SELECT_CC, VT, Expand);
343
344 // Pre and Post inc are supported on loads and stores
345 for (unsigned im = (unsigned)ISD::PRE_INC;
346 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
347 setIndexedLoadAction(im, VT, Legal);
348 setIndexedStoreAction(im, VT, Legal);
349 setIndexedMaskedLoadAction(im, VT, Legal);
350 setIndexedMaskedStoreAction(im, VT, Legal);
351 }
352
353 if (HasMVEFP) {
354 setOperationAction(ISD::FMINNUM, VT, Legal);
355 setOperationAction(ISD::FMAXNUM, VT, Legal);
356 setOperationAction(ISD::FROUND, VT, Legal);
357 setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
358 setOperationAction(ISD::VECREDUCE_FMUL, VT, Custom);
359 setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
360 setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
361
362 // No native support for these.
363 setOperationAction(ISD::FDIV, VT, Expand);
364 setOperationAction(ISD::FREM, VT, Expand);
365 setOperationAction(ISD::FSQRT, VT, Expand);
366 setOperationAction(ISD::FSIN, VT, Expand);
367 setOperationAction(ISD::FCOS, VT, Expand);
368 setOperationAction(ISD::FPOW, VT, Expand);
369 setOperationAction(ISD::FLOG, VT, Expand);
370 setOperationAction(ISD::FLOG2, VT, Expand);
371 setOperationAction(ISD::FLOG10, VT, Expand);
372 setOperationAction(ISD::FEXP, VT, Expand);
373 setOperationAction(ISD::FEXP2, VT, Expand);
374 setOperationAction(ISD::FNEARBYINT, VT, Expand);
375 }
376 }
377
378 // Custom Expand smaller than legal vector reductions to prevent false zero
379 // items being added.
380 setOperationAction(ISD::VECREDUCE_FADD, MVT::v4f16, Custom);
381 setOperationAction(ISD::VECREDUCE_FMUL, MVT::v4f16, Custom);
382 setOperationAction(ISD::VECREDUCE_FMIN, MVT::v4f16, Custom);
383 setOperationAction(ISD::VECREDUCE_FMAX, MVT::v4f16, Custom);
384 setOperationAction(ISD::VECREDUCE_FADD, MVT::v2f16, Custom);
385 setOperationAction(ISD::VECREDUCE_FMUL, MVT::v2f16, Custom);
386 setOperationAction(ISD::VECREDUCE_FMIN, MVT::v2f16, Custom);
387 setOperationAction(ISD::VECREDUCE_FMAX, MVT::v2f16, Custom);
388
389 // We 'support' these types up to bitcast/load/store level, regardless of
390 // MVE integer-only / float support. Only doing FP data processing on the FP
391 // vector types is inhibited at integer-only level.
392 const MVT LongTypes[] = { MVT::v2i64, MVT::v2f64 };
393 for (auto VT : LongTypes) {
394 addRegisterClass(VT, &ARM::MQPRRegClass);
395 setAllExpand(VT);
396 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
397 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
398 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
399 }
400 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
401
402 // We can do bitwise operations on v2i64 vectors
403 setOperationAction(ISD::AND, MVT::v2i64, Legal);
404 setOperationAction(ISD::OR, MVT::v2i64, Legal);
405 setOperationAction(ISD::XOR, MVT::v2i64, Legal);
406
407 // It is legal to extload from v4i8 to v4i16 or v4i32.
408 addAllExtLoads(MVT::v8i16, MVT::v8i8, Legal);
409 addAllExtLoads(MVT::v4i32, MVT::v4i16, Legal);
410 addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal);
411
412 // It is legal to sign extend from v4i8/v4i16 to v4i32 or v8i8 to v8i16.
413 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Legal);
414 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);
415 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);
416 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i8, Legal);
417 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i16, Legal);
418
419 // Some truncating stores are legal too.
420 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
421 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
422 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
423
424 // Pre and Post inc on these are legal, given the correct extends
425 for (unsigned im = (unsigned)ISD::PRE_INC;
426 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
427 for (auto VT : {MVT::v8i8, MVT::v4i8, MVT::v4i16}) {
428 setIndexedLoadAction(im, VT, Legal);
429 setIndexedStoreAction(im, VT, Legal);
430 setIndexedMaskedLoadAction(im, VT, Legal);
431 setIndexedMaskedStoreAction(im, VT, Legal);
432 }
433 }
434
435 // Predicate types
436 const MVT pTypes[] = {MVT::v16i1, MVT::v8i1, MVT::v4i1};
437 for (auto VT : pTypes) {
438 addRegisterClass(VT, &ARM::VCCRRegClass);
439 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
440 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
441 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
442 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
443 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
444 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
445 setOperationAction(ISD::SETCC, VT, Custom);
446 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
447 setOperationAction(ISD::LOAD, VT, Custom);
448 setOperationAction(ISD::STORE, VT, Custom);
449 setOperationAction(ISD::TRUNCATE, VT, Custom);
450 setOperationAction(ISD::VSELECT, VT, Expand);
451 setOperationAction(ISD::SELECT, VT, Expand);
452 }
453}
454
455ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
456 const ARMSubtarget &STI)
457 : TargetLowering(TM), Subtarget(&STI) {
458 RegInfo = Subtarget->getRegisterInfo();
459 Itins = Subtarget->getInstrItineraryData();
460
461 setBooleanContents(ZeroOrOneBooleanContent);
462 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
463
464 if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
465 !Subtarget->isTargetWatchOS()) {
466 bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard;
467 for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
468 setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
469 IsHFTarget ? CallingConv::ARM_AAPCS_VFP
470 : CallingConv::ARM_AAPCS);
471 }
472
473 if (Subtarget->isTargetMachO()) {
474 // Uses VFP for Thumb libfuncs if available.
475 if (Subtarget->isThumb() && Subtarget->hasVFP2Base() &&
476 Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
477 static const struct {
478 const RTLIB::Libcall Op;
479 const char * const Name;
480 const ISD::CondCode Cond;
481 } LibraryCalls[] = {
482 // Single-precision floating-point arithmetic.
483 { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
484 { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
485 { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
486 { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
487
488 // Double-precision floating-point arithmetic.
489 { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
490 { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
491 { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
492 { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
493
494 // Single-precision comparisons.
495 { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
496 { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
497 { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
498 { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
499 { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
500 { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
501 { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
502
503 // Double-precision comparisons.
504 { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
505 { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
506 { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
507 { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
508 { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
509 { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
510 { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
511
512 // Floating-point to integer conversions.
513 // i64 conversions are done via library routines even when generating VFP
514 // instructions, so use the same ones.
515 { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
516 { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
517 { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
518 { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
519
520 // Conversions between floating types.
521 { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
522 { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
523
524 // Integer to floating-point conversions.
525 // i64 conversions are done via library routines even when generating VFP
526 // instructions, so use the same ones.
527 // FIXME: There appears to be some naming inconsistency in ARM libgcc:
528 // e.g., __floatunsidf vs. __floatunssidfvfp.
529 { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
530 { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
531 { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
532 { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
533 };
534
535 for (const auto &LC : LibraryCalls) {
536 setLibcallName(LC.Op, LC.Name);
537 if (LC.Cond != ISD::SETCC_INVALID)
538 setCmpLibcallCC(LC.Op, LC.Cond);
539 }
540 }
541 }
542
543 // These libcalls are not available in 32-bit.
544 setLibcallName(RTLIB::SHL_I128, nullptr);
545 setLibcallName(RTLIB::SRL_I128, nullptr);
546 setLibcallName(RTLIB::SRA_I128, nullptr);
547
548 // RTLIB
549 if (Subtarget->isAAPCS_ABI() &&
550 (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
551 Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
552 static const struct {
553 const RTLIB::Libcall Op;
554 const char * const Name;
555 const CallingConv::ID CC;
556 const ISD::CondCode Cond;
557 } LibraryCalls[] = {
558 // Double-precision floating-point arithmetic helper functions
559 // RTABI chapter 4.1.2, Table 2
560 { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
561 { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
562 { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
563 { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
564
565 // Double-precision floating-point comparison helper functions
566 // RTABI chapter 4.1.2, Table 3
567 { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
568 { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
569 { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
570 { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
571 { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
572 { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
573 { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
574
575 // Single-precision floating-point arithmetic helper functions
576 // RTABI chapter 4.1.2, Table 4
577 { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
578 { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
579 { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
580 { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
581
582 // Single-precision floating-point comparison helper functions
583 // RTABI chapter 4.1.2, Table 5
584 { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
585 { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
586 { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
587 { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
588 { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
589 { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
590 { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
591
592 // Floating-point to integer conversions.
593 // RTABI chapter 4.1.2, Table 6
594 { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
595 { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
596 { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
597 { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
598 { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
599 { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
600 { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
601 { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
602
603 // Conversions between floating types.
604 // RTABI chapter 4.1.2, Table 7
605 { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
606 { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
607 { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
608
609 // Integer to floating-point conversions.
610 // RTABI chapter 4.1.2, Table 8
611 { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
612 { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
613 { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
614 { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
615 { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
616 { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
617 { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
618 { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
619
620 // Long long helper functions
621 // RTABI chapter 4.2, Table 9
622 { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
623 { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
624 { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
625 { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
626
627 // Integer division functions
628 // RTABI chapter 4.3.1
629 { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
630 { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
631 { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
632 { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
633 { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
634 { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
635 { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
636 { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
637 };
638
639 for (const auto &LC : LibraryCalls) {
640 setLibcallName(LC.Op, LC.Name);
641 setLibcallCallingConv(LC.Op, LC.CC);
642 if (LC.Cond != ISD::SETCC_INVALID)
643 setCmpLibcallCC(LC.Op, LC.Cond);
644 }
645
646 // EABI dependent RTLIB
647 if (TM.Options.EABIVersion == EABI::EABI4 ||
648 TM.Options.EABIVersion == EABI::EABI5) {
649 static const struct {
650 const RTLIB::Libcall Op;
651 const char *const Name;
652 const CallingConv::ID CC;
653 const ISD::CondCode Cond;
654 } MemOpsLibraryCalls[] = {
655 // Memory operations
656 // RTABI chapter 4.3.4
657 { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
658 { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
659 { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
660 };
661
662 for (const auto &LC : MemOpsLibraryCalls) {
663 setLibcallName(LC.Op, LC.Name);
664 setLibcallCallingConv(LC.Op, LC.CC);
665 if (LC.Cond != ISD::SETCC_INVALID)
666 setCmpLibcallCC(LC.Op, LC.Cond);
667 }
668 }
669 }
670
671 if (Subtarget->isTargetWindows()) {
672 static const struct {
673 const RTLIB::Libcall Op;
674 const char * const Name;
675 const CallingConv::ID CC;
676 } LibraryCalls[] = {
677 { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
678 { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
679 { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
680 { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
681 { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
682 { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
683 { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
684 { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
685 };
686
687 for (const auto &LC : LibraryCalls) {
688 setLibcallName(LC.Op, LC.Name);
689 setLibcallCallingConv(LC.Op, LC.CC);
690 }
691 }
692
693 // Use divmod compiler-rt calls for iOS 5.0 and later.
694 if (Subtarget->isTargetMachO() &&
695 !(Subtarget->isTargetIOS() &&
696 Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
697 setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
698 setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
699 }
700
701 // The half <-> float conversion functions are always soft-float on
702 // non-watchos platforms, but are needed for some targets which use a
703 // hard-float calling convention by default.
704 if (!Subtarget->isTargetWatchABI()) {
705 if (Subtarget->isAAPCS_ABI()) {
706 setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
707 setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
708 setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
709 } else {
710 setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
711 setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
712 setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
713 }
714 }
715
716 // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
717 // a __gnu_ prefix (which is the default).
718 if (Subtarget->isTargetAEABI()) {
719 static const struct {
720 const RTLIB::Libcall Op;
721 const char * const Name;
722 const CallingConv::ID CC;
723 } LibraryCalls[] = {
724 { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS },
725 { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS },
726 { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS },
727 };
728
729 for (const auto &LC : LibraryCalls) {
730 setLibcallName(LC.Op, LC.Name);
731 setLibcallCallingConv(LC.Op, LC.CC);
732 }
733 }
734
735 if (Subtarget->isThumb1Only())
736 addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
737 else
738 addRegisterClass(MVT::i32, &ARM::GPRRegClass);
739
740 if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only() &&
741 Subtarget->hasFPRegs()) {
742 addRegisterClass(MVT::f32, &ARM::SPRRegClass);
743 addRegisterClass(MVT::f64, &ARM::DPRRegClass);
744 if (!Subtarget->hasVFP2Base())
745 setAllExpand(MVT::f32);
746 if (!Subtarget->hasFP64())
747 setAllExpand(MVT::f64);
748 }
749
750 if (Subtarget->hasFullFP16()) {
751 addRegisterClass(MVT::f16, &ARM::HPRRegClass);
752 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
753 setOperationAction(ISD::BITCAST, MVT::f16, Custom);
754
755 setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
756 setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
757 }
758
759 if (Subtarget->hasBF16()) {
760 addRegisterClass(MVT::bf16, &ARM::HPRRegClass);
761 setAllExpand(MVT::bf16);
762 if (!Subtarget->hasFullFP16())
763 setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
764 }
765
766 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
767 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
768 setTruncStoreAction(VT, InnerVT, Expand);
769 addAllExtLoads(VT, InnerVT, Expand);
770 }
771
772 setOperationAction(ISD::MULHS, VT, Expand);
773 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
774 setOperationAction(ISD::MULHU, VT, Expand);
775 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
776
777 setOperationAction(ISD::BSWAP, VT, Expand);
778 }
779
780 setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
781 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
782
783 setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
784 setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
785
786 if (Subtarget->hasMVEIntegerOps())
787 addMVEVectorTypes(Subtarget->hasMVEFloatOps());
788
789 // Combine low-overhead loop intrinsics so that we can lower i1 types.
790 if (Subtarget->hasLOB()) {
791 setTargetDAGCombine(ISD::BRCOND);
792 setTargetDAGCombine(ISD::BR_CC);
793 }
794
795 if (Subtarget->hasNEON()) {
796 addDRTypeForNEON(MVT::v2f32);
797 addDRTypeForNEON(MVT::v8i8);
798 addDRTypeForNEON(MVT::v4i16);
799 addDRTypeForNEON(MVT::v2i32);
800 addDRTypeForNEON(MVT::v1i64);
801
802 addQRTypeForNEON(MVT::v4f32);
803 addQRTypeForNEON(MVT::v2f64);
804 addQRTypeForNEON(MVT::v16i8);
805 addQRTypeForNEON(MVT::v8i16);
806 addQRTypeForNEON(MVT::v4i32);
807 addQRTypeForNEON(MVT::v2i64);
808
809 if (Subtarget->hasFullFP16()) {
810 addQRTypeForNEON(MVT::v8f16);
811 addDRTypeForNEON(MVT::v4f16);
812 }
813
814 if (Subtarget->hasBF16()) {
815 addQRTypeForNEON(MVT::v8bf16);
816 addDRTypeForNEON(MVT::v4bf16);
817 }
818 }
819
820 if (Subtarget->hasMVEIntegerOps() || Subtarget->hasNEON()) {
821 // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
822 // none of Neon, MVE or VFP supports any arithmetic operations on it.
823 setOperationAction(ISD::FADD, MVT::v2f64, Expand);
824 setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
825 setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
826 // FIXME: Code duplication: FDIV and FREM are expanded always, see
827 // ARMTargetLowering::addTypeForNEON method for details.
828 setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
829 setOperationAction(ISD::FREM, MVT::v2f64, Expand);
830 // FIXME: Create unittest.
831 // In other words, find a way when "copysign" appears in DAG with vector
832 // operands.
833 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
834 // FIXME: Code duplication: SETCC has custom operation action, see
835 // ARMTargetLowering::addTypeForNEON method for details.
836 setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
837 // FIXME: Create unittest for FNEG and for FABS.
838 setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
839 setOperationAction(ISD::FABS, MVT::v2f64, Expand);
840 setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
841 setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
842 setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
843 setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
844 setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
845 setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
846 setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
847 setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
848 setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
849 // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
850 setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
851 setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
852 setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
853 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
854 setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
855 setOperationAction(ISD::FMA, MVT::v2f64, Expand);
856 }
857
858 if (Subtarget->hasNEON()) {
859 // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively
860 // supported for v4f32.
861 setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
862 setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
863 setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
864 setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
865 setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
866 setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
867 setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
868 setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
869 setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
870 setOperationAction(ISD::FCEIL, MVT::v4f32, Expand);
871 setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand);
872 setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
873 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
874 setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand);
875
876 // Mark v2f32 intrinsics.
877 setOperationAction(ISD::FSQRT, MVT::v2f32, Expand);
878 setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
879 setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
880 setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
881 setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
882 setOperationAction(ISD::FLOG2, MVT::v2f32, Expand);
883 setOperationAction(ISD::FLOG10, MVT::v2f32, Expand);
884 setOperationAction(ISD::FEXP, MVT::v2f32, Expand);
885 setOperationAction(ISD::FEXP2, MVT::v2f32, Expand);
886 setOperationAction(ISD::FCEIL, MVT::v2f32, Expand);
887 setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand);
888 setOperationAction(ISD::FRINT, MVT::v2f32, Expand);
889 setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand);
890 setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand);
891
892 // Neon does not support some operations on v1i64 and v2i64 types.
893 setOperationAction(ISD::MUL, MVT::v1i64, Expand);
894 // Custom handling for some quad-vector types to detect VMULL.
895 setOperationAction(ISD::MUL, MVT::v8i16, Custom);
896 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
897 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
898 // Custom handling for some vector types to avoid expensive expansions
899 setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
900 setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
901 setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
902 setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
903 // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
904 // a destination type that is wider than the source, and nor does
905 // it have a FP_TO_[SU]INT instruction with a narrower destination than
906 // source.
907 setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
908 setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom);
909 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
910 setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
911 setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
912 setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Custom);
913 setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
914 setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
915
916 setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand);
917 setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
918
919 // NEON does not have single instruction CTPOP for vectors with element
920 // types wider than 8-bits. However, custom lowering can leverage the
921 // v8i8/v16i8 vcnt instruction.
922 setOperationAction(ISD::CTPOP, MVT::v2i32, Custom);
923 setOperationAction(ISD::CTPOP, MVT::v4i32, Custom);
924 setOperationAction(ISD::CTPOP, MVT::v4i16, Custom);
925 setOperationAction(ISD::CTPOP, MVT::v8i16, Custom);
926 setOperationAction(ISD::CTPOP, MVT::v1i64, Custom);
927 setOperationAction(ISD::CTPOP, MVT::v2i64, Custom);
928
929 setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
930 setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
931
932 // NEON does not have single instruction CTTZ for vectors.
933 setOperationAction(ISD::CTTZ, MVT::v8i8, Custom);
934 setOperationAction(ISD::CTTZ, MVT::v4i16, Custom);
935 setOperationAction(ISD::CTTZ, MVT::v2i32, Custom);
936 setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
937
938 setOperationAction(ISD::CTTZ, MVT::v16i8, Custom);
939 setOperationAction(ISD::CTTZ, MVT::v8i16, Custom);
940 setOperationAction(ISD::CTTZ, MVT::v4i32, Custom);
941 setOperationAction(ISD::CTTZ, MVT::v2i64, Custom);
942
943 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i8, Custom);
944 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i16, Custom);
945 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i32, Custom);
946 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v1i64, Custom);
947
948 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i8, Custom);
949 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i16, Custom);
950 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom);
951 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom);
952
953 // NEON only has FMA instructions as of VFP4.
954 if (!Subtarget->hasVFP4Base()) {
955 setOperationAction(ISD::FMA, MVT::v2f32, Expand);
956 setOperationAction(ISD::FMA, MVT::v4f32, Expand);
957 }
958
959 setTargetDAGCombine(ISD::SHL);
960 setTargetDAGCombine(ISD::SRL);
961 setTargetDAGCombine(ISD::SRA);
962 setTargetDAGCombine(ISD::FP_TO_SINT);
963 setTargetDAGCombine(ISD::FP_TO_UINT);
964 setTargetDAGCombine(ISD::FDIV);
965 setTargetDAGCombine(ISD::LOAD);
966
967 // It is legal to extload from v4i8 to v4i16 or v4i32.
968 for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16,
969 MVT::v2i32}) {
970 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
971 setLoadExtAction(ISD::EXTLOAD, VT, Ty, Legal);
972 setLoadExtAction(ISD::ZEXTLOAD, VT, Ty, Legal);
973 setLoadExtAction(ISD::SEXTLOAD, VT, Ty, Legal);
974 }
975 }
976 }
977
978 if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
979 setTargetDAGCombine(ISD::BUILD_VECTOR);
980 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
981 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
982 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
983 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
984 setTargetDAGCombine(ISD::STORE);
985 setTargetDAGCombine(ISD::SIGN_EXTEND);
986 setTargetDAGCombine(ISD::ZERO_EXTEND);
987 setTargetDAGCombine(ISD::ANY_EXTEND);
988 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
989 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
990 setTargetDAGCombine(ISD::INTRINSIC_VOID);
991 setTargetDAGCombine(ISD::VECREDUCE_ADD);
992 setTargetDAGCombine(ISD::ADD);
993 setTargetDAGCombine(ISD::BITCAST);
994 }
995 if (Subtarget->hasMVEIntegerOps()) {
996 setTargetDAGCombine(ISD::SMIN);
997 setTargetDAGCombine(ISD::UMIN);
998 setTargetDAGCombine(ISD::SMAX);
999 setTargetDAGCombine(ISD::UMAX);
1000 setTargetDAGCombine(ISD::FP_EXTEND);
1001 setTargetDAGCombine(ISD::SELECT);
1002 setTargetDAGCombine(ISD::SELECT_CC);
1003 }
1004
1005 if (!Subtarget->hasFP64()) {
1006 // When targeting a floating-point unit with only single-precision
1007 // operations, f64 is legal for the few double-precision instructions which
1008 // are present. However, no double-precision operations other than moves,
1009 // loads and stores are provided by the hardware.
1010 setOperationAction(ISD::FADD, MVT::f64, Expand);
1011 setOperationAction(ISD::FSUB, MVT::f64, Expand);
1012 setOperationAction(ISD::FMUL, MVT::f64, Expand);
1013 setOperationAction(ISD::FMA, MVT::f64, Expand);
1014 setOperationAction(ISD::FDIV, MVT::f64, Expand);
1015 setOperationAction(ISD::FREM, MVT::f64, Expand);
1016 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
1017 setOperationAction(ISD::FGETSIGN, MVT::f64, Expand);
1018 setOperationAction(ISD::FNEG, MVT::f64, Expand);
1019 setOperationAction(ISD::FABS, MVT::f64, Expand);
1020 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
1021 setOperationAction(ISD::FSIN, MVT::f64, Expand);
1022 setOperationAction(ISD::FCOS, MVT::f64, Expand);
1023 setOperationAction(ISD::FPOW, MVT::f64, Expand);
1024 setOperationAction(ISD::FLOG, MVT::f64, Expand);
1025 setOperationAction(ISD::FLOG2, MVT::f64, Expand);
1026 setOperationAction(ISD::FLOG10, MVT::f64, Expand);
1027 setOperationAction(ISD::FEXP, MVT::f64, Expand);
1028 setOperationAction(ISD::FEXP2, MVT::f64, Expand);
1029 setOperationAction(ISD::FCEIL, MVT::f64, Expand);
1030 setOperationAction(ISD::FTRUNC, MVT::f64, Expand);
1031 setOperationAction(ISD::FRINT, MVT::f64, Expand);
1032 setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
1033 setOperationAction(ISD::FFLOOR, MVT::f64, Expand);
1034 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
1035 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
1036 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
1037 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
1038 setOperationAction(ISD::FP_TO_SINT, MVT::f64, Custom);
1039 setOperationAction(ISD::FP_TO_UINT, MVT::f64, Custom);
1040 setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
1041 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
1042 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
1043 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::f64, Custom);
1044 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::f64, Custom);
1045 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
1046 }
1047
1048 if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) {
1049 setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
1050 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Custom);
1051 if (Subtarget->hasFullFP16()) {
1052 setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
1053 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
1054 }
1055 }
1056
1057 if (!Subtarget->hasFP16()) {
1058 setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom);
1059 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Custom);
1060 }
1061
1062 computeRegisterProperties(Subtarget->getRegisterInfo());
1063
1064 // ARM does not have floating-point extending loads.
1065 for (MVT VT : MVT::fp_valuetypes()) {
1066 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
1067 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
1068 }
1069
1070 // ... or truncating stores
1071 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
1072 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
1073 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
1074
1075 // ARM does not have i1 sign extending load.
1076 for (MVT VT : MVT::integer_valuetypes())
1077 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
1078
1079 // ARM supports all 4 flavors of integer indexed load / store.
1080 if (!Subtarget->isThumb1Only()) {
1081 for (unsigned im = (unsigned)ISD::PRE_INC;
1082 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
1083 setIndexedLoadAction(im, MVT::i1, Legal);
1084 setIndexedLoadAction(im, MVT::i8, Legal);
1085 setIndexedLoadAction(im, MVT::i16, Legal);
1086 setIndexedLoadAction(im, MVT::i32, Legal);
1087 setIndexedStoreAction(im, MVT::i1, Legal);
1088 setIndexedStoreAction(im, MVT::i8, Legal);
1089 setIndexedStoreAction(im, MVT::i16, Legal);
1090 setIndexedStoreAction(im, MVT::i32, Legal);
1091 }
1092 } else {
1093 // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
1094 setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal);
1095 setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal);
1096 }
1097
1098 setOperationAction(ISD::SADDO, MVT::i32, Custom);
1099 setOperationAction(ISD::UADDO, MVT::i32, Custom);
1100 setOperationAction(ISD::SSUBO, MVT::i32, Custom);
1101 setOperationAction(ISD::USUBO, MVT::i32, Custom);
1102
1103 setOperationAction(ISD::ADDCARRY, MVT::i32, Custom);
1104 setOperationAction(ISD::SUBCARRY, MVT::i32, Custom);
1105 if (Subtarget->hasDSP()) {
1106 setOperationAction(ISD::SADDSAT, MVT::i8, Custom);
1107 setOperationAction(ISD::SSUBSAT, MVT::i8, Custom);
1108 setOperationAction(ISD::SADDSAT, MVT::i16, Custom);
1109 setOperationAction(ISD::SSUBSAT, MVT::i16, Custom);
1110 }
1111 if (Subtarget->hasBaseDSP()) {
1112 setOperationAction(ISD::SADDSAT, MVT::i32, Legal);
1113 setOperationAction(ISD::SSUBSAT, MVT::i32, Legal);
1114 }
1115
1116 // i64 operation support.
1117 setOperationAction(ISD::MUL, MVT::i64, Expand);
1118 setOperationAction(ISD::MULHU, MVT::i32, Expand);
1119 if (Subtarget->isThumb1Only()) {
1120 setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
1121 setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
1122 }
1123 if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
1124 || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
1125 setOperationAction(ISD::MULHS, MVT::i32, Expand);
1126
1127 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
1128 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
1129 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
1130 setOperationAction(ISD::SRL, MVT::i64, Custom);
1131 setOperationAction(ISD::SRA, MVT::i64, Custom);
1132 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
1133 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
1134 setOperationAction(ISD::LOAD, MVT::i64, Custom);
1135 setOperationAction(ISD::STORE, MVT::i64, Custom);
1136
1137 // MVE lowers 64 bit shifts to lsll and lsrl
1138 // assuming that ISD::SRL and SRA of i64 are already marked custom
1139 if (Subtarget->hasMVEIntegerOps())
1140 setOperationAction(ISD::SHL, MVT::i64, Custom);
1141
1142 // Expand to __aeabi_l{lsl,lsr,asr} calls for Thumb1.
1143 if (Subtarget->isThumb1Only()) {
1144 setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
1145 setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
1146 setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
1147 }
1148
1149 if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
1150 setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
1151
1152 // ARM does not have ROTL.
1153 setOperationAction(ISD::ROTL, MVT::i32, Expand);
1154 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
1155 setOperationAction(ISD::ROTL, VT, Expand);
1156 setOperationAction(ISD::ROTR, VT, Expand);
1157 }
1158 setOperationAction(ISD::CTTZ, MVT::i32, Custom);
1159 setOperationAction(ISD::CTPOP, MVT::i32, Expand);
1160 if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
1161 setOperationAction(ISD::CTLZ, MVT::i32, Expand);
1162 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, LibCall);
1163 }
1164
1165 // @llvm.readcyclecounter requires the Performance Monitors extension.
1166 // Default to the 0 expansion on unsupported platforms.
1167 // FIXME: Technically there are older ARM CPUs that have
1168 // implementation-specific ways of obtaining this information.
1169 if (Subtarget->hasPerfMon())
1170 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
1171
1172 // Only ARMv6 has BSWAP.
1173 if (!Subtarget->hasV6Ops())
1174 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
1175
1176 bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
1177 : Subtarget->hasDivideInARMMode();
1178 if (!hasDivide) {
1179 // These are expanded into libcalls if the cpu doesn't have HW divider.
1180 setOperationAction(ISD::SDIV, MVT::i32, LibCall);
1181 setOperationAction(ISD::UDIV, MVT::i32, LibCall);
1182 }
1183
1184 if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
1185 setOperationAction(ISD::SDIV, MVT::i32, Custom);
1186 setOperationAction(ISD::UDIV, MVT::i32, Custom);
1187
1188 setOperationAction(ISD::SDIV, MVT::i64, Custom);
1189 setOperationAction(ISD::UDIV, MVT::i64, Custom);
1190 }
1191
1192 setOperationAction(ISD::SREM, MVT::i32, Expand);
1193 setOperationAction(ISD::UREM, MVT::i32, Expand);
1194
1195 // Register based DivRem for AEABI (RTABI 4.2)
1196 if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
1197 Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
1198 Subtarget->isTargetWindows()) {
1199 setOperationAction(ISD::SREM, MVT::i64, Custom);
1200 setOperationAction(ISD::UREM, MVT::i64, Custom);
1201 HasStandaloneRem = false;
1202
1203 if (Subtarget->isTargetWindows()) {
1204 const struct {
1205 const RTLIB::Libcall Op;
1206 const char * const Name;
1207 const CallingConv::ID CC;
1208 } LibraryCalls[] = {
1209 { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS },
1210 { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS },
1211 { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS },
1212 { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS },
1213
1214 { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS },
1215 { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS },
1216 { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS },
1217 { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS },
1218 };
1219
1220 for (const auto &LC : LibraryCalls) {
1221 setLibcallName(LC.Op, LC.Name);
1222 setLibcallCallingConv(LC.Op, LC.CC);
1223 }
1224 } else {
1225 const struct {
1226 const RTLIB::Libcall Op;
1227 const char * const Name;
1228 const CallingConv::ID CC;
1229 } LibraryCalls[] = {
1230 { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1231 { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1232 { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1233 { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS },
1234
1235 { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1236 { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1237 { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1238 { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS },
1239 };
1240
1241 for (const auto &LC : LibraryCalls) {
1242 setLibcallName(LC.Op, LC.Name);
1243 setLibcallCallingConv(LC.Op, LC.CC);
1244 }
1245 }
1246
1247 setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
1248 setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
1249 setOperationAction(ISD::SDIVREM, MVT::i64, Custom);
1250 setOperationAction(ISD::UDIVREM, MVT::i64, Custom);
1251 } else {
1252 setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
1253 setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
1254 }
1255
1256 if (Subtarget->getTargetTriple().isOSMSVCRT()) {
1257 // MSVCRT doesn't have powi; fall back to pow
1258 setLibcallName(RTLIB::POWI_F32, nullptr);
1259 setLibcallName(RTLIB::POWI_F64, nullptr);
1260 }
1261
1262 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
1263 setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
1264 setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
1265 setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
1266
1267 setOperationAction(ISD::TRAP, MVT::Other, Legal);
1268 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
1269
1270 // Use the default implementation.
1271 setOperationAction(ISD::VASTART, MVT::Other, Custom);
1272 setOperationAction(ISD::VAARG, MVT::Other, Expand);
1273 setOperationAction(ISD::VACOPY, MVT::Other, Expand);
1274 setOperationAction(ISD::VAEND, MVT::Other, Expand);
1275 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
1276 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
1277
1278 if (Subtarget->isTargetWindows())
1279 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
1280 else
1281 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
1282
1283 // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
1284 // the default expansion.
1285 InsertFencesForAtomic = false;
1286 if (Subtarget->hasAnyDataBarrier() &&
1287 (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
1288 // ATOMIC_FENCE needs custom lowering; the others should have been expanded
1289 // to ldrex/strex loops already.
1290 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
1291 if (!Subtarget->isThumb() || !Subtarget->isMClass())
1292 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
1293
1294 // On v8, we have particularly efficient implementations of atomic fences
1295 // if they can be combined with nearby atomic loads and stores.
1296 if (!Subtarget->hasAcquireRelease() ||
1297 getTargetMachine().getOptLevel() == 0) {
1298 // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
1299 InsertFencesForAtomic = true;
1300 }
1301 } else {
1302 // If there's anything we can use as a barrier, go through custom lowering
1303 // for ATOMIC_FENCE.
1304    // If the target has DMB in Thumb, fences can be inserted.
1305 if (Subtarget->hasDataBarrier())
1306 InsertFencesForAtomic = true;
1307
1308 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other,
1309 Subtarget->hasAnyDataBarrier() ? Custom : Expand);
1310
1311 // Set them all for expansion, which will force libcalls.
1312 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
1313 setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
1314 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
1315 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
1316 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
1317 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
1318 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
1319 setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
1320 setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
1321 setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
1322 setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
1323 setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
1324 // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
1325 // Unordered/Monotonic case.
1326 if (!InsertFencesForAtomic) {
1327 setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
1328 setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
1329 }
1330 }
1331
1332 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
1333
1334 // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
1335 if (!Subtarget->hasV6Ops()) {
1336 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
1337 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
1338 }
1339 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
1340
1341 if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() &&
1342 !Subtarget->isThumb1Only()) {
1343 // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
1344 // iff target supports vfp2.
1345 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
1346 setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
1347 setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
1348 }
1349
1350 // We want to custom lower some of our intrinsics.
1351 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
1352 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
1353 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
1354 setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
1355 if (Subtarget->useSjLjEH())
1356 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
1357
1358 setOperationAction(ISD::SETCC, MVT::i32, Expand);
1359 setOperationAction(ISD::SETCC, MVT::f32, Expand);
1360 setOperationAction(ISD::SETCC, MVT::f64, Expand);
1361 setOperationAction(ISD::SELECT, MVT::i32, Custom);
1362 setOperationAction(ISD::SELECT, MVT::f32, Custom);
1363 setOperationAction(ISD::SELECT, MVT::f64, Custom);
1364 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
1365 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
1366 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
1367 if (Subtarget->hasFullFP16()) {
1368 setOperationAction(ISD::SETCC, MVT::f16, Expand);
1369 setOperationAction(ISD::SELECT, MVT::f16, Custom);
1370 setOperationAction(ISD::SELECT_CC, MVT::f16, Custom);
1371 }
1372
1373 setOperationAction(ISD::SETCCCARRY, MVT::i32, Custom);
1374
1375 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
1376 setOperationAction(ISD::BR_CC, MVT::i32, Custom);
1377 if (Subtarget->hasFullFP16())
1378 setOperationAction(ISD::BR_CC, MVT::f16, Custom);
1379 setOperationAction(ISD::BR_CC, MVT::f32, Custom);
1380 setOperationAction(ISD::BR_CC, MVT::f64, Custom);
1381 setOperationAction(ISD::BR_JT, MVT::Other, Custom);
1382
1383 // We don't support sin/cos/fmod/copysign/pow
1384 setOperationAction(ISD::FSIN, MVT::f64, Expand);
1385 setOperationAction(ISD::FSIN, MVT::f32, Expand);
1386 setOperationAction(ISD::FCOS, MVT::f32, Expand);
1387 setOperationAction(ISD::FCOS, MVT::f64, Expand);
1388 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
1389 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
1390 setOperationAction(ISD::FREM, MVT::f64, Expand);
1391 setOperationAction(ISD::FREM, MVT::f32, Expand);
1392 if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() &&
1393 !Subtarget->isThumb1Only()) {
1394 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
1395 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
1396 }
1397 setOperationAction(ISD::FPOW, MVT::f64, Expand);
1398 setOperationAction(ISD::FPOW, MVT::f32, Expand);
1399
1400 if (!Subtarget->hasVFP4Base()) {
1401 setOperationAction(ISD::FMA, MVT::f64, Expand);
1402 setOperationAction(ISD::FMA, MVT::f32, Expand);
1403 }
1404
1405 // Various VFP goodness
1406 if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
1407 // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
1408 if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) {
1409 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
1410 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
1411 }
1412
1413 // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
1414 if (!Subtarget->hasFP16()) {
1415 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
1416 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
1417 }
1418
1419 // Strict floating-point comparisons need custom lowering.
1420 setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Custom);
1421 setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom);
1422 setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Custom);
1423 setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Custom);
1424 setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Custom);
1425 setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Custom);
1426 }
1427
1428 // Use __sincos_stret if available.
1429 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1430 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1431 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
1432 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
1433 }
1434
1435 // FP-ARMv8 implements a lot of rounding-like FP operations.
1436 if (Subtarget->hasFPARMv8Base()) {
1437 setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
1438 setOperationAction(ISD::FCEIL, MVT::f32, Legal);
1439 setOperationAction(ISD::FROUND, MVT::f32, Legal);
1440 setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
1441 setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
1442 setOperationAction(ISD::FRINT, MVT::f32, Legal);
1443 setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
1444 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
1445 if (Subtarget->hasNEON()) {
1446 setOperationAction(ISD::FMINNUM, MVT::v2f32, Legal);
1447 setOperationAction(ISD::FMAXNUM, MVT::v2f32, Legal);
1448 setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
1449 setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
1450 }
1451
1452 if (Subtarget->hasFP64()) {
1453 setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
1454 setOperationAction(ISD::FCEIL, MVT::f64, Legal);
1455 setOperationAction(ISD::FROUND, MVT::f64, Legal);
1456 setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
1457 setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
1458 setOperationAction(ISD::FRINT, MVT::f64, Legal);
1459 setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
1460 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
1461 }
1462 }
1463
1464  // FP16 operations often need to be promoted to library function calls
1465 if (Subtarget->hasFullFP16()) {
1466 setOperationAction(ISD::FREM, MVT::f16, Promote);
1467 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
1468 setOperationAction(ISD::FSIN, MVT::f16, Promote);
1469 setOperationAction(ISD::FCOS, MVT::f16, Promote);
1470 setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
1471 setOperationAction(ISD::FPOWI, MVT::f16, Promote);
1472 setOperationAction(ISD::FPOW, MVT::f16, Promote);
1473 setOperationAction(ISD::FEXP, MVT::f16, Promote);
1474 setOperationAction(ISD::FEXP2, MVT::f16, Promote);
1475 setOperationAction(ISD::FLOG, MVT::f16, Promote);
1476 setOperationAction(ISD::FLOG10, MVT::f16, Promote);
1477 setOperationAction(ISD::FLOG2, MVT::f16, Promote);
1478
1479 setOperationAction(ISD::FROUND, MVT::f16, Legal);
1480 }
1481
1482 if (Subtarget->hasNEON()) {
1483 // vmin and vmax aren't available in a scalar form, so we can use
1484 // a NEON instruction with an undef lane instead. This has a performance
1485 // penalty on some cores, so we don't do this unless we have been
1486 // asked to by the core tuning model.
1487 if (Subtarget->useNEONForSinglePrecisionFP()) {
1488 setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);
1489 setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
1490 setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
1491 setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal);
1492 }
1493 setOperationAction(ISD::FMINIMUM, MVT::v2f32, Legal);
1494 setOperationAction(ISD::FMAXIMUM, MVT::v2f32, Legal);
1495 setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal);
1496 setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal);
1497
1498 if (Subtarget->hasFullFP16()) {
1499 setOperationAction(ISD::FMINNUM, MVT::v4f16, Legal);
1500 setOperationAction(ISD::FMAXNUM, MVT::v4f16, Legal);
1501 setOperationAction(ISD::FMINNUM, MVT::v8f16, Legal);
1502 setOperationAction(ISD::FMAXNUM, MVT::v8f16, Legal);
1503
1504 setOperationAction(ISD::FMINIMUM, MVT::v4f16, Legal);
1505 setOperationAction(ISD::FMAXIMUM, MVT::v4f16, Legal);
1506 setOperationAction(ISD::FMINIMUM, MVT::v8f16, Legal);
1507 setOperationAction(ISD::FMAXIMUM, MVT::v8f16, Legal);
1508 }
1509 }
1510
1511 // We have target-specific dag combine patterns for the following nodes:
1512 // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
1513 setTargetDAGCombine(ISD::ADD);
1514 setTargetDAGCombine(ISD::SUB);
1515 setTargetDAGCombine(ISD::MUL);
1516 setTargetDAGCombine(ISD::AND);
1517 setTargetDAGCombine(ISD::OR);
1518 setTargetDAGCombine(ISD::XOR);
1519
1520 if (Subtarget->hasMVEIntegerOps())
1521 setTargetDAGCombine(ISD::VSELECT);
1522
1523 if (Subtarget->hasV6Ops())
1524 setTargetDAGCombine(ISD::SRL);
1525 if (Subtarget->isThumb1Only())
1526 setTargetDAGCombine(ISD::SHL);
1527
1528 setStackPointerRegisterToSaveRestore(ARM::SP);
1529
1530 if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
1531 !Subtarget->hasVFP2Base() || Subtarget->hasMinSize())
1532 setSchedulingPreference(Sched::RegPressure);
1533 else
1534 setSchedulingPreference(Sched::Hybrid);
1535
1536 //// temporary - rewrite interface to use type
1537 MaxStoresPerMemset = 8;
1538 MaxStoresPerMemsetOptSize = 4;
1539 MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
1540 MaxStoresPerMemcpyOptSize = 2;
1541 MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
1542 MaxStoresPerMemmoveOptSize = 2;
1543
1544 // On ARM arguments smaller than 4 bytes are extended, so all arguments
1545 // are at least 4 bytes aligned.
1546 setMinStackArgumentAlignment(Align(4));
1547
1548 // Prefer likely predicted branches to selects on out-of-order cores.
1549 PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
1550
1551 setPrefLoopAlignment(Align(1ULL << Subtarget->getPrefLoopLogAlignment()));
1552
1553 setMinFunctionAlignment(Subtarget->isThumb() ? Align(2) : Align(4));
1554
1555 if (Subtarget->isThumb() || Subtarget->isThumb2())
1556 setTargetDAGCombine(ISD::ABS);
1557}
1558
1559bool ARMTargetLowering::useSoftFloat() const {
1560 return Subtarget->useSoftFloat();
1561}
1562
1563// FIXME: It might make sense to define the representative register class as the
1564// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
1565// a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
1566// SPR's representative would be DPR_VFP2. This should work well if register
1567// pressure tracking were modified such that a register use would increment the
1568// pressure of the register class's representative and all of its super
1569// classes' representatives transitively. We have not implemented this because
1570// of the difficulty prior to coalescing of modeling operand register classes
1571// due to the common occurrence of cross class copies and subregister insertions
1572// and extractions.
1573std::pair<const TargetRegisterClass *, uint8_t>
1574ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
1575 MVT VT) const {
1576 const TargetRegisterClass *RRC = nullptr;
1577 uint8_t Cost = 1;
1578 switch (VT.SimpleTy) {
1579 default:
1580 return TargetLowering::findRepresentativeClass(TRI, VT);
1581 // Use DPR as representative register class for all floating point
1582  // and vector types. Since there are 32 SPR registers and 32 DPR registers,
1583 // the cost is 1 for both f32 and f64.
1584 case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
1585 case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
1586 RRC = &ARM::DPRRegClass;
1587 // When NEON is used for SP, only half of the register file is available
1588 // because operations that define both SP and DP results will be constrained
1589 // to the VFP2 class (D0-D15). We currently model this constraint prior to
1590 // coalescing by double-counting the SP regs. See the FIXME above.
1591 if (Subtarget->useNEONForSinglePrecisionFP())
1592 Cost = 2;
1593 break;
1594 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
1595 case MVT::v4f32: case MVT::v2f64:
1596 RRC = &ARM::DPRRegClass;
1597 Cost = 2;
1598 break;
1599 case MVT::v4i64:
1600 RRC = &ARM::DPRRegClass;
1601 Cost = 4;
1602 break;
1603 case MVT::v8i64:
1604 RRC = &ARM::DPRRegClass;
1605 Cost = 8;
1606 break;
1607 }
1608 return std::make_pair(RRC, Cost);
1609}
1610
1611const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
1612 switch ((ARMISD::NodeType)Opcode) {
1613 case ARMISD::FIRST_NUMBER: break;
1614 case ARMISD::Wrapper: return "ARMISD::Wrapper";
1615 case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC";
1616 case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
1617 case ARMISD::COPY_STRUCT_BYVAL: return "ARMISD::COPY_STRUCT_BYVAL";
1618 case ARMISD::CALL: return "ARMISD::CALL";
1619 case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
1620 case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
1621 case ARMISD::tSECALL: return "ARMISD::tSECALL";
1622 case ARMISD::BRCOND: return "ARMISD::BRCOND";
1623 case ARMISD::BR_JT: return "ARMISD::BR_JT";
1624 case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
1625 case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
1626 case ARMISD::SERET_FLAG: return "ARMISD::SERET_FLAG";
1627 case ARMISD::INTRET_FLAG: return "ARMISD::INTRET_FLAG";
1628 case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
1629 case ARMISD::CMP: return "ARMISD::CMP";
1630 case ARMISD::CMN: return "ARMISD::CMN";
1631 case ARMISD::CMPZ: return "ARMISD::CMPZ";
1632 case ARMISD::CMPFP: return "ARMISD::CMPFP";
1633 case ARMISD::CMPFPE: return "ARMISD::CMPFPE";
1634 case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
1635 case ARMISD::CMPFPEw0: return "ARMISD::CMPFPEw0";
1636 case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
1637 case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
1638
1639 case ARMISD::CMOV: return "ARMISD::CMOV";
1640 case ARMISD::SUBS: return "ARMISD::SUBS";
1641
1642 case ARMISD::SSAT: return "ARMISD::SSAT";
1643 case ARMISD::USAT: return "ARMISD::USAT";
1644
1645 case ARMISD::ASRL: return "ARMISD::ASRL";
1646 case ARMISD::LSRL: return "ARMISD::LSRL";
1647 case ARMISD::LSLL: return "ARMISD::LSLL";
1648
1649 case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
1650 case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
1651 case ARMISD::RRX: return "ARMISD::RRX";
1652
1653 case ARMISD::ADDC: return "ARMISD::ADDC";
1654 case ARMISD::ADDE: return "ARMISD::ADDE";
1655 case ARMISD::SUBC: return "ARMISD::SUBC";
1656 case ARMISD::SUBE: return "ARMISD::SUBE";
1657 case ARMISD::LSLS: return "ARMISD::LSLS";
1658
1659 case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
1660 case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";
1661 case ARMISD::VMOVhr: return "ARMISD::VMOVhr";
1662 case ARMISD::VMOVrh: return "ARMISD::VMOVrh";
1663 case ARMISD::VMOVSR: return "ARMISD::VMOVSR";
1664
1665 case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
1666 case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP";
1667 case ARMISD::EH_SJLJ_SETUP_DISPATCH: return "ARMISD::EH_SJLJ_SETUP_DISPATCH";
1668
1669 case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";
1670
1671 case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
1672
1673 case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";
1674
1675 case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
1676
1677 case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
1678
1679 case ARMISD::LDRD: return "ARMISD::LDRD";
1680 case ARMISD::STRD: return "ARMISD::STRD";
1681
1682 case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK";
1683 case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK";
1684
1685 case ARMISD::PREDICATE_CAST: return "ARMISD::PREDICATE_CAST";
1686 case ARMISD::VECTOR_REG_CAST: return "ARMISD::VECTOR_REG_CAST";
1687 case ARMISD::VCMP: return "ARMISD::VCMP";
1688 case ARMISD::VCMPZ: return "ARMISD::VCMPZ";
1689 case ARMISD::VTST: return "ARMISD::VTST";
1690
1691 case ARMISD::VSHLs: return "ARMISD::VSHLs";
1692 case ARMISD::VSHLu: return "ARMISD::VSHLu";
1693 case ARMISD::VSHLIMM: return "ARMISD::VSHLIMM";
1694 case ARMISD::VSHRsIMM: return "ARMISD::VSHRsIMM";
1695 case ARMISD::VSHRuIMM: return "ARMISD::VSHRuIMM";
1696 case ARMISD::VRSHRsIMM: return "ARMISD::VRSHRsIMM";
1697 case ARMISD::VRSHRuIMM: return "ARMISD::VRSHRuIMM";
1698 case ARMISD::VRSHRNIMM: return "ARMISD::VRSHRNIMM";
1699 case ARMISD::VQSHLsIMM: return "ARMISD::VQSHLsIMM";
1700 case ARMISD::VQSHLuIMM: return "ARMISD::VQSHLuIMM";
1701 case ARMISD::VQSHLsuIMM: return "ARMISD::VQSHLsuIMM";
1702 case ARMISD::VQSHRNsIMM: return "ARMISD::VQSHRNsIMM";
1703 case ARMISD::VQSHRNuIMM: return "ARMISD::VQSHRNuIMM";
1704 case ARMISD::VQSHRNsuIMM: return "ARMISD::VQSHRNsuIMM";
1705 case ARMISD::VQRSHRNsIMM: return "ARMISD::VQRSHRNsIMM";
1706 case ARMISD::VQRSHRNuIMM: return "ARMISD::VQRSHRNuIMM";
1707 case ARMISD::VQRSHRNsuIMM: return "ARMISD::VQRSHRNsuIMM";
1708 case ARMISD::VSLIIMM: return "ARMISD::VSLIIMM";
1709 case ARMISD::VSRIIMM: return "ARMISD::VSRIIMM";
1710 case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";
1711 case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
1712 case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM";
1713 case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM";
1714 case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM";
1715 case ARMISD::VDUP: return "ARMISD::VDUP";
1716 case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE";
1717 case ARMISD::VEXT: return "ARMISD::VEXT";
1718 case ARMISD::VREV64: return "ARMISD::VREV64";
1719 case ARMISD::VREV32: return "ARMISD::VREV32";
1720 case ARMISD::VREV16: return "ARMISD::VREV16";
1721 case ARMISD::VZIP: return "ARMISD::VZIP";
1722 case ARMISD::VUZP: return "ARMISD::VUZP";
1723 case ARMISD::VTRN: return "ARMISD::VTRN";
1724 case ARMISD::VTBL1: return "ARMISD::VTBL1";
1725 case ARMISD::VTBL2: return "ARMISD::VTBL2";
1726 case ARMISD::VMOVN: return "ARMISD::VMOVN";
1727 case ARMISD::VQMOVNs: return "ARMISD::VQMOVNs";
1728 case ARMISD::VQMOVNu: return "ARMISD::VQMOVNu";
1729 case ARMISD::VCVTN: return "ARMISD::VCVTN";
1730 case ARMISD::VCVTL: return "ARMISD::VCVTL";
1731 case ARMISD::VMULLs: return "ARMISD::VMULLs";
1732 case ARMISD::VMULLu: return "ARMISD::VMULLu";
1733 case ARMISD::VQDMULH: return "ARMISD::VQDMULH";
1734 case ARMISD::VADDVs: return "ARMISD::VADDVs";
1735 case ARMISD::VADDVu: return "ARMISD::VADDVu";
1736 case ARMISD::VADDVps: return "ARMISD::VADDVps";
1737 case ARMISD::VADDVpu: return "ARMISD::VADDVpu";
1738 case ARMISD::VADDLVs: return "ARMISD::VADDLVs";
1739 case ARMISD::VADDLVu: return "ARMISD::VADDLVu";
1740 case ARMISD::VADDLVAs: return "ARMISD::VADDLVAs";
1741 case ARMISD::VADDLVAu: return "ARMISD::VADDLVAu";
1742 case ARMISD::VADDLVps: return "ARMISD::VADDLVps";
1743 case ARMISD::VADDLVpu: return "ARMISD::VADDLVpu";
1744 case ARMISD::VADDLVAps: return "ARMISD::VADDLVAps";
1745 case ARMISD::VADDLVApu: return "ARMISD::VADDLVApu";
1746 case ARMISD::VMLAVs: return "ARMISD::VMLAVs";
1747 case ARMISD::VMLAVu: return "ARMISD::VMLAVu";
1748 case ARMISD::VMLAVps: return "ARMISD::VMLAVps";
1749 case ARMISD::VMLAVpu: return "ARMISD::VMLAVpu";
1750 case ARMISD::VMLALVs: return "ARMISD::VMLALVs";
1751 case ARMISD::VMLALVu: return "ARMISD::VMLALVu";
1752 case ARMISD::VMLALVps: return "ARMISD::VMLALVps";
1753 case ARMISD::VMLALVpu: return "ARMISD::VMLALVpu";
1754 case ARMISD::VMLALVAs: return "ARMISD::VMLALVAs";
1755 case ARMISD::VMLALVAu: return "ARMISD::VMLALVAu";
1756 case ARMISD::VMLALVAps: return "ARMISD::VMLALVAps";
1757 case ARMISD::VMLALVApu: return "ARMISD::VMLALVApu";
1758 case ARMISD::VMINVu: return "ARMISD::VMINVu";
1759 case ARMISD::VMINVs: return "ARMISD::VMINVs";
1760 case ARMISD::VMAXVu: return "ARMISD::VMAXVu";
1761 case ARMISD::VMAXVs: return "ARMISD::VMAXVs";
1762 case ARMISD::UMAAL: return "ARMISD::UMAAL";
1763 case ARMISD::UMLAL: return "ARMISD::UMLAL";
1764 case ARMISD::SMLAL: return "ARMISD::SMLAL";
1765 case ARMISD::SMLALBB: return "ARMISD::SMLALBB";
1766 case ARMISD::SMLALBT: return "ARMISD::SMLALBT";
1767 case ARMISD::SMLALTB: return "ARMISD::SMLALTB";
1768 case ARMISD::SMLALTT: return "ARMISD::SMLALTT";
1769 case ARMISD::SMULWB: return "ARMISD::SMULWB";
1770 case ARMISD::SMULWT: return "ARMISD::SMULWT";
1771 case ARMISD::SMLALD: return "ARMISD::SMLALD";
1772 case ARMISD::SMLALDX: return "ARMISD::SMLALDX";
1773 case ARMISD::SMLSLD: return "ARMISD::SMLSLD";
1774 case ARMISD::SMLSLDX: return "ARMISD::SMLSLDX";
1775 case ARMISD::SMMLAR: return "ARMISD::SMMLAR";
1776 case ARMISD::SMMLSR: return "ARMISD::SMMLSR";
1777 case ARMISD::QADD16b: return "ARMISD::QADD16b";
1778 case ARMISD::QSUB16b: return "ARMISD::QSUB16b";
1779 case ARMISD::QADD8b: return "ARMISD::QADD8b";
1780 case ARMISD::QSUB8b: return "ARMISD::QSUB8b";
1781 case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
1782 case ARMISD::BFI: return "ARMISD::BFI";
1783 case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
1784 case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
1785 case ARMISD::VBSP: return "ARMISD::VBSP";
1786 case ARMISD::MEMCPY: return "ARMISD::MEMCPY";
1787 case ARMISD::VLD1DUP: return "ARMISD::VLD1DUP";
1788 case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
1789 case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP";
1790 case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP";
1791 case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD";
1792 case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD";
1793 case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD";
1794 case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD";
1795 case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD";
1796 case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD";
1797 case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD";
1798 case ARMISD::VLD1DUP_UPD: return "ARMISD::VLD1DUP_UPD";
1799 case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD";
1800 case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD";
1801 case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD";
1802 case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD";
1803 case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD";
1804 case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD";
1805 case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD";
1806 case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD";
1807 case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD";
1808 case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD";
1809 case ARMISD::WLS: return "ARMISD::WLS";
1810 case ARMISD::WLSSETUP: return "ARMISD::WLSSETUP";
1811 case ARMISD::LE: return "ARMISD::LE";
1812 case ARMISD::LOOP_DEC: return "ARMISD::LOOP_DEC";
1813 case ARMISD::CSINV: return "ARMISD::CSINV";
1814 case ARMISD::CSNEG: return "ARMISD::CSNEG";
1815 case ARMISD::CSINC: return "ARMISD::CSINC";
1816 }
1817 return nullptr;
1818}
1819
1820EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
1821 EVT VT) const {
1822 if (!VT.isVector())
1823 return getPointerTy(DL);
1824
1825 // MVE has a predicate register.
1826 if (Subtarget->hasMVEIntegerOps() &&
1827 (VT == MVT::v4i32 || VT == MVT::v8i16 || VT == MVT::v16i8))
1828 return MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
1829 return VT.changeVectorElementTypeToInteger();
1830}
1831
1832/// getRegClassFor - Return the register class that should be used for the
1833/// specified value type.
1834const TargetRegisterClass *
1835ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
1836 (void)isDivergent;
1837 // Map v4i64 to QQ registers but do not make the type legal. Similarly map
1838 // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
1839 // load / store 4 to 8 consecutive NEON D registers, or 2 to 4 consecutive
1840 // MVE Q registers.
1841 if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
1842 if (VT == MVT::v4i64)
1843 return &ARM::QQPRRegClass;
1844 if (VT == MVT::v8i64)
1845 return &ARM::QQQQPRRegClass;
1846 }
1847 return TargetLowering::getRegClassFor(VT);
1848}
1849
1850// memcpy, and other memory intrinsics, typically try to use LDM/STM if the
1851// source/dest is aligned and the copy size is large enough. We therefore want
1852// to align such objects passed to memory intrinsics.
1853bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
1854 unsigned &PrefAlign) const {
1855 if (!isa<MemIntrinsic>(CI))
1856 return false;
1857 MinSize = 8;
1858 // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
1859 // cycle faster than 4-byte aligned LDM.
1860 PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4);
1861 return true;
1862}
1863
1864// Create a fast isel object.
1865FastISel *
1866ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
1867 const TargetLibraryInfo *libInfo) const {
1868 return ARM::createFastISel(funcInfo, libInfo);
1869}
1870
1871Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
1872 unsigned NumVals = N->getNumValues();
1873 if (!NumVals)
1874 return Sched::RegPressure;
1875
1876 for (unsigned i = 0; i != NumVals; ++i) {
1877 EVT VT = N->getValueType(i);
1878 if (VT == MVT::Glue || VT == MVT::Other)
1879 continue;
1880 if (VT.isFloatingPoint() || VT.isVector())
1881 return Sched::ILP;
1882 }
1883
1884 if (!N->isMachineOpcode())
1885 return Sched::RegPressure;
1886
1887  // Loads are scheduled for latency even if the instruction itinerary
1888 // is not available.
1889 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1890 const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1891
1892 if (MCID.getNumDefs() == 0)
1893 return Sched::RegPressure;
1894 if (!Itins->isEmpty() &&
1895 Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
1896 return Sched::ILP;
1897
1898 return Sched::RegPressure;
1899}
1900
1901//===----------------------------------------------------------------------===//
1902// Lowering Code
1903//===----------------------------------------------------------------------===//
1904
1905static bool isSRL16(const SDValue &Op) {
1906 if (Op.getOpcode() != ISD::SRL)
1907 return false;
1908 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1909 return Const->getZExtValue() == 16;
1910 return false;
1911}
1912
1913static bool isSRA16(const SDValue &Op) {
1914 if (Op.getOpcode() != ISD::SRA)
1915 return false;
1916 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1917 return Const->getZExtValue() == 16;
1918 return false;
1919}
1920
1921static bool isSHL16(const SDValue &Op) {
1922 if (Op.getOpcode() != ISD::SHL)
1923 return false;
1924 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1925 return Const->getZExtValue() == 16;
1926 return false;
1927}
1928
1929// Check for a signed 16-bit value. We special-case SRA because it makes it
1930// simpler when also looking for SRAs that aren't sign-extending a
1931// smaller value. Without the check, we'd need to take extra care with
1932// checking order for some operations.
1933static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
1934 if (isSRA16(Op))
1935 return isSHL16(Op.getOperand(0));
1936 return DAG.ComputeNumSignBits(Op) == 17;
1937}
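
To make the 17-sign-bit test above concrete, here is a rough standalone sketch; numSignBits is a hypothetical helper standing in for DAG.ComputeNumSignBits (which additionally reasons about known bits), and the point is simply that an i32 whose top 17 bits all repeat the sign bit survives a round trip through a signed 16-bit type.

#include <cassert>
#include <cstdint>

// Count how many of the most significant bits of a 32-bit value are copies
// of the sign bit (the sign bit itself included).
static int numSignBits(int32_t X) {
  uint32_t U = static_cast<uint32_t>(X);
  uint32_t Sign = U >> 31;
  int N = 1;
  while (N < 32 && ((U >> (31 - N)) & 1) == Sign)
    ++N;
  return N;
}

int main() {
  // 17 sign bits: the value is a genuine signed 16-bit quantity.
  assert(numSignBits(32767) == 17 && int16_t(32767) == 32767);
  assert(numSignBits(-32768) == 17 && int16_t(-32768) == -32768);
  // One fewer sign bit and the value no longer fits in i16.
  assert(numSignBits(32768) == 16);
  return 0;
}
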
1938
1939/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
1940static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
1941 switch (CC) {
1942  default: llvm_unreachable("Unknown condition code!");
1943 case ISD::SETNE: return ARMCC::NE;
1944 case ISD::SETEQ: return ARMCC::EQ;
1945 case ISD::SETGT: return ARMCC::GT;
1946 case ISD::SETGE: return ARMCC::GE;
1947 case ISD::SETLT: return ARMCC::LT;
1948 case ISD::SETLE: return ARMCC::LE;
1949 case ISD::SETUGT: return ARMCC::HI;
1950 case ISD::SETUGE: return ARMCC::HS;
1951 case ISD::SETULT: return ARMCC::LO;
1952 case ISD::SETULE: return ARMCC::LS;
1953 }
1954}
1955
1956/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
1957static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
1958 ARMCC::CondCodes &CondCode2) {
1959 CondCode2 = ARMCC::AL;
1960 switch (CC) {
1961  default: llvm_unreachable("Unknown FP condition!");
1962 case ISD::SETEQ:
1963 case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
1964 case ISD::SETGT:
1965 case ISD::SETOGT: CondCode = ARMCC::GT; break;
1966 case ISD::SETGE:
1967 case ISD::SETOGE: CondCode = ARMCC::GE; break;
1968 case ISD::SETOLT: CondCode = ARMCC::MI; break;
1969 case ISD::SETOLE: CondCode = ARMCC::LS; break;
1970 case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
1971 case ISD::SETO: CondCode = ARMCC::VC; break;
1972 case ISD::SETUO: CondCode = ARMCC::VS; break;
1973 case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
1974 case ISD::SETUGT: CondCode = ARMCC::HI; break;
1975 case ISD::SETUGE: CondCode = ARMCC::PL; break;
1976 case ISD::SETLT:
1977 case ISD::SETULT: CondCode = ARMCC::LT; break;
1978 case ISD::SETLE:
1979 case ISD::SETULE: CondCode = ARMCC::LE; break;
1980 case ISD::SETNE:
1981 case ISD::SETUNE: CondCode = ARMCC::NE; break;
1982 }
1983}
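
A note on the cases above that also set CondCode2: a single ARM condition cannot express every IEEE predicate. After a VFP compare (FMSTAT transfers the FP flags to APSR), less-than shows up as N, equality as Z, and an unordered result as V, so SETUEQ ("equal or unordered") is tested as EQ with a follow-up VS check, SETONE ("ordered and not equal") as MI followed by GT, and a CondCode2 of AL means no second check is needed.
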
1984
1985//===----------------------------------------------------------------------===//
1986// Calling Convention Implementation
1987//===----------------------------------------------------------------------===//
1988
1989/// getEffectiveCallingConv - Get the effective calling convention, taking into
1990/// account presence of floating point hardware and calling convention
1991/// limitations, such as support for variadic functions.
1992CallingConv::ID
1993ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
1994 bool isVarArg) const {
1995 switch (CC) {
1996 default:
1997 report_fatal_error("Unsupported calling convention");
1998 case CallingConv::ARM_AAPCS:
1999 case CallingConv::ARM_APCS:
2000 case CallingConv::GHC:
2001 case CallingConv::CFGuard_Check:
2002 return CC;
2003 case CallingConv::PreserveMost:
2004 return CallingConv::PreserveMost;
2005 case CallingConv::ARM_AAPCS_VFP:
2006 case CallingConv::Swift:
2007 return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP;
2008 case CallingConv::C:
2009 if (!Subtarget->isAAPCS_ABI())
2010 return CallingConv::ARM_APCS;
2011 else if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() &&
2012 getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
2013 !isVarArg)
2014 return CallingConv::ARM_AAPCS_VFP;
2015 else
2016 return CallingConv::ARM_AAPCS;
2017 case CallingConv::Fast:
2018 case CallingConv::CXX_FAST_TLS:
2019 if (!Subtarget->isAAPCS_ABI()) {
2020 if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && !isVarArg)
2021 return CallingConv::Fast;
2022 return CallingConv::ARM_APCS;
2023 } else if (Subtarget->hasVFP2Base() &&
2024 !Subtarget->isThumb1Only() && !isVarArg)
2025 return CallingConv::ARM_AAPCS_VFP;
2026 else
2027 return CallingConv::ARM_AAPCS;
2028 }
2029}
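
Reading the CallingConv::C case above: on an AAPCS target with a VFP2 base, outside Thumb1, and with a hard-float ABI, an ordinary C call is lowered as ARM_AAPCS_VFP, but the same call made variadic falls back to plain ARM_AAPCS; under the AAPCS the VFP register-passing variant is not used for variadic functions, so their arguments travel in core registers and on the stack.
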
2030
2031CCAssignFn *ARMTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
2032 bool isVarArg) const {
2033 return CCAssignFnForNode(CC, false, isVarArg);
2034}
2035
2036CCAssignFn *ARMTargetLowering::CCAssignFnForReturn(CallingConv::ID CC,
2037 bool isVarArg) const {
2038 return CCAssignFnForNode(CC, true, isVarArg);
2039}
2040
2041/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
2042/// CallingConvention.
2043CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
2044 bool Return,
2045 bool isVarArg) const {
2046 switch (getEffectiveCallingConv(CC, isVarArg)) {
2047 default:
2048 report_fatal_error("Unsupported calling convention");
2049 case CallingConv::ARM_APCS:
2050 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
2051 case CallingConv::ARM_AAPCS:
2052 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2053 case CallingConv::ARM_AAPCS_VFP:
2054 return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
2055 case CallingConv::Fast:
2056 return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
2057 case CallingConv::GHC:
2058 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
2059 case CallingConv::PreserveMost:
2060 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2061 case CallingConv::CFGuard_Check:
2062 return (Return ? RetCC_ARM_AAPCS : CC_ARM_Win32_CFGuard_Check);
2063 }
2064}
2065
2066SDValue ARMTargetLowering::MoveToHPR(const SDLoc &dl, SelectionDAG &DAG,
2067 MVT LocVT, MVT ValVT, SDValue Val) const {
2068 Val = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocVT.getSizeInBits()),
2069 Val);
2070 if (Subtarget->hasFullFP16()) {
2071 Val = DAG.getNode(ARMISD::VMOVhr, dl, ValVT, Val);
2072 } else {
2073 Val = DAG.getNode(ISD::TRUNCATE, dl,
2074 MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
2075 Val = DAG.getNode(ISD::BITCAST, dl, ValVT, Val);
2076 }
2077 return Val;
2078}
2079
2080SDValue ARMTargetLowering::MoveFromHPR(const SDLoc &dl, SelectionDAG &DAG,
2081 MVT LocVT, MVT ValVT,
2082 SDValue Val) const {
2083 if (Subtarget->hasFullFP16()) {
2084 Val = DAG.getNode(ARMISD::VMOVrh, dl,
2085 MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
2086 } else {
2087 Val = DAG.getNode(ISD::BITCAST, dl,
2088 MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
2089 Val = DAG.getNode(ISD::ZERO_EXTEND, dl,
2090 MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
2091 }
2092 return DAG.getNode(ISD::BITCAST, dl, LocVT, Val);
2093}
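
A tiny standalone sketch of the convention these two helpers implement (plain C++, not LLVM code): the raw 16 bits of a half value travel in the low 16 bits of a 32-bit location, so in the soft-ABI view the round trip is just a zero-extend on the way out and a truncate on the way back; with full FP16 the same move is done with VMOVrh/VMOVhr instead of integer casts.

#include <cassert>
#include <cstdint>

int main() {
  uint16_t HalfBits = 0x3C00;                  // raw IEEE-754 bits of 1.0 (half)
  uint32_t Loc = HalfBits;                     // ZERO_EXTEND into the 32-bit slot
  uint16_t Back = static_cast<uint16_t>(Loc);  // TRUNCATE recovers the bits
  assert(Back == HalfBits);
  return 0;
}
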
2094
2095/// LowerCallResult - Lower the result values of a call into the
2096/// appropriate copies out of appropriate physical registers.
2097SDValue ARMTargetLowering::LowerCallResult(
2098 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
2099 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2100 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
2101 SDValue ThisVal) const {
2102 // Assign locations to each value returned by this call.
2103 SmallVector<CCValAssign, 16> RVLocs;
2104 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2105 *DAG.getContext());
2106 CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));
2107
2108 // Copy all of the result registers out of their specified physreg.
2109 for (unsigned i = 0; i != RVLocs.size(); ++i) {
2110 CCValAssign VA = RVLocs[i];
2111
2112 // Pass 'this' value directly from the argument to return value, to avoid
2113 // reg unit interference
2114 if (i == 0 && isThisReturn) {
2115      assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
2116             "unexpected return calling convention register assignment");
2117 InVals.push_back(ThisVal);
2118 continue;
2119 }
2120
2121 SDValue Val;
2122 if (VA.needsCustom() &&
2123 (VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2f64)) {
2124 // Handle f64 or half of a v2f64.
2125 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
2126 InFlag);
2127 Chain = Lo.getValue(1);
2128 InFlag = Lo.getValue(2);
2129 VA = RVLocs[++i]; // skip ahead to next loc
2130 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
2131 InFlag);
2132 Chain = Hi.getValue(1);
2133 InFlag = Hi.getValue(2);
2134 if (!Subtarget->isLittle())
2135 std::swap (Lo, Hi);
2136 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
2137
2138 if (VA.getLocVT() == MVT::v2f64) {
2139 SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
2140 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
2141 DAG.getConstant(0, dl, MVT::i32));
2142
2143 VA = RVLocs[++i]; // skip ahead to next loc
2144 Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
2145 Chain = Lo.getValue(1);
2146 InFlag = Lo.getValue(2);
2147 VA = RVLocs[++i]; // skip ahead to next loc
2148 Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
2149 Chain = Hi.getValue(1);
2150 InFlag = Hi.getValue(2);
2151 if (!Subtarget->isLittle())
2152 std::swap (Lo, Hi);
2153 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
2154 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
2155 DAG.getConstant(1, dl, MVT::i32));
2156 }
2157 } else {
2158 Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
2159 InFlag);
2160 Chain = Val.getValue(1);
2161 InFlag = Val.getValue(2);
2162 }
2163
2164 switch (VA.getLocInfo()) {
2165    default: llvm_unreachable("Unknown loc info!");
2166 case CCValAssign::Full: break;
2167 case CCValAssign::BCvt:
2168 Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
2169 break;
2170 }
2171
2172 // f16 arguments have their size extended to 4 bytes and passed as if they
2173 // had been copied to the LSBs of a 32-bit register.
2174 // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
2175 if (VA.needsCustom() &&
2176 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
2177 Val = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Val);
2178
2179 InVals.push_back(Val);
2180 }
2181
2182 return Chain;
2183}
2184
2185/// LowerMemOpCallTo - Store the argument to the stack.
2186SDValue ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
2187 SDValue Arg, const SDLoc &dl,
2188 SelectionDAG &DAG,
2189 const CCValAssign &VA,
2190 ISD::ArgFlagsTy Flags) const {
2191 unsigned LocMemOffset = VA.getLocMemOffset();
2192 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
2193 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
2194 StackPtr, PtrOff);
2195 return DAG.getStore(
32
Calling 'SelectionDAG::getStore'
2196 Chain, dl, Arg, PtrOff,
31
Null pointer value stored to 'Val.Node'
2197 MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
2198}
2199
2200void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
2201 SDValue Chain, SDValue &Arg,
2202 RegsToPassVector &RegsToPass,
2203 CCValAssign &VA, CCValAssign &NextVA,
2204 SDValue &StackPtr,
2205 SmallVectorImpl<SDValue> &MemOpChains,
2206 ISD::ArgFlagsTy Flags) const {
2207 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2208 DAG.getVTList(MVT::i32, MVT::i32), Arg);
2209 unsigned id = Subtarget->isLittle() ? 0 : 1;
24
Assuming the condition is false
25
'?' condition is false
2210 RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
2211
2212 if (NextVA.isRegLoc())
26
Taking false branch
2213 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
2214 else {
2215    assert(NextVA.isMemLoc());
27
'?' condition is true
2216 if (!StackPtr.getNode())
28
Assuming the condition is false
29
Taking false branch
2217 StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
2218 getPointerTy(DAG.getDataLayout()));
2219
2220 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id),
30
Calling 'ARMTargetLowering::LowerMemOpCallTo'
2221 dl, DAG, NextVA,
2222 Flags));
2223 }
2224}
2225
2226/// LowerCall - Lowering a call into a callseq_start <-
2227/// ARMISD:CALL <- callseq_end chain. Also add input and output parameter
2228/// nodes.
2229SDValue
2230ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
2231 SmallVectorImpl<SDValue> &InVals) const {
2232 SelectionDAG &DAG = CLI.DAG;
2233 SDLoc &dl = CLI.DL;
2234 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2235 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2236 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2237 SDValue Chain = CLI.Chain;
2238 SDValue Callee = CLI.Callee;
2239 bool &isTailCall = CLI.IsTailCall;
2240 CallingConv::ID CallConv = CLI.CallConv;
2241 bool doesNotRet = CLI.DoesNotReturn;
2242 bool isVarArg = CLI.IsVarArg;
2243
2244 MachineFunction &MF = DAG.getMachineFunction();
2245 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2246 MachineFunction::CallSiteInfo CSInfo;
2247 bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
1
'?' condition is false
2248 bool isThisReturn = false;
2249 bool isCmseNSCall = false;
2250 bool PreferIndirect = false;
2251
2252 // Determine whether this is a non-secure function call.
2253 if (CLI.CB && CLI.CB->getAttributes().hasFnAttribute("cmse_nonsecure_call"))
2
Assuming field 'CB' is null
3
Taking false branch
2254 isCmseNSCall = true;
2255
2256 // Disable tail calls if they're not supported.
2257 if (!Subtarget->supportsTailCall())
4
Assuming the condition is false
5
Taking false branch
2258 isTailCall = false;
2259
2260 // For both the non-secure calls and the returns from a CMSE entry function,
2261  // the function needs to do some extra work after the call, or before the
2262  // return, respectively, thus it cannot end with a tail call
2263 if (isCmseNSCall
5.1
'isCmseNSCall' is false
|| AFI->isCmseNSEntryFunction())
6
Assuming the condition is false
7
Taking false branch
2264 isTailCall = false;
2265
2266 if (isa<GlobalAddressSDNode>(Callee)) {
8
Assuming 'Callee' is not a 'GlobalAddressSDNode'
9
Taking false branch
2267 // If we're optimizing for minimum size and the function is called three or
2268 // more times in this block, we can improve codesize by calling indirectly
2269 // as BLXr has a 16-bit encoding.
2270 auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
2271 if (CLI.CB) {
2272 auto *BB = CLI.CB->getParent();
2273 PreferIndirect = Subtarget->isThumb() && Subtarget->hasMinSize() &&
2274 count_if(GV->users(), [&BB](const User *U) {
2275 return isa<Instruction>(U) &&
2276 cast<Instruction>(U)->getParent() == BB;
2277 }) > 2;
2278 }
2279 }
2280 if (isTailCall) {
10
Assuming 'isTailCall' is false
11
Taking false branch
2281 // Check if it's really possible to do a tail call.
2282 isTailCall = IsEligibleForTailCallOptimization(
2283 Callee, CallConv, isVarArg, isStructRet,
2284 MF.getFunction().hasStructRetAttr(), Outs, OutVals, Ins, DAG,
2285 PreferIndirect);
2286 if (!isTailCall && CLI.CB && CLI.CB->isMustTailCall())
2287 report_fatal_error("failed to perform tail call elimination on a call "
2288 "site marked musttail");
2289 // We don't support GuaranteedTailCallOpt for ARM, only automatically
2290 // detected sibcalls.
2291 if (isTailCall)
2292 ++NumTailCalls;
2293 }
2294
2295 // Analyze operands of the call, assigning locations to each operand.
2296 SmallVector<CCValAssign, 16> ArgLocs;
2297 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
2298 *DAG.getContext());
2299 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
2300
2301 // Get a count of how many bytes are to be pushed on the stack.
2302 unsigned NumBytes = CCInfo.getNextStackOffset();
2303
2304 if (isTailCall) {
12
Assuming 'isTailCall' is false
13
Taking false branch
2305 // For tail calls, memory operands are available in our caller's stack.
2306 NumBytes = 0;
2307 } else {
2308 // Adjust the stack pointer for the new arguments...
2309 // These operations are automatically eliminated by the prolog/epilog pass
2310 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
2311 }
2312
2313 SDValue StackPtr =
2314 DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
2315
2316 RegsToPassVector RegsToPass;
2317 SmallVector<SDValue, 8> MemOpChains;
2318
2319 // Walk the register/memloc assignments, inserting copies/loads. In the case
2320 // of tail call optimization, arguments are handled later.
2321 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
15
Loop condition is true. Entering loop body
2322 i != e;
14
Assuming 'i' is not equal to 'e'
2323 ++i, ++realArgIdx) {
2324 CCValAssign &VA = ArgLocs[i];
2325 SDValue Arg = OutVals[realArgIdx];
2326 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2327 bool isByVal = Flags.isByVal();
2328
2329 // Promote the value if needed.
2330 switch (VA.getLocInfo()) {
16
Control jumps to 'case BCvt:' at line 2342
2331    default: llvm_unreachable("Unknown loc info!");
2332 case CCValAssign::Full: break;
2333 case CCValAssign::SExt:
2334 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
2335 break;
2336 case CCValAssign::ZExt:
2337 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
2338 break;
2339 case CCValAssign::AExt:
2340 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
2341 break;
2342 case CCValAssign::BCvt:
2343 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2344 break;
17
Execution continues on line 2350
2345 }
2346
2347 // f16 arguments have their size extended to 4 bytes and passed as if they
2348 // had been copied to the LSBs of a 32-bit register.
2349 // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
2350 if (VA.needsCustom() &&
18
Assuming the condition is true
19
Taking false branch
2351 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
2352 Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
2353 } else {
2354 // f16 arguments could have been extended prior to argument lowering.
2355 // Mask them arguments if this is a CMSE nonsecure call.
2356 auto ArgVT = Outs[realArgIdx].ArgVT;
2357 if (isCmseNSCall
19.1
'isCmseNSCall' is false
&& (ArgVT == MVT::f16)) {
20
Taking false branch
2358 auto LocBits = VA.getLocVT().getSizeInBits();
2359 auto MaskValue = APInt::getLowBitsSet(LocBits, ArgVT.getSizeInBits());
2360 SDValue Mask =
2361 DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
2362 Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
2363 Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
2364 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2365 }
2366 }
2367
2368 // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
2369 if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {
21
Taking false branch
2370 SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2371 DAG.getConstant(0, dl, MVT::i32));
2372 SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2373 DAG.getConstant(1, dl, MVT::i32));
2374
2375 PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass, VA, ArgLocs[++i],
2376 StackPtr, MemOpChains, Flags);
2377
2378 VA = ArgLocs[++i]; // skip ahead to next loc
2379 if (VA.isRegLoc()) {
2380 PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass, VA, ArgLocs[++i],
2381 StackPtr, MemOpChains, Flags);
2382 } else {
2383        assert(VA.isMemLoc());
2384
2385 MemOpChains.push_back(
2386 LowerMemOpCallTo(Chain, StackPtr, Op1, dl, DAG, VA, Flags));
2387 }
2388 } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) {
22
Taking true branch
2389 PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
23
Calling 'ARMTargetLowering::PassF64ArgInRegs'
2390 StackPtr, MemOpChains, Flags);
2391 } else if (VA.isRegLoc()) {
2392 if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
2393 Outs[0].VT == MVT::i32) {
2394        assert(VA.getLocVT() == MVT::i32 &&
2395               "unexpected calling convention register assignment");
2396        assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
2397               "unexpected use of 'returned'");
2398 isThisReturn = true;
2399 }
2400 const TargetOptions &Options = DAG.getTarget().Options;
2401 if (Options.EmitCallSiteInfo)
2402 CSInfo.emplace_back(VA.getLocReg(), i);
2403 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
2404 } else if (isByVal) {
2405      assert(VA.isMemLoc());
2406 unsigned offset = 0;
2407
2408 // True if this byval aggregate will be split between registers
2409 // and memory.
2410 unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
2411 unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
2412
2413 if (CurByValIdx < ByValArgsCount) {
2414
2415 unsigned RegBegin, RegEnd;
2416 CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
2417
2418 EVT PtrVT =
2419 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
2420 unsigned int i, j;
2421 for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
2422 SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
2423 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
2424 SDValue Load =
2425 DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo(),
2426 DAG.InferPtrAlign(AddArg));
2427 MemOpChains.push_back(Load.getValue(1));
2428 RegsToPass.push_back(std::make_pair(j, Load));
2429 }
2430
2431        // If the parameter size lies outside the register area, the "offset" value
2432        // helps us calculate the stack slot for the remaining part properly.
2433 offset = RegEnd - RegBegin;
2434
2435 CCInfo.nextInRegsParam();
2436 }
2437
2438 if (Flags.getByValSize() > 4*offset) {
2439 auto PtrVT = getPointerTy(DAG.getDataLayout());
2440 unsigned LocMemOffset = VA.getLocMemOffset();
2441 SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
2442 SDValue Dst = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, StkPtrOff);
2443 SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
2444 SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
2445 SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
2446 MVT::i32);
2447 SDValue AlignNode =
2448 DAG.getConstant(Flags.getNonZeroByValAlign().value(), dl, MVT::i32);
2449
2450 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
2451 SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
2452 MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
2453 Ops));
2454 }
2455 } else if (!isTailCall) {
2456      assert(VA.isMemLoc());
2457
2458 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
2459 dl, DAG, VA, Flags));
2460 }
2461 }
2462
2463 if (!MemOpChains.empty())
2464 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2465
2466 // Build a sequence of copy-to-reg nodes chained together with token chain
2467 // and flag operands which copy the outgoing args into the appropriate regs.
2468 SDValue InFlag;
2469 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
2470 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
2471 RegsToPass[i].second, InFlag);
2472 InFlag = Chain.getValue(1);
2473 }
2474
2475 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
2476 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
2477 // node so that legalize doesn't hack it.
2478 bool isDirect = false;
2479
2480 const TargetMachine &TM = getTargetMachine();
2481 const Module *Mod = MF.getFunction().getParent();
2482 const GlobalValue *GV = nullptr;
2483 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
2484 GV = G->getGlobal();
2485 bool isStub =
2486 !TM.shouldAssumeDSOLocal(*Mod, GV) && Subtarget->isTargetMachO();
2487
2488 bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
2489 bool isLocalARMFunc = false;
2490 auto PtrVt = getPointerTy(DAG.getDataLayout());
2491
2492 if (Subtarget->genLongCalls()) {
2493    assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
2494           "long-calls codegen is not position independent!");
2495 // Handle a global address or an external symbol. If it's not one of
2496 // those, the target's already in a register, so we don't need to do
2497 // anything extra.
2498 if (isa<GlobalAddressSDNode>(Callee)) {
2499 // Create a constant pool entry for the callee address
2500 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2501 ARMConstantPoolValue *CPV =
2502 ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
2503
2504 // Get the address of the callee into a register
2505 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2506 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2507 Callee = DAG.getLoad(
2508 PtrVt, dl, DAG.getEntryNode(), CPAddr,
2509 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2510 } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
2511 const char *Sym = S->getSymbol();
2512
2513 // Create a constant pool entry for the callee address
2514 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2515 ARMConstantPoolValue *CPV =
2516 ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2517 ARMPCLabelIndex, 0);
2518 // Get the address of the callee into a register
2519 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2520 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2521 Callee = DAG.getLoad(
2522 PtrVt, dl, DAG.getEntryNode(), CPAddr,
2523 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2524 }
2525 } else if (isa<GlobalAddressSDNode>(Callee)) {
2526 if (!PreferIndirect) {
2527 isDirect = true;
2528 bool isDef = GV->isStrongDefinitionForLinker();
2529
2530 // ARM call to a local ARM function is predicable.
2531 isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
2532 // tBX takes a register source operand.
2533 if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2534        assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
2535 Callee = DAG.getNode(
2536 ARMISD::WrapperPIC, dl, PtrVt,
2537 DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
2538 Callee = DAG.getLoad(
2539 PtrVt, dl, DAG.getEntryNode(), Callee,
2540 MachinePointerInfo::getGOT(DAG.getMachineFunction()), MaybeAlign(),
2541 MachineMemOperand::MODereferenceable |
2542 MachineMemOperand::MOInvariant);
2543 } else if (Subtarget->isTargetCOFF()) {
2544        assert(Subtarget->isTargetWindows() &&
2545               "Windows is the only supported COFF target");
2546 unsigned TargetFlags = ARMII::MO_NO_FLAG;
2547 if (GV->hasDLLImportStorageClass())
2548 TargetFlags = ARMII::MO_DLLIMPORT;
2549 else if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
2550 TargetFlags = ARMII::MO_COFFSTUB;
2551 Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*offset=*/0,
2552 TargetFlags);
2553 if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
2554 Callee =
2555 DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
2556 DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
2557 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
2558 } else {
2559 Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, 0);
2560 }
2561 }
2562 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2563 isDirect = true;
2564 // tBX takes a register source operand.
2565 const char *Sym = S->getSymbol();
2566 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2567 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2568 ARMConstantPoolValue *CPV =
2569 ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2570 ARMPCLabelIndex, 4);
2571 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2572 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2573 Callee = DAG.getLoad(
2574 PtrVt, dl, DAG.getEntryNode(), CPAddr,
2575 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2576 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2577 Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
2578 } else {
2579 Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
2580 }
2581 }
2582
2583 if (isCmseNSCall) {
2584    assert(!isARMFunc && !isDirect &&
2585           "Cannot handle call to ARM function or direct call");
2586 if (NumBytes > 0) {
2587 DiagnosticInfoUnsupported Diag(DAG.getMachineFunction().getFunction(),
2588 "call to non-secure function would "
2589 "require passing arguments on stack",
2590 dl.getDebugLoc());
2591 DAG.getContext()->diagnose(Diag);
2592 }
2593 if (isStructRet) {
2594 DiagnosticInfoUnsupported Diag(
2595 DAG.getMachineFunction().getFunction(),
2596 "call to non-secure function would return value through pointer",
2597 dl.getDebugLoc());
2598 DAG.getContext()->diagnose(Diag);
2599 }
2600 }
2601
2602 // FIXME: handle tail calls differently.
2603 unsigned CallOpc;
2604 if (Subtarget->isThumb()) {
2605 if (isCmseNSCall)
2606 CallOpc = ARMISD::tSECALL;
2607 else if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2608 CallOpc = ARMISD::CALL_NOLINK;
2609 else
2610 CallOpc = ARMISD::CALL;
2611 } else {
2612 if (!isDirect && !Subtarget->hasV5TOps())
2613 CallOpc = ARMISD::CALL_NOLINK;
2614 else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2615 // Emit regular call when code size is the priority
2616 !Subtarget->hasMinSize())
2617 // "mov lr, pc; b _foo" to avoid confusing the RSP
2618 CallOpc = ARMISD::CALL_NOLINK;
2619 else
2620 CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
2621 }
2622
2623 std::vector<SDValue> Ops;
2624 Ops.push_back(Chain);
2625 Ops.push_back(Callee);
2626
2627 // Add argument registers to the end of the list so that they are known live
2628 // into the call.
2629 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2630 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2631 RegsToPass[i].second.getValueType()));
2632
2633 // Add a register mask operand representing the call-preserved registers.
2634 if (!isTailCall) {
2635 const uint32_t *Mask;
2636 const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
2637 if (isThisReturn) {
2638 // For 'this' returns, use the R0-preserving mask if applicable
2639 Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
2640 if (!Mask) {
2641 // Set isThisReturn to false if the calling convention is not one that
2642 // allows 'returned' to be modeled in this way, so LowerCallResult does
2643 // not try to pass 'this' straight through
2644 isThisReturn = false;
2645 Mask = ARI->getCallPreservedMask(MF, CallConv);
2646 }
2647 } else
2648 Mask = ARI->getCallPreservedMask(MF, CallConv);
2649
2650    assert(Mask && "Missing call preserved mask for calling convention");
2651 Ops.push_back(DAG.getRegisterMask(Mask));
2652 }
2653
2654 if (InFlag.getNode())
2655 Ops.push_back(InFlag);
2656
2657 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2658 if (isTailCall) {
2659 MF.getFrameInfo().setHasTailCall();
2660 SDValue Ret = DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
2661 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
2662 return Ret;
2663 }
2664
2665 // Returns a chain and a flag for retval copy to use.
2666 Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
2667 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2668 InFlag = Chain.getValue(1);
2669 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
2670
2671 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
2672 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
2673 if (!Ins.empty())
2674 InFlag = Chain.getValue(1);
2675
2676 // Handle result values, copying them out of physregs into vregs that we
2677 // return.
2678 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
2679 InVals, isThisReturn,
2680 isThisReturn ? OutVals[0] : SDValue());
2681}
2682
2683/// HandleByVal - Every parameter *after* a byval parameter is passed
2684/// on the stack. Remember the next parameter register to allocate,
2685/// and then confiscate the rest of the parameter registers to ensure
2686/// this.
2687void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
2688 Align Alignment) const {
2689 // Byval (as with any stack) slots are always at least 4 byte aligned.
2690 Alignment = std::max(Alignment, Align(4));
2691
2692 unsigned Reg = State->AllocateReg(GPRArgRegs);
2693 if (!Reg)
2694 return;
2695
2696 unsigned AlignInRegs = Alignment.value() / 4;
2697 unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
2698 for (unsigned i = 0; i < Waste; ++i)
2699 Reg = State->AllocateReg(GPRArgRegs);
2700
2701 if (!Reg)
2702 return;
2703
2704 unsigned Excess = 4 * (ARM::R4 - Reg);
2705
2706  // Special case: NSAA != SP and the parameter size is greater than the size
2707  // of all remaining GPR regs. In that case we cannot split the parameter and
2708  // must send it entirely to the stack. We also must set NCRN to R4, so all
2709  // remaining registers are wasted.
2710 const unsigned NSAAOffset = State->getNextStackOffset();
2711 if (NSAAOffset != 0 && Size > Excess) {
2712 while (State->AllocateReg(GPRArgRegs))
2713 ;
2714 return;
2715 }
2716
2717  // The first register for the byval parameter is the first register that
2718  // wasn't allocated before this method call, i.e. "reg".
2719  // If the parameter is small enough to be saved in the range [reg, r4), then
2720  // the end (one past the last) register is reg + param-size-in-regs;
2721  // otherwise the parameter is split between registers and stack, and the
2722  // end register is r4 in that case.
2723 unsigned ByValRegBegin = Reg;
2724 unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
2725 State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
2726  // Note that the first register was already allocated at the beginning of
2727  // this function; allocate the remaining registers we need.
2728 for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
2729 State->AllocateReg(GPRArgRegs);
2730 // A byval parameter that is split between registers and memory needs its
2731 // size truncated here.
2732 // In the case where the entire structure fits in registers, we set the
2733 // size in memory to zero.
2734 Size = std::max<int>(Size - Excess, 0);
2735}
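
To make the register/stack split above concrete, here is a minimal standalone sketch (an illustration only, not part of ARMISelLowering.cpp; the NSAA special case is omitted) of the same arithmetic: given the first free argument register, the byval size and its alignment, it computes which registers the aggregate occupies and how many bytes spill to the stack.

#include <algorithm>
#include <cstdio>

// Hypothetical standalone model of HandleByVal's arithmetic; registers r0..r3
// are modelled as 0..3 and the value 4 plays the role of ARM::R4.
static void SplitByVal(unsigned FirstFreeReg, unsigned Size, unsigned Align) {
  unsigned AlignInRegs = std::max(Align, 4u) / 4;
  unsigned Reg = FirstFreeReg + (4 - FirstFreeReg) % AlignInRegs; // skip wasted regs
  if (Reg >= 4) {
    std::printf("entirely in memory: %u bytes\n", Size);
    return;
  }
  unsigned RegEnd = std::min(Reg + Size / 4, 4u); // one past the last register used
  unsigned InRegs = 4 * (RegEnd - Reg);
  std::printf("registers r%u..r%u, %u bytes on the stack\n", Reg, RegEnd - 1,
              Size > InRegs ? Size - InRegs : 0);
}

int main() { SplitByVal(/*FirstFreeReg=*/1, /*Size=*/24, /*Align=*/8); }

With r0 already taken, a 24-byte aggregate aligned to 8 lands in r2-r3 and leaves 16 bytes for the stack, matching the RegBegin/RegEnd bookkeeping consumed by the byval loop in LowerCall above.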
2736
2737/// MatchingStackOffset - Return true if the given stack call argument is
2738/// already available in the same (relative) position of the caller's
2739/// incoming argument stack.
2740static
2741bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
2742 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
2743 const TargetInstrInfo *TII) {
2744 unsigned Bytes = Arg.getValueSizeInBits() / 8;
2745 int FI = std::numeric_limits<int>::max();
2746 if (Arg.getOpcode() == ISD::CopyFromReg) {
2747 unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2748 if (!Register::isVirtualRegister(VR))
2749 return false;
2750 MachineInstr *Def = MRI->getVRegDef(VR);
2751 if (!Def)
2752 return false;
2753 if (!Flags.isByVal()) {
2754 if (!TII->isLoadFromStackSlot(*Def, FI))
2755 return false;
2756 } else {
2757 return false;
2758 }
2759 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2760 if (Flags.isByVal())
2761 // ByVal argument is passed in as a pointer but it's now being
2762 // dereferenced. e.g.
2763 // define @foo(%struct.X* %A) {
2764 // tail call @bar(%struct.X* byval %A)
2765 // }
2766 return false;
2767 SDValue Ptr = Ld->getBasePtr();
2768 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
2769 if (!FINode)
2770 return false;
2771 FI = FINode->getIndex();
2772 } else
2773 return false;
2774
2775  assert(FI != std::numeric_limits<int>::max());
2776 if (!MFI.isFixedObjectIndex(FI))
2777 return false;
2778 return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
2779}
2780
2781/// IsEligibleForTailCallOptimization - Check whether the call is eligible
2782/// for tail call optimization. Targets which want to do tail call
2783/// optimization should implement this function.
2784bool ARMTargetLowering::IsEligibleForTailCallOptimization(
2785 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
2786 bool isCalleeStructRet, bool isCallerStructRet,
2787 const SmallVectorImpl<ISD::OutputArg> &Outs,
2788 const SmallVectorImpl<SDValue> &OutVals,
2789 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG,
2790 const bool isIndirect) const {
2791 MachineFunction &MF = DAG.getMachineFunction();
2792 const Function &CallerF = MF.getFunction();
2793 CallingConv::ID CallerCC = CallerF.getCallingConv();
2794
2795  assert(Subtarget->supportsTailCall());
2796
2797 // Indirect tail calls cannot be optimized for Thumb1 if the args
2798 // to the call take up r0-r3. The reason is that there are no legal registers
2799 // left to hold the pointer to the function to be called.
2800 if (Subtarget->isThumb1Only() && Outs.size() >= 4 &&
2801 (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect))
2802 return false;
2803
2804 // Look for obvious safe cases to perform tail call optimization that do not
2805 // require ABI changes. This is what gcc calls sibcall.
2806
2807 // Exception-handling functions need a special set of instructions to indicate
2808 // a return to the hardware. Tail-calling another function would probably
2809 // break this.
2810 if (CallerF.hasFnAttribute("interrupt"))
2811 return false;
2812
2813 // Also avoid sibcall optimization if either caller or callee uses struct
2814 // return semantics.
2815 if (isCalleeStructRet || isCallerStructRet)
2816 return false;
2817
2818 // Externally-defined functions with weak linkage should not be
2819 // tail-called on ARM when the OS does not support dynamic
2820 // pre-emption of symbols, as the AAELF spec requires normal calls
2821 // to undefined weak functions to be replaced with a NOP or jump to the
2822 // next instruction. The behaviour of branch instructions in this
2823 // situation (as used for tail calls) is implementation-defined, so we
2824 // cannot rely on the linker replacing the tail call with a return.
2825 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2826 const GlobalValue *GV = G->getGlobal();
2827 const Triple &TT = getTargetMachine().getTargetTriple();
2828 if (GV->hasExternalWeakLinkage() &&
2829 (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
2830 return false;
2831 }
2832
2833 // Check that the call results are passed in the same way.
2834 LLVMContext &C = *DAG.getContext();
2835 if (!CCState::resultsCompatible(
2836 getEffectiveCallingConv(CalleeCC, isVarArg),
2837 getEffectiveCallingConv(CallerCC, CallerF.isVarArg()), MF, C, Ins,
2838 CCAssignFnForReturn(CalleeCC, isVarArg),
2839 CCAssignFnForReturn(CallerCC, CallerF.isVarArg())))
2840 return false;
2841 // The callee has to preserve all registers the caller needs to preserve.
2842 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2843 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
2844 if (CalleeCC != CallerCC) {
2845 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
2846 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
2847 return false;
2848 }
2849
2850 // If Caller's vararg or byval argument has been split between registers and
2851 // stack, do not perform tail call, since part of the argument is in caller's
2852 // local frame.
2853 const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
2854 if (AFI_Caller->getArgRegsSaveSize())
2855 return false;
2856
2857 // If the callee takes no arguments then go on to check the results of the
2858 // call.
2859 if (!Outs.empty()) {
2860 // Check if stack adjustment is needed. For now, do not do this if any
2861 // argument is passed on the stack.
2862 SmallVector<CCValAssign, 16> ArgLocs;
2863 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
2864 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
2865 if (CCInfo.getNextStackOffset()) {
2866 // Check if the arguments are already laid out in the right way as
2867 // the caller's fixed stack objects.
2868 MachineFrameInfo &MFI = MF.getFrameInfo();
2869 const MachineRegisterInfo *MRI = &MF.getRegInfo();
2870 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2871 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2872 i != e;
2873 ++i, ++realArgIdx) {
2874 CCValAssign &VA = ArgLocs[i];
2875 EVT RegVT = VA.getLocVT();
2876 SDValue Arg = OutVals[realArgIdx];
2877 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2878 if (VA.getLocInfo() == CCValAssign::Indirect)
2879 return false;
2880 if (VA.needsCustom() && (RegVT == MVT::f64 || RegVT == MVT::v2f64)) {
2881 // f64 and vector types are split into multiple registers or
2882 // register/stack-slot combinations. The types will not match
2883 // the registers; give up on memory f64 refs until we figure
2884 // out what to do about this.
2885 if (!VA.isRegLoc())
2886 return false;
2887 if (!ArgLocs[++i].isRegLoc())
2888 return false;
2889 if (RegVT == MVT::v2f64) {
2890 if (!ArgLocs[++i].isRegLoc())
2891 return false;
2892 if (!ArgLocs[++i].isRegLoc())
2893 return false;
2894 }
2895 } else if (!VA.isRegLoc()) {
2896 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
2897 MFI, MRI, TII))
2898 return false;
2899 }
2900 }
2901 }
2902
2903 const MachineRegisterInfo &MRI = MF.getRegInfo();
2904 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
2905 return false;
2906 }
2907
2908 return true;
2909}
2910
2911bool
2912ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
2913 MachineFunction &MF, bool isVarArg,
2914 const SmallVectorImpl<ISD::OutputArg> &Outs,
2915 LLVMContext &Context) const {
2916 SmallVector<CCValAssign, 16> RVLocs;
2917 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2918 return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2919}
2920
2921static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
2922 const SDLoc &DL, SelectionDAG &DAG) {
2923 const MachineFunction &MF = DAG.getMachineFunction();
2924 const Function &F = MF.getFunction();
2925
2926 StringRef IntKind = F.getFnAttribute("interrupt").getValueAsString();
2927
2928 // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
2929 // version of the "preferred return address". These offsets affect the return
2930 // instruction if this is a return from PL1 without hypervisor extensions.
2931 // IRQ/FIQ: +4 "subs pc, lr, #4"
2932 // SWI: 0 "subs pc, lr, #0"
2933 // ABORT: +4 "subs pc, lr, #4"
2934 // UNDEF: +4/+2 "subs pc, lr, #0"
2935 // UNDEF varies depending on where the exception came from ARM or Thumb
2936 // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
2937
2938 int64_t LROffset;
2939 if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
2940 IntKind == "ABORT")
2941 LROffset = 4;
2942 else if (IntKind == "SWI" || IntKind == "UNDEF")
2943 LROffset = 0;
2944 else
2945 report_fatal_error("Unsupported interrupt attribute. If present, value "
2946 "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
2947
2948 RetOps.insert(RetOps.begin() + 1,
2949 DAG.getConstant(LROffset, DL, MVT::i32, false));
2950
2951 return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps);
2952}
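
As a hedged illustration of the offset table above (a sketch, not code from this file), the mapping from the "interrupt" attribute value to the LR adjustment, and hence to the return instruction, looks like this:

#include <cstdint>
#include <string>

// Illustrative only: IRQ/FIQ/ABORT (and an empty value) return with
// "subs pc, lr, #4"; SWI/UNDEF return with "subs pc, lr, #0".
static int64_t InterruptLROffset(const std::string &IntKind) {
  if (IntKind.empty() || IntKind == "IRQ" || IntKind == "FIQ" ||
      IntKind == "ABORT")
    return 4;
  if (IntKind == "SWI" || IntKind == "UNDEF")
    return 0;
  return -1; // any other value makes the backend emit a fatal error
}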
2953
2954SDValue
2955ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2956 bool isVarArg,
2957 const SmallVectorImpl<ISD::OutputArg> &Outs,
2958 const SmallVectorImpl<SDValue> &OutVals,
2959 const SDLoc &dl, SelectionDAG &DAG) const {
2960 // CCValAssign - represent the assignment of the return value to a location.
2961 SmallVector<CCValAssign, 16> RVLocs;
2962
2963 // CCState - Info about the registers and stack slots.
2964 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2965 *DAG.getContext());
2966
2967 // Analyze outgoing return values.
2968 CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2969
2970 SDValue Flag;
2971 SmallVector<SDValue, 4> RetOps;
2972 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2973 bool isLittleEndian = Subtarget->isLittle();
2974
2975 MachineFunction &MF = DAG.getMachineFunction();
2976 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2977 AFI->setReturnRegsCount(RVLocs.size());
2978
2979 // Report error if cmse entry function returns structure through first ptr arg.
2980 if (AFI->isCmseNSEntryFunction() && MF.getFunction().hasStructRetAttr()) {
2981 // Note: using an empty SDLoc(), as the first line of the function is a
2982 // better place to report than the last line.
2983 DiagnosticInfoUnsupported Diag(
2984 DAG.getMachineFunction().getFunction(),
2985 "secure entry function would return value through pointer",
2986 SDLoc().getDebugLoc());
2987 DAG.getContext()->diagnose(Diag);
2988 }
2989
2990 // Copy the result values into the output registers.
2991 for (unsigned i = 0, realRVLocIdx = 0;
2992 i != RVLocs.size();
2993 ++i, ++realRVLocIdx) {
2994 CCValAssign &VA = RVLocs[i];
2995    assert(VA.isRegLoc() && "Can only return in registers!");
2996
2997 SDValue Arg = OutVals[realRVLocIdx];
2998 bool ReturnF16 = false;
2999
3000 if (Subtarget->hasFullFP16() && Subtarget->isTargetHardFloat()) {
3001 // Half-precision return values can be returned like this:
3002 //
3003 // t11 f16 = fadd ...
3004 // t12: i16 = bitcast t11
3005 // t13: i32 = zero_extend t12
3006 // t14: f32 = bitcast t13 <~~~~~~~ Arg
3007 //
3008 // to avoid code generation for bitcasts, we simply set Arg to the node
3009 // that produces the f16 value, t11 in this case.
3010 //
3011 if (Arg.getValueType() == MVT::f32 && Arg.getOpcode() == ISD::BITCAST) {
3012 SDValue ZE = Arg.getOperand(0);
3013 if (ZE.getOpcode() == ISD::ZERO_EXTEND && ZE.getValueType() == MVT::i32) {
3014 SDValue BC = ZE.getOperand(0);
3015 if (BC.getOpcode() == ISD::BITCAST && BC.getValueType() == MVT::i16) {
3016 Arg = BC.getOperand(0);
3017 ReturnF16 = true;
3018 }
3019 }
3020 }
3021 }
3022
3023 switch (VA.getLocInfo()) {
3024    default: llvm_unreachable("Unknown loc info!");
3025 case CCValAssign::Full: break;
3026 case CCValAssign::BCvt:
3027 if (!ReturnF16)
3028 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
3029 break;
3030 }
3031
3032 // Mask f16 arguments if this is a CMSE nonsecure entry.
3033 auto RetVT = Outs[realRVLocIdx].ArgVT;
3034 if (AFI->isCmseNSEntryFunction() && (RetVT == MVT::f16)) {
3035 if (VA.needsCustom() && VA.getValVT() == MVT::f16) {
3036 Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
3037 } else {
3038 auto LocBits = VA.getLocVT().getSizeInBits();
3039 auto MaskValue = APInt::getLowBitsSet(LocBits, RetVT.getSizeInBits());
3040 SDValue Mask =
3041 DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
3042 Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
3043 Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
3044 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
3045 }
3046 }
3047
3048 if (VA.needsCustom() &&
3049 (VA.getLocVT() == MVT::v2f64 || VA.getLocVT() == MVT::f64)) {
3050 if (VA.getLocVT() == MVT::v2f64) {
3051 // Extract the first half and return it in two registers.
3052 SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
3053 DAG.getConstant(0, dl, MVT::i32));
3054 SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
3055 DAG.getVTList(MVT::i32, MVT::i32), Half);
3056
3057 Chain =
3058 DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3059 HalfGPRs.getValue(isLittleEndian ? 0 : 1), Flag);
3060 Flag = Chain.getValue(1);
3061 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3062 VA = RVLocs[++i]; // skip ahead to next loc
3063 Chain =
3064 DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3065 HalfGPRs.getValue(isLittleEndian ? 1 : 0), Flag);
3066 Flag = Chain.getValue(1);
3067 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3068 VA = RVLocs[++i]; // skip ahead to next loc
3069
3070 // Extract the 2nd half and fall through to handle it as an f64 value.
3071 Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
3072 DAG.getConstant(1, dl, MVT::i32));
3073 }
3074 // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
3075 // available.
3076 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
3077 DAG.getVTList(MVT::i32, MVT::i32), Arg);
3078 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3079 fmrrd.getValue(isLittleEndian ? 0 : 1), Flag);
3080 Flag = Chain.getValue(1);
3081 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3082 VA = RVLocs[++i]; // skip ahead to next loc
3083 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3084 fmrrd.getValue(isLittleEndian ? 1 : 0), Flag);
3085 } else
3086 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
3087
3088 // Guarantee that all emitted copies are
3089 // stuck together, avoiding something bad.
3090 Flag = Chain.getValue(1);
3091 RetOps.push_back(DAG.getRegister(
3092 VA.getLocReg(), ReturnF16 ? Arg.getValueType() : VA.getLocVT()));
3093 }
3094 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
3095 const MCPhysReg *I =
3096 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
3097 if (I) {
3098 for (; *I; ++I) {
3099 if (ARM::GPRRegClass.contains(*I))
3100 RetOps.push_back(DAG.getRegister(*I, MVT::i32));
3101 else if (ARM::DPRRegClass.contains(*I))
3102 RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
3103 else
3104        llvm_unreachable("Unexpected register class in CSRsViaCopy!");
3105 }
3106 }
3107
3108 // Update chain and glue.
3109 RetOps[0] = Chain;
3110 if (Flag.getNode())
3111 RetOps.push_back(Flag);
3112
3113 // CPUs which aren't M-class use a special sequence to return from
3114 // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
3115 // though we use "subs pc, lr, #N").
3116 //
3117 // M-class CPUs actually use a normal return sequence with a special
3118 // (hardware-provided) value in LR, so the normal code path works.
3119 if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt") &&
3120 !Subtarget->isMClass()) {
3121 if (Subtarget->isThumb1Only())
3122 report_fatal_error("interrupt attribute is not supported in Thumb1");
3123 return LowerInterruptReturn(RetOps, dl, DAG);
3124 }
3125
3126 ARMISD::NodeType RetNode = AFI->isCmseNSEntryFunction() ? ARMISD::SERET_FLAG :
3127 ARMISD::RET_FLAG;
3128 return DAG.getNode(RetNode, dl, MVT::Other, RetOps);
3129}
3130
3131bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
3132 if (N->getNumValues() != 1)
3133 return false;
3134 if (!N->hasNUsesOfValue(1, 0))
3135 return false;
3136
3137 SDValue TCChain = Chain;
3138 SDNode *Copy = *N->use_begin();
3139 if (Copy->getOpcode() == ISD::CopyToReg) {
3140 // If the copy has a glue operand, we conservatively assume it isn't safe to
3141 // perform a tail call.
3142 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3143 return false;
3144 TCChain = Copy->getOperand(0);
3145 } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
3146 SDNode *VMov = Copy;
3147 // f64 returned in a pair of GPRs.
3148 SmallPtrSet<SDNode*, 2> Copies;
3149 for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
3150 UI != UE; ++UI) {
3151 if (UI->getOpcode() != ISD::CopyToReg)
3152 return false;
3153 Copies.insert(*UI);
3154 }
3155 if (Copies.size() > 2)
3156 return false;
3157
3158 for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
3159 UI != UE; ++UI) {
3160 SDValue UseChain = UI->getOperand(0);
3161 if (Copies.count(UseChain.getNode()))
3162 // Second CopyToReg
3163 Copy = *UI;
3164 else {
3165 // We are at the top of this chain.
3166 // If the copy has a glue operand, we conservatively assume it
3167 // isn't safe to perform a tail call.
3168 if (UI->getOperand(UI->getNumOperands()-1).getValueType() == MVT::Glue)
3169 return false;
3170 // First CopyToReg
3171 TCChain = UseChain;
3172 }
3173 }
3174 } else if (Copy->getOpcode() == ISD::BITCAST) {
3175 // f32 returned in a single GPR.
3176 if (!Copy->hasOneUse())
3177 return false;
3178 Copy = *Copy->use_begin();
3179 if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
3180 return false;
3181 // If the copy has a glue operand, we conservatively assume it isn't safe to
3182 // perform a tail call.
3183 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3184 return false;
3185 TCChain = Copy->getOperand(0);
3186 } else {
3187 return false;
3188 }
3189
3190 bool HasRet = false;
3191 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
3192 UI != UE; ++UI) {
3193 if (UI->getOpcode() != ARMISD::RET_FLAG &&
3194 UI->getOpcode() != ARMISD::INTRET_FLAG)
3195 return false;
3196 HasRet = true;
3197 }
3198
3199 if (!HasRet)
3200 return false;
3201
3202 Chain = TCChain;
3203 return true;
3204}
3205
3206bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
3207 if (!Subtarget->supportsTailCall())
3208 return false;
3209
3210 if (!CI->isTailCall())
3211 return false;
3212
3213 return true;
3214}
3215
3216// Writing a 64-bit value requires splitting it into two 32-bit values first
3217// and passing the low and high parts through separately.
3218static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) {
3219 SDLoc DL(Op);
3220 SDValue WriteValue = Op->getOperand(2);
3221
3222 // This function is only supposed to be called for i64 type argument.
3223  assert(WriteValue.getValueType() == MVT::i64
3224         && "LowerWRITE_REGISTER called for non-i64 type argument.");
3225
3226 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
3227 DAG.getConstant(0, DL, MVT::i32));
3228 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
3229 DAG.getConstant(1, DL, MVT::i32));
3230 SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
3231 return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
3232}
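
A minimal sketch (plain C++ analogue, not part of the source) of the split performed above: the i64 write value is decomposed into its low and high 32-bit halves, which then occupy the two GPR operands of the rebuilt WRITE_REGISTER node.

#include <cstdint>

// Illustrative analogue of the two EXTRACT_ELEMENT nodes built above.
static void SplitWriteValue(uint64_t WriteValue, uint32_t &Lo, uint32_t &Hi) {
  Lo = static_cast<uint32_t>(WriteValue);       // element 0: low half
  Hi = static_cast<uint32_t>(WriteValue >> 32); // element 1: high half
}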
3233
3234// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
3235// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
3236// one of the above mentioned nodes. It has to be wrapped because otherwise
3237// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
3238// be used to form addressing mode. These wrapped nodes will be selected
3239// into MOVi.
3240SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
3241 SelectionDAG &DAG) const {
3242 EVT PtrVT = Op.getValueType();
3243 // FIXME there is no actual debug info here
3244 SDLoc dl(Op);
3245 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3246 SDValue Res;
3247
3248 // When generating execute-only code Constant Pools must be promoted to the
3249 // global data section. It's a bit ugly that we can't share them across basic
3250  // blocks, but this way we guarantee that execute-only behaves correctly with
3251 // position-independent addressing modes.
3252 if (Subtarget->genExecuteOnly()) {
3253 auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
3254 auto T = const_cast<Type*>(CP->getType());
3255 auto C = const_cast<Constant*>(CP->getConstVal());
3256 auto M = const_cast<Module*>(DAG.getMachineFunction().
3257 getFunction().getParent());
3258 auto GV = new GlobalVariable(
3259 *M, T, /*isConstant=*/true, GlobalVariable::InternalLinkage, C,
3260 Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" +
3261 Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" +
3262 Twine(AFI->createPICLabelUId())
3263 );
3264 SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV),
3265 dl, PtrVT);
3266 return LowerGlobalAddress(GA, DAG);
3267 }
3268
3269 if (CP->isMachineConstantPoolEntry())
3270 Res =
3271 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign());
3272 else
3273 Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign());
3274 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
3275}
3276
3277unsigned ARMTargetLowering::getJumpTableEncoding() const {
3278 return MachineJumpTableInfo::EK_Inline;
3279}
3280
3281SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
3282 SelectionDAG &DAG) const {
3283 MachineFunction &MF = DAG.getMachineFunction();
3284 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3285 unsigned ARMPCLabelIndex = 0;
3286 SDLoc DL(Op);
3287 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3288 const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
3289 SDValue CPAddr;
3290 bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
3291 if (!IsPositionIndependent) {
3292 CPAddr = DAG.getTargetConstantPool(BA, PtrVT, Align(4));
3293 } else {
3294 unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
3295 ARMPCLabelIndex = AFI->createPICLabelUId();
3296 ARMConstantPoolValue *CPV =
3297 ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
3298 ARMCP::CPBlockAddress, PCAdj);
3299 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3300 }
3301 CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
3302 SDValue Result = DAG.getLoad(
3303 PtrVT, DL, DAG.getEntryNode(), CPAddr,
3304 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3305 if (!IsPositionIndependent)
3306 return Result;
3307 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
3308 return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
3309}
3310
3311/// Convert a TLS address reference into the correct sequence of loads
3312/// and calls to compute the variable's address for Darwin, and return an
3313/// SDValue containing the final node.
3314
3315/// Darwin only has one TLS scheme which must be capable of dealing with the
3316/// fully general situation, in the worst case. This means:
3317/// + "extern __thread" declaration.
3318/// + Defined in a possibly unknown dynamic library.
3319///
3320/// The general system is that each __thread variable has a [3 x i32] descriptor
3321/// which contains information used by the runtime to calculate the address. The
3322/// only part of this the compiler needs to know about is the first word, which
3323/// contains a function pointer that must be called with the address of the
3324/// entire descriptor in "r0".
3325///
3326/// Since this descriptor may be in a different unit, in general access must
3327/// proceed along the usual ARM rules. A common sequence to produce is:
3328///
3329/// movw rT1, :lower16:_var$non_lazy_ptr
3330/// movt rT1, :upper16:_var$non_lazy_ptr
3331/// ldr r0, [rT1]
3332/// ldr rT2, [r0]
3333/// blx rT2
3334/// [...address now in r0...]
3335SDValue
3336ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
3337 SelectionDAG &DAG) const {
3338  assert(Subtarget->isTargetDarwin() &&
3339         "This function expects a Darwin target");
3340 SDLoc DL(Op);
3341
3342  // First step is to get the address of the actual global symbol. This is where
3343 // the TLS descriptor lives.
3344 SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
3345
3346 // The first entry in the descriptor is a function pointer that we must call
3347 // to obtain the address of the variable.
3348 SDValue Chain = DAG.getEntryNode();
3349 SDValue FuncTLVGet = DAG.getLoad(
3350 MVT::i32, DL, Chain, DescAddr,
3351 MachinePointerInfo::getGOT(DAG.getMachineFunction()), Align(4),
3352 MachineMemOperand::MONonTemporal | MachineMemOperand::MODereferenceable |
3353 MachineMemOperand::MOInvariant);
3354 Chain = FuncTLVGet.getValue(1);
3355
3356 MachineFunction &F = DAG.getMachineFunction();
3357 MachineFrameInfo &MFI = F.getFrameInfo();
3358 MFI.setAdjustsStack(true);
3359
3360 // TLS calls preserve all registers except those that absolutely must be
3361 // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
3362 // silly).
3363 auto TRI =
3364 getTargetMachine().getSubtargetImpl(F.getFunction())->getRegisterInfo();
3365 auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
3366 const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
3367
3368 // Finally, we can make the call. This is just a degenerate version of a
3369  // normal ARM call node: r0 takes the address of the descriptor, and
3370 // returns the address of the variable in this thread.
3371 Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
3372 Chain =
3373 DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
3374 Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
3375 DAG.getRegisterMask(Mask), Chain.getValue(1));
3376 return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
3377}
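
For reference, a hypothetical sketch (an assumption about the runtime layout, not taken from this file) of the [3 x i32] descriptor the comment above describes; the compiler relies only on the first word, the thunk that is called with the descriptor's address in r0 and returns the variable's address.

// Hypothetical Darwin TLV descriptor layout on 32-bit ARM.
struct TLVDescriptor {
  void *(*Thunk)(TLVDescriptor *); // called with &descriptor in r0
  unsigned long Key;               // opaque, managed by the runtime
  unsigned long Offset;            // opaque, managed by the runtime
};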
3378
3379SDValue
3380ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
3381 SelectionDAG &DAG) const {
3382  assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
3383
3384 SDValue Chain = DAG.getEntryNode();
3385 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3386 SDLoc DL(Op);
3387
3388 // Load the current TEB (thread environment block)
3389 SDValue Ops[] = {Chain,
3390 DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),
3391 DAG.getTargetConstant(15, DL, MVT::i32),
3392 DAG.getTargetConstant(0, DL, MVT::i32),
3393 DAG.getTargetConstant(13, DL, MVT::i32),
3394 DAG.getTargetConstant(0, DL, MVT::i32),
3395 DAG.getTargetConstant(2, DL, MVT::i32)};
3396 SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
3397 DAG.getVTList(MVT::i32, MVT::Other), Ops);
3398
3399 SDValue TEB = CurrentTEB.getValue(0);
3400 Chain = CurrentTEB.getValue(1);
3401
3402 // Load the ThreadLocalStoragePointer from the TEB
3403 // A pointer to the TLS array is located at offset 0x2c from the TEB.
3404 SDValue TLSArray =
3405 DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));
3406 TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
3407
3408  // The pointer to the thread's TLS data area is found at the offset
3409  // (TLS index * 4) into the TLSArray.
3410
3411 // Load the TLS index from the C runtime
3412 SDValue TLSIndex =
3413 DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);
3414 TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
3415 TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());
3416
3417 SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
3418 DAG.getConstant(2, DL, MVT::i32));
3419 SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
3420 DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
3421 MachinePointerInfo());
3422
3423 // Get the offset of the start of the .tls section (section base)
3424 const auto *GA = cast<GlobalAddressSDNode>(Op);
3425 auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
3426 SDValue Offset = DAG.getLoad(
3427 PtrVT, DL, Chain,
3428 DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,
3429 DAG.getTargetConstantPool(CPV, PtrVT, Align(4))),
3430 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3431
3432 return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
3433}
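
A hedged illustration (plain C++, assuming the documented Windows-on-ARM TEB layout; not part of the source) of the address computation built above: read ThreadLocalStoragePointer from the TEB at offset 0x2c, index it with _tls_index, then add the variable's SECREL offset.

extern "C" unsigned _tls_index; // provided by the CRT

static inline char *WindowsTLSAddress(char *TEB, unsigned SecRelOffset) {
  char **TLSArray = *reinterpret_cast<char ***>(TEB + 0x2c); // ThreadLocalStoragePointer
  char *TLSBase = TLSArray[_tls_index];                      // this module's TLS block
  return TLSBase + SecRelOffset;                             // offset within the .tls section
}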
3434
3435// Lower ISD::GlobalTLSAddress using the "general dynamic" model
3436SDValue
3437ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
3438 SelectionDAG &DAG) const {
3439 SDLoc dl(GA);
3440 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3441 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3442 MachineFunction &MF = DAG.getMachineFunction();
3443 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3444 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3445 ARMConstantPoolValue *CPV =
3446 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3447 ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
3448 SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3449 Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
3450 Argument = DAG.getLoad(
3451 PtrVT, dl, DAG.getEntryNode(), Argument,
3452 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3453 SDValue Chain = Argument.getValue(1);
3454
3455 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3456 Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
3457
3458 // call __tls_get_addr.
3459 ArgListTy Args;
3460 ArgListEntry Entry;
3461 Entry.Node = Argument;
3462 Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
3463 Args.push_back(Entry);
3464
3465 // FIXME: is there useful debug info available here?
3466 TargetLowering::CallLoweringInfo CLI(DAG);
3467 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3468 CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
3469 DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
3470
3471 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3472 return CallResult.first;
3473}
3474
3475// Lower ISD::GlobalTLSAddress using the "initial exec" or
3476// "local exec" model.
3477SDValue
3478ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
3479 SelectionDAG &DAG,
3480 TLSModel::Model model) const {
3481 const GlobalValue *GV = GA->getGlobal();
3482 SDLoc dl(GA);
3483 SDValue Offset;
3484 SDValue Chain = DAG.getEntryNode();
3485 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3486 // Get the Thread Pointer
3487 SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
3488
3489 if (model == TLSModel::InitialExec) {
3490 MachineFunction &MF = DAG.getMachineFunction();
3491 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3492 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3493 // Initial exec model.
3494 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3495 ARMConstantPoolValue *CPV =
3496 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3497 ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
3498 true);
3499 Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3500 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3501 Offset = DAG.getLoad(
3502 PtrVT, dl, Chain, Offset,
3503 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3504 Chain = Offset.getValue(1);
3505
3506 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3507 Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
3508
3509 Offset = DAG.getLoad(
3510 PtrVT, dl, Chain, Offset,
3511 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3512 } else {
3513 // local exec model
3514    assert(model == TLSModel::LocalExec);
3515 ARMConstantPoolValue *CPV =
3516 ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
3517 Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3518 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3519 Offset = DAG.getLoad(
3520 PtrVT, dl, Chain, Offset,
3521 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3522 }
3523
3524 // The address of the thread local variable is the add of the thread
3525 // pointer with the offset of the variable.
3526 return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
3527}
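
Both exec models above reduce to the same final formula, sketched here for clarity (illustrative only): the thread pointer plus a per-variable offset, where initial-exec loads the offset through a GOTTPOFF slot and local-exec uses a link-time TPOFF constant.

#include <cstdint>

// Illustrative only: the last ISD::ADD emitted above.
static inline uintptr_t TLSExecAddress(uintptr_t ThreadPointer, uintptr_t Offset) {
  return ThreadPointer + Offset; // Offset from GOTTPOFF (initial exec) or TPOFF (local exec)
}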
3528
3529SDValue
3530ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
3531 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3532 if (DAG.getTarget().useEmulatedTLS())
3533 return LowerToTLSEmulatedModel(GA, DAG);
3534
3535 if (Subtarget->isTargetDarwin())
3536 return LowerGlobalTLSAddressDarwin(Op, DAG);
3537
3538 if (Subtarget->isTargetWindows())
3539 return LowerGlobalTLSAddressWindows(Op, DAG);
3540
3541 // TODO: implement the "local dynamic" model
3542  assert(Subtarget->isTargetELF() && "Only ELF implemented here");
3543 TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
3544
3545 switch (model) {
3546 case TLSModel::GeneralDynamic:
3547 case TLSModel::LocalDynamic:
3548 return LowerToTLSGeneralDynamicModel(GA, DAG);
3549 case TLSModel::InitialExec:
3550 case TLSModel::LocalExec:
3551 return LowerToTLSExecModels(GA, DAG, model);
3552 }
3553  llvm_unreachable("bogus TLS model");
3554}
3555
3556/// Return true if all users of V are within function F, looking through
3557/// ConstantExprs.
3558static bool allUsersAreInFunction(const Value *V, const Function *F) {
3559 SmallVector<const User*,4> Worklist(V->users());
3560 while (!Worklist.empty()) {
3561 auto *U = Worklist.pop_back_val();
3562 if (isa<ConstantExpr>(U)) {
3563 append_range(Worklist, U->users());
3564 continue;
3565 }
3566
3567 auto *I = dyn_cast<Instruction>(U);
3568 if (!I || I->getParent()->getParent() != F)
3569 return false;
3570 }
3571 return true;
3572}
3573
3574static SDValue promoteToConstantPool(const ARMTargetLowering *TLI,
3575 const GlobalValue *GV, SelectionDAG &DAG,
3576 EVT PtrVT, const SDLoc &dl) {
3577 // If we're creating a pool entry for a constant global with unnamed address,
3578 // and the global is small enough, we can emit it inline into the constant pool
3579 // to save ourselves an indirection.
3580 //
3581 // This is a win if the constant is only used in one function (so it doesn't
3582 // need to be duplicated) or duplicating the constant wouldn't increase code
3583 // size (implying the constant is no larger than 4 bytes).
3584 const Function &F = DAG.getMachineFunction().getFunction();
3585
3586  // We rely on this decision to inline being idempotent and unrelated to the
3587 // use-site. We know that if we inline a variable at one use site, we'll
3588 // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
3589  // doesn't know about this optimization, so bail out if it's enabled; otherwise
3590  // we could decide to inline here (and thus never emit the GV) while fast-isel
3591  // generated code still requires the GV.
3592 if (!EnableConstpoolPromotion ||
3593 DAG.getMachineFunction().getTarget().Options.EnableFastISel)
3594 return SDValue();
3595
3596 auto *GVar = dyn_cast<GlobalVariable>(GV);
3597 if (!GVar || !GVar->hasInitializer() ||
3598 !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
3599 !GVar->hasLocalLinkage())
3600 return SDValue();
3601
3602 // If we inline a value that contains relocations, we move the relocations
3603 // from .data to .text. This is not allowed in position-independent code.
3604 auto *Init = GVar->getInitializer();
3605 if ((TLI->isPositionIndependent() || TLI->getSubtarget()->isROPI()) &&
3606 Init->needsDynamicRelocation())
3607 return SDValue();
3608
3609 // The constant islands pass can only really deal with alignment requests
3610 // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote
3611 // any type wanting greater alignment requirements than 4 bytes. We also
3612 // can only promote constants that are multiples of 4 bytes in size or
3613 // are paddable to a multiple of 4. Currently we only try and pad constants
3614 // that are strings for simplicity.
3615 auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
3616 unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
3617 Align PrefAlign = DAG.getDataLayout().getPreferredAlign(GVar);
3618 unsigned RequiredPadding = 4 - (Size % 4);
3619 bool PaddingPossible =
3620 RequiredPadding == 4 || (CDAInit && CDAInit->isString());
3621 if (!PaddingPossible || PrefAlign > 4 || Size > ConstpoolPromotionMaxSize ||
3622 Size == 0)
3623 return SDValue();
3624
3625 unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
3626 MachineFunction &MF = DAG.getMachineFunction();
3627 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3628
3629 // We can't bloat the constant pool too much, else the ConstantIslands pass
3630 // may fail to converge. If we haven't promoted this global yet (it may have
3631 // multiple uses), and promoting it would increase the constant pool size (Sz
3632 // > 4), ensure we have space to do so up to MaxTotal.
3633 if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4)
3634 if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >=
3635 ConstpoolPromotionMaxTotal)
3636 return SDValue();
3637
3638 // This is only valid if all users are in a single function; we can't clone
3639 // the constant in general. The LLVM IR unnamed_addr allows merging
3640 // constants, but not cloning them.
3641 //
3642 // We could potentially allow cloning if we could prove all uses of the
3643 // constant in the current function don't care about the address, like
3644 // printf format strings. But that isn't implemented for now.
3645 if (!allUsersAreInFunction(GVar, &F))
3646 return SDValue();
3647
3648 // We're going to inline this global. Pad it out if needed.
3649 if (RequiredPadding != 4) {
3650 StringRef S = CDAInit->getAsString();
3651
3652 SmallVector<uint8_t,16> V(S.size());
3653 std::copy(S.bytes_begin(), S.bytes_end(), V.begin());
3654 while (RequiredPadding--)
3655 V.push_back(0);
3656 Init = ConstantDataArray::get(*DAG.getContext(), V);
3657 }
3658
3659 auto CPVal = ARMConstantPoolConstant::Create(GVar, Init);
3660 SDValue CPAddr = DAG.getTargetConstantPool(CPVal, PtrVT, Align(4));
3661 if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) {
3662 AFI->markGlobalAsPromotedToConstantPool(GVar);
3663 AFI->setPromotedConstpoolIncrease(AFI->getPromotedConstpoolIncrease() +
3664 PaddedSize - 4);
3665 }
3666 ++NumConstpoolPromoted;
3667 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3668}
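
The padding rule above is easy to check in isolation. Below is a minimal standalone sketch (not part of this file; paddedConstantSize is a hypothetical helper) of how Size, RequiredPadding and PaddedSize relate; PaddedSize is the quantity charged against ConstpoolPromotionMaxTotal.

#include <cstdio>

// Illustration of the padding arithmetic in promoteToConstantPool:
// RequiredPadding == 4 means the size is already a multiple of 4, so no
// padding is added; otherwise the constant is padded up to the next multiple.
static unsigned paddedConstantSize(unsigned Size) {
  unsigned RequiredPadding = 4 - (Size % 4);
  return Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
}

int main() {
  // A 5-byte string constant is padded to 8 bytes before being placed in the
  // constant pool; an 8-byte constant needs no padding.
  std::printf("%u %u\n", paddedConstantSize(5), paddedConstantSize(8)); // 8 8
  return 0;
}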
3669
3670bool ARMTargetLowering::isReadOnly(const GlobalValue *GV) const {
3671 if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
3672 if (!(GV = GA->getBaseObject()))
3673 return false;
3674 if (const auto *V = dyn_cast<GlobalVariable>(GV))
3675 return V->isConstant();
3676 return isa<Function>(GV);
3677}
3678
3679SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op,
3680 SelectionDAG &DAG) const {
3681 switch (Subtarget->getTargetTriple().getObjectFormat()) {
3682 default: llvm_unreachable("unknown object format");
3683 case Triple::COFF:
3684 return LowerGlobalAddressWindows(Op, DAG);
3685 case Triple::ELF:
3686 return LowerGlobalAddressELF(Op, DAG);
3687 case Triple::MachO:
3688 return LowerGlobalAddressDarwin(Op, DAG);
3689 }
3690}
3691
3692SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
3693 SelectionDAG &DAG) const {
3694 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3695 SDLoc dl(Op);
3696 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3697 const TargetMachine &TM = getTargetMachine();
3698 bool IsRO = isReadOnly(GV);
3699
3700 // Only call promoteToConstantPool when not generating an execute-only (XO) text section
3701 if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly())
3702 if (SDValue V = promoteToConstantPool(this, GV, DAG, PtrVT, dl))
3703 return V;
3704
3705 if (isPositionIndependent()) {
3706 bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
3707 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3708 UseGOT_PREL ? ARMII::MO_GOT : 0);
3709 SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3710 if (UseGOT_PREL)
3711 Result =
3712 DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3713 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3714 return Result;
3715 } else if (Subtarget->isROPI() && IsRO) {
3716 // PC-relative.
3717 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
3718 SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3719 return Result;
3720 } else if (Subtarget->isRWPI() && !IsRO) {
3721 // SB-relative.
3722 SDValue RelAddr;
3723 if (Subtarget->useMovt()) {
3724 ++NumMovwMovt;
3725 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
3726 RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
3727 } else { // use literal pool for address constant
3728 ARMConstantPoolValue *CPV =
3729 ARMConstantPoolConstant::Create(GV, ARMCP::SBREL);
3730 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3731 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3732 RelAddr = DAG.getLoad(
3733 PtrVT, dl, DAG.getEntryNode(), CPAddr,
3734 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3735 }
3736 SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT);
3737 SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr);
3738 return Result;
3739 }
3740
3741 // If we have T2 ops, we can materialize the address directly via movt/movw
3742 // pair. This is always cheaper.
3743 if (Subtarget->useMovt()) {
3744 ++NumMovwMovt;
3745 // FIXME: Once remat is capable of dealing with instructions with register
3746 // operands, expand this into two nodes.
3747 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
3748 DAG.getTargetGlobalAddress(GV, dl, PtrVT));
3749 } else {
3750 SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, Align(4));
3751 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3752 return DAG.getLoad(
3753 PtrVT, dl, DAG.getEntryNode(), CPAddr,
3754 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3755 }
3756}
3757
3758SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
3759 SelectionDAG &DAG) const {
3760 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3761 "ROPI/RWPI not currently supported for Darwin");
3762 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3763 SDLoc dl(Op);
3764 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3765
3766 if (Subtarget->useMovt())
3767 ++NumMovwMovt;
3768
3769 // FIXME: Once remat is capable of dealing with instructions with register
3770 // operands, expand this into multiple nodes
3771 unsigned Wrapper =
3772 isPositionIndependent() ? ARMISD::WrapperPIC : ARMISD::Wrapper;
3773
3774 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
3775 SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
3776
3777 if (Subtarget->isGVIndirectSymbol(GV))
3778 Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3779 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3780 return Result;
3781}
3782
3783SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
3784 SelectionDAG &DAG) const {
3785 assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
3786 assert(Subtarget->useMovt() &&
3787 "Windows on ARM expects to use movw/movt");
3788 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3789 "ROPI/RWPI not currently supported for Windows");
3790
3791 const TargetMachine &TM = getTargetMachine();
3792 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3793 ARMII::TOF TargetFlags = ARMII::MO_NO_FLAG;
3794 if (GV->hasDLLImportStorageClass())
3795 TargetFlags = ARMII::MO_DLLIMPORT;
3796 else if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
3797 TargetFlags = ARMII::MO_COFFSTUB;
3798 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3799 SDValue Result;
3800 SDLoc DL(Op);
3801
3802 ++NumMovwMovt;
3803
3804 // FIXME: Once remat is capable of dealing with instructions with register
3805 // operands, expand this into two nodes.
3806 Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
3807 DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*offset=*/0,
3808 TargetFlags));
3809 if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
3810 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3811 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3812 return Result;
3813}
3814
3815SDValue
3816ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
3817 SDLoc dl(Op);
3818 SDValue Val = DAG.getConstant(0, dl, MVT::i32);
3819 return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
3820 DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
3821 Op.getOperand(1), Val);
3822}
3823
3824SDValue
3825ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
3826 SDLoc dl(Op);
3827 return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
3828 Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
3829}
3830
3831SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
3832 SelectionDAG &DAG) const {
3833 SDLoc dl(Op);
3834 return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other,
3835 Op.getOperand(0));
3836}
3837
3838SDValue ARMTargetLowering::LowerINTRINSIC_VOID(
3839 SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const {
3840 unsigned IntNo =
3841 cast<ConstantSDNode>(
3842 Op.getOperand(Op.getOperand(0).getValueType() == MVT::Other))
3843 ->getZExtValue();
3844 switch (IntNo) {
3845 default:
3846 return SDValue(); // Don't custom lower most intrinsics.
3847 case Intrinsic::arm_gnu_eabi_mcount: {
3848 MachineFunction &MF = DAG.getMachineFunction();
3849 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3850 SDLoc dl(Op);
3851 SDValue Chain = Op.getOperand(0);
3852 // call "\01__gnu_mcount_nc"
3853 const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
3854 const uint32_t *Mask =
3855 ARI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
3856 assert(Mask && "Missing call preserved mask for calling convention");
3857 // Mark LR as an implicit live-in.
3858 unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
3859 SDValue ReturnAddress =
3860 DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, PtrVT);
3861 constexpr EVT ResultTys[] = {MVT::Other, MVT::Glue};
3862 SDValue Callee =
3863 DAG.getTargetExternalSymbol("\01__gnu_mcount_nc", PtrVT, 0);
3864 SDValue RegisterMask = DAG.getRegisterMask(Mask);
3865 if (Subtarget->isThumb())
3866 return SDValue(
3867 DAG.getMachineNode(
3868 ARM::tBL_PUSHLR, dl, ResultTys,
3869 {ReturnAddress, DAG.getTargetConstant(ARMCC::AL, dl, PtrVT),
3870 DAG.getRegister(0, PtrVT), Callee, RegisterMask, Chain}),
3871 0);
3872 return SDValue(
3873 DAG.getMachineNode(ARM::BL_PUSHLR, dl, ResultTys,
3874 {ReturnAddress, Callee, RegisterMask, Chain}),
3875 0);
3876 }
3877 }
3878}
3879
3880SDValue
3881ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
3882 const ARMSubtarget *Subtarget) const {
3883 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3884 SDLoc dl(Op);
3885 switch (IntNo) {
3886 default: return SDValue(); // Don't custom lower most intrinsics.
3887 case Intrinsic::thread_pointer: {
3888 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3889 return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
3890 }
3891 case Intrinsic::arm_cls: {
3892 const SDValue &Operand = Op.getOperand(1);
3893 const EVT VTy = Op.getValueType();
3894 SDValue SRA =
3895 DAG.getNode(ISD::SRA, dl, VTy, Operand, DAG.getConstant(31, dl, VTy));
3896 SDValue XOR = DAG.getNode(ISD::XOR, dl, VTy, SRA, Operand);
3897 SDValue SHL =
3898 DAG.getNode(ISD::SHL, dl, VTy, XOR, DAG.getConstant(1, dl, VTy));
3899 SDValue OR =
3900 DAG.getNode(ISD::OR, dl, VTy, SHL, DAG.getConstant(1, dl, VTy));
3901 SDValue Result = DAG.getNode(ISD::CTLZ, dl, VTy, OR);
3902 return Result;
3903 }
3904 case Intrinsic::arm_cls64: {
3905 // cls(x) = if cls(hi(x)) != 31 then cls(hi(x))
3906 // else 31 + clz(if hi(x) == 0 then lo(x) else not(lo(x)))
3907 const SDValue &Operand = Op.getOperand(1);
3908 const EVT VTy = Op.getValueType();
3909
3910 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VTy, Operand,
3911 DAG.getConstant(1, dl, VTy));
3912 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VTy, Operand,
3913 DAG.getConstant(0, dl, VTy));
3914 SDValue Constant0 = DAG.getConstant(0, dl, VTy);
3915 SDValue Constant1 = DAG.getConstant(1, dl, VTy);
3916 SDValue Constant31 = DAG.getConstant(31, dl, VTy);
3917 SDValue SRAHi = DAG.getNode(ISD::SRA, dl, VTy, Hi, Constant31);
3918 SDValue XORHi = DAG.getNode(ISD::XOR, dl, VTy, SRAHi, Hi);
3919 SDValue SHLHi = DAG.getNode(ISD::SHL, dl, VTy, XORHi, Constant1);
3920 SDValue ORHi = DAG.getNode(ISD::OR, dl, VTy, SHLHi, Constant1);
3921 SDValue CLSHi = DAG.getNode(ISD::CTLZ, dl, VTy, ORHi);
3922 SDValue CheckLo =
3923 DAG.getSetCC(dl, MVT::i1, CLSHi, Constant31, ISD::CondCode::SETEQ);
3924 SDValue HiIsZero =
3925 DAG.getSetCC(dl, MVT::i1, Hi, Constant0, ISD::CondCode::SETEQ);
3926 SDValue AdjustedLo =
3927 DAG.getSelect(dl, VTy, HiIsZero, Lo, DAG.getNOT(dl, Lo, VTy));
3928 SDValue CLZAdjustedLo = DAG.getNode(ISD::CTLZ, dl, VTy, AdjustedLo);
3929 SDValue Result =
3930 DAG.getSelect(dl, VTy, CheckLo,
3931 DAG.getNode(ISD::ADD, dl, VTy, CLZAdjustedLo, Constant31), CLSHi);
3932 return Result;
3933 }
3934 case Intrinsic::eh_sjlj_lsda: {
3935 MachineFunction &MF = DAG.getMachineFunction();
3936 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3937 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3938 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3939 SDValue CPAddr;
3940 bool IsPositionIndependent = isPositionIndependent();
3941 unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
3942 ARMConstantPoolValue *CPV =
3943 ARMConstantPoolConstant::Create(&MF.getFunction(), ARMPCLabelIndex,
3944 ARMCP::CPLSDA, PCAdj);
3945 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3946 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3947 SDValue Result = DAG.getLoad(
3948 PtrVT, dl, DAG.getEntryNode(), CPAddr,
3949 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3950
3951 if (IsPositionIndependent) {
3952 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3953 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
3954 }
3955 return Result;
3956 }
3957 case Intrinsic::arm_neon_vabs:
3958 return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(),
3959 Op.getOperand(1));
3960 case Intrinsic::arm_neon_vmulls:
3961 case Intrinsic::arm_neon_vmullu: {
3962 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
3963 ? ARMISD::VMULLs : ARMISD::VMULLu;
3964 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3965 Op.getOperand(1), Op.getOperand(2));
3966 }
3967 case Intrinsic::arm_neon_vminnm:
3968 case Intrinsic::arm_neon_vmaxnm: {
3969 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)
3970 ? ISD::FMINNUM : ISD::FMAXNUM;
3971 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3972 Op.getOperand(1), Op.getOperand(2));
3973 }
3974 case Intrinsic::arm_neon_vminu:
3975 case Intrinsic::arm_neon_vmaxu: {
3976 if (Op.getValueType().isFloatingPoint())
3977 return SDValue();
3978 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)
3979 ? ISD::UMIN : ISD::UMAX;
3980 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3981 Op.getOperand(1), Op.getOperand(2));
3982 }
3983 case Intrinsic::arm_neon_vmins:
3984 case Intrinsic::arm_neon_vmaxs: {
3985 // v{min,max}s is overloaded between signed integers and floats.
3986 if (!Op.getValueType().isFloatingPoint()) {
3987 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3988 ? ISD::SMIN : ISD::SMAX;
3989 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3990 Op.getOperand(1), Op.getOperand(2));
3991 }
3992 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3993 ? ISD::FMINIMUM : ISD::FMAXIMUM;
3994 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3995 Op.getOperand(1), Op.getOperand(2));
3996 }
3997 case Intrinsic::arm_neon_vtbl1:
3998 return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(),
3999 Op.getOperand(1), Op.getOperand(2));
4000 case Intrinsic::arm_neon_vtbl2:
4001 return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(),
4002 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4003 case Intrinsic::arm_mve_pred_i2v:
4004 case Intrinsic::arm_mve_pred_v2i:
4005 return DAG.getNode(ARMISD::PREDICATE_CAST, SDLoc(Op), Op.getValueType(),
4006 Op.getOperand(1));
4007 case Intrinsic::arm_mve_vreinterpretq:
4008 return DAG.getNode(ARMISD::VECTOR_REG_CAST, SDLoc(Op), Op.getValueType(),
4009 Op.getOperand(1));
4010 case Intrinsic::arm_mve_lsll:
4011 return DAG.getNode(ARMISD::LSLL, SDLoc(Op), Op->getVTList(),
4012 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4013 case Intrinsic::arm_mve_asrl:
4014 return DAG.getNode(ARMISD::ASRL, SDLoc(Op), Op->getVTList(),
4015 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4016 }
4017}
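
The arm_cls lowering above builds the node sequence SRA, XOR, SHL, OR, CTLZ. A scalar sketch of the same computation, assuming a GCC/Clang-style __builtin_clz and an arithmetic right shift for signed values (matching ISD::SRA), shows why that sequence counts leading sign bits:

#include <cstdint>
#include <cstdio>

// Scalar model of the DAG built for Intrinsic::arm_cls; not LLVM code.
static unsigned cls32(int32_t X) {
  // (X >> 31) ^ X turns redundant leading sign bits into leading zeros
  // (assumes arithmetic shift for negative X, as GCC/Clang implement it).
  uint32_t Widened = static_cast<uint32_t>((X >> 31) ^ X);
  // Shift in a 1 so the CTLZ operand is never zero; this also accounts for
  // the sign bit itself not being counted.
  uint32_t NonZero = (Widened << 1) | 1;
  return static_cast<unsigned>(__builtin_clz(NonZero));
}

int main() {
  std::printf("%u %u %u\n", cls32(0), cls32(-1), cls32(1)); // 31 31 30
  return 0;
}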
4018
4019static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
4020 const ARMSubtarget *Subtarget) {
4021 SDLoc dl(Op);
4022 ConstantSDNode *SSIDNode = cast<ConstantSDNode>(Op.getOperand(2));
4023 auto SSID = static_cast<SyncScope::ID>(SSIDNode->getZExtValue());
4024 if (SSID == SyncScope::SingleThread)
4025 return Op;
4026
4027 if (!Subtarget->hasDataBarrier()) {
4028 // Some ARMv6 cpus can support data barriers with an mcr instruction.
4029 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
4030 // here.
4031 assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
4032 "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
4033 return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
4034 DAG.getConstant(0, dl, MVT::i32));
4035 }
4036
4037 ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
4038 AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
4039 ARM_MB::MemBOpt Domain = ARM_MB::ISH;
4040 if (Subtarget->isMClass()) {
4041 // Only a full system barrier exists in the M-class architectures.
4042 Domain = ARM_MB::SY;
4043 } else if (Subtarget->preferISHSTBarriers() &&
4044 Ord == AtomicOrdering::Release) {
4045 // Swift happens to implement ISHST barriers in a way that's compatible with
4046 // Release semantics but weaker than ISH so we'd be fools not to use
4047 // it. Beware: other processors probably don't!
4048 Domain = ARM_MB::ISHST;
4049 }
4050
4051 return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
4052 DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32),
4053 DAG.getConstant(Domain, dl, MVT::i32));
4054}
4055
4056static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
4057 const ARMSubtarget *Subtarget) {
4058 // ARM pre-v5TE and Thumb1 do not have preload instructions.
4059 if (!(Subtarget->isThumb2() ||
4060 (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
4061 // Just preserve the chain.
4062 return Op.getOperand(0);
4063
4064 SDLoc dl(Op);
4065 unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
4066 if (!isRead &&
4067 (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
4068 // ARMv7 with MP extension has PLDW.
4069 return Op.getOperand(0);
4070
4071 unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
4072 if (Subtarget->isThumb()) {
4073 // Invert the bits.
4074 isRead = ~isRead & 1;
4075 isData = ~isData & 1;
4076 }
4077
4078 return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
4079 Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32),
4080 DAG.getConstant(isData, dl, MVT::i32));
4081}
4082
4083static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
4084 MachineFunction &MF = DAG.getMachineFunction();
4085 ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
4086
4087 // vastart just stores the address of the VarArgsFrameIndex slot into the
4088 // memory location argument.
4089 SDLoc dl(Op);
4090 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
4091 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4092 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4093 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
4094 MachinePointerInfo(SV));
4095}
4096
4097SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA,
4098 CCValAssign &NextVA,
4099 SDValue &Root,
4100 SelectionDAG &DAG,
4101 const SDLoc &dl) const {
4102 MachineFunction &MF = DAG.getMachineFunction();
4103 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
4104
4105 const TargetRegisterClass *RC;
4106 if (AFI->isThumb1OnlyFunction())
4107 RC = &ARM::tGPRRegClass;
4108 else
4109 RC = &ARM::GPRRegClass;
4110
4111 // Transform the arguments stored in physical registers into virtual ones.
4112 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
4113 SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
4114
4115 SDValue ArgValue2;
4116 if (NextVA.isMemLoc()) {
4117 MachineFrameInfo &MFI = MF.getFrameInfo();
4118 int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true);
4119
4120 // Create load node to retrieve arguments from the stack.
4121 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4122 ArgValue2 = DAG.getLoad(
4123 MVT::i32, dl, Root, FIN,
4124 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
4125 } else {
4126 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
4127 ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
4128 }
4129 if (!Subtarget->isLittle())
4130 std::swap (ArgValue, ArgValue2);
4131 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
4132}
4133
4134// The remaining GPRs hold either the beginning of variable-argument
4135// data, or the beginning of an aggregate passed by value (usually
4136// byval). Either way, we allocate stack slots adjacent to the data
4137// provided by our caller, and store the unallocated registers there.
4138// If this is a variadic function, the va_list pointer will begin with
4139// these values; otherwise, this reassembles a (byval) structure that
4140// was split between registers and memory.
4141 // Return: the frame index that the registers were stored into.
4142int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
4143 const SDLoc &dl, SDValue &Chain,
4144 const Value *OrigArg,
4145 unsigned InRegsParamRecordIdx,
4146 int ArgOffset, unsigned ArgSize) const {
4147 // Currently, two use-cases are possible:
4148 // Case #1. Non-var-args function, and we meet first byval parameter.
4149 // Setup first unallocated register as first byval register;
4150 // eat all remaining registers
4151 // (these two actions are performed by HandleByVal method).
4152 // Then, here, we initialize stack frame with
4153 // "store-reg" instructions.
4154 // Case #2. Var-args function, that doesn't contain byval parameters.
4155 // The same: eat all remaining unallocated registers,
4156 // initialize stack frame.
4157
4158 MachineFunction &MF = DAG.getMachineFunction();
4159 MachineFrameInfo &MFI = MF.getFrameInfo();
4160 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
4161 unsigned RBegin, REnd;
4162 if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
4163 CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
4164 } else {
4165 unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
4166 RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];
4167 REnd = ARM::R4;
4168 }
4169
4170 if (REnd != RBegin)
4171 ArgOffset = -4 * (ARM::R4 - RBegin);
4172
4173 auto PtrVT = getPointerTy(DAG.getDataLayout());
4174 int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false);
4175 SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);
4176
4177 SmallVector<SDValue, 4> MemOps;
4178 const TargetRegisterClass *RC =
4179 AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
4180
4181 for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
4182 unsigned VReg = MF.addLiveIn(Reg, RC);
4183 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
4184 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4185 MachinePointerInfo(OrigArg, 4 * i));
4186 MemOps.push_back(Store);
4187 FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));
4188 }
4189
4190 if (!MemOps.empty())
4191 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4192 return FrameIndex;
4193}
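
As an illustration of the offset computation in StoreByValRegs, here is a hypothetical standalone model (byvalSpillOffset is not an LLVM helper): the registers from RBegin up to r4 (exclusive) are spilled into a fixed object that sits 4 bytes per saved register below the CFA.

#include <cstdio>

// Model of the ArgOffset adjustment above. The real code computes
// ARM::R4 - RBegin, relying on consecutive GPRs having consecutive enum
// values; here plain indices 0..4 stand in for r0..r4.
static int byvalSpillOffset(unsigned RBegin, unsigned REnd, int ArgOffset) {
  const unsigned R4 = 4; // stand-in for ARM::R4
  if (REnd != RBegin)
    ArgOffset = -4 * static_cast<int>(R4 - RBegin);
  return ArgOffset;
}

int main() {
  // A byval argument whose first two words were passed in r2 and r3:
  // the spill area starts 8 bytes below the CFA.
  std::printf("%d\n", byvalSpillOffset(/*RBegin=*/2, /*REnd=*/4, /*ArgOffset=*/0)); // -8
  return 0;
}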
4194
4195 // Set up the stack frame that the va_list pointer will start from.
4196void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
4197 const SDLoc &dl, SDValue &Chain,
4198 unsigned ArgOffset,
4199 unsigned TotalArgRegsSaveSize,
4200 bool ForceMutable) const {
4201 MachineFunction &MF = DAG.getMachineFunction();
4202 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
4203
4204 // Try to store any remaining integer argument regs
4205 // to their spots on the stack so that they may be loaded by dereferencing
4206 // the result of va_next.
4207 // If there are no regs to be stored, just point the address after the last
4208 // argument passed via stack.
4209 int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
4210 CCInfo.getInRegsParamsCount(),
4211 CCInfo.getNextStackOffset(),
4212 std::max(4U, TotalArgRegsSaveSize));
4213 AFI->setVarArgsFrameIndex(FrameIndex);
4214}
4215
4216bool ARMTargetLowering::splitValueIntoRegisterParts(
4217 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
4218 unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const {
4219 bool IsABIRegCopy = CC.hasValue();
4220 EVT ValueVT = Val.getValueType();
4221 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
4222 PartVT == MVT::f32) {
4223 unsigned ValueBits = ValueVT.getSizeInBits();
4224 unsigned PartBits = PartVT.getSizeInBits();
4225 Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(ValueBits), Val);
4226 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::getIntegerVT(PartBits), Val);
4227 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
4228 Parts[0] = Val;
4229 return true;
4230 }
4231 return false;
4232}
4233
4234SDValue ARMTargetLowering::joinRegisterPartsIntoValue(
4235 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
4236 MVT PartVT, EVT ValueVT, Optional<CallingConv::ID> CC) const {
4237 bool IsABIRegCopy = CC.hasValue();
4238 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
4239 PartVT == MVT::f32) {
4240 unsigned ValueBits = ValueVT.getSizeInBits();
4241 unsigned PartBits = PartVT.getSizeInBits();
4242 SDValue Val = Parts[0];
4243
4244 Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(PartBits), Val);
4245 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::getIntegerVT(ValueBits), Val);
4246 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
4247 return Val;
4248 }
4249 return SDValue();
4250}
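
A bit-level sketch of the split/join pair above, assuming we only care about where the f16 payload ends up inside the 32-bit part (ANY_EXTEND leaves the upper half unspecified; zero is used here purely for illustration, and the helper names are not LLVM APIs):

#include <cstdint>
#include <cstdio>

// The half-precision payload is carried in the low 16 bits of the 32-bit
// part; joining simply truncates back to those bits.
static uint32_t splitF16ToF32Bits(uint16_t F16Bits) {
  return static_cast<uint32_t>(F16Bits); // low 16 bits hold the half value
}

static uint16_t joinF32BitsToF16(uint32_t F32Bits) {
  return static_cast<uint16_t>(F32Bits); // TRUNCATE back to the low 16 bits
}

int main() {
  uint16_t Half = 0x3C00; // 1.0 in IEEE half precision
  uint32_t InReg = splitF16ToF32Bits(Half);
  std::printf("0x%08X -> 0x%04X\n", (unsigned)InReg,
              (unsigned)joinF32BitsToF16(InReg)); // 0x00003C00 -> 0x3C00
  return 0;
}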
4251
4252SDValue ARMTargetLowering::LowerFormalArguments(
4253 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4254 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4255 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4256 MachineFunction &MF = DAG.getMachineFunction();
4257 MachineFrameInfo &MFI = MF.getFrameInfo();
4258
4259 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
4260
4261 // Assign locations to all of the incoming arguments.
4262 SmallVector<CCValAssign, 16> ArgLocs;
4263 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4264 *DAG.getContext());
4265 CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
4266
4267 SmallVector<SDValue, 16> ArgValues;
4268 SDValue ArgValue;
4269 Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
4270 unsigned CurArgIdx = 0;
4271
4272 // Initially ArgRegsSaveSize is zero.
4273 // Then we increase this value each time we meet a byval parameter.
4274 // We also increase this value in the case of a varargs function.
4275 AFI->setArgRegsSaveSize(0);
4276
4277 // Calculate the amount of stack space that we need to allocate to store
4278 // byval and variadic arguments that are passed in registers.
4279 // We need to know this before we allocate the first byval or variadic
4280 // argument, as they will be allocated a stack slot below the CFA (Canonical
4281 // Frame Address, the stack pointer at entry to the function).
4282 unsigned ArgRegBegin = ARM::R4;
4283 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4284 if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
4285 break;
4286
4287 CCValAssign &VA = ArgLocs[i];
4288 unsigned Index = VA.getValNo();
4289 ISD::ArgFlagsTy Flags = Ins[Index].Flags;
4290 if (!Flags.isByVal())
4291 continue;
4292
4293 assert(VA.isMemLoc() && "unexpected byval pointer in reg");
4294 unsigned RBegin, REnd;
4295 CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
4296 ArgRegBegin = std::min(ArgRegBegin, RBegin);
4297
4298 CCInfo.nextInRegsParam();
4299 }
4300 CCInfo.rewindByValRegsInfo();
4301
4302 int lastInsIndex = -1;
4303 if (isVarArg && MFI.hasVAStart()) {
4304 unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
4305 if (RegIdx != array_lengthof(GPRArgRegs))
4306 ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
4307 }
4308
4309 unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
4310 AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
4311 auto PtrVT = getPointerTy(DAG.getDataLayout());
4312
4313 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4314 CCValAssign &VA = ArgLocs[i];
4315 if (Ins[VA.getValNo()].isOrigArg()) {
4316 std::advance(CurOrigArg,
4317 Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
4318 CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
4319 }
4320 // Arguments stored in registers.
4321 if (VA.isRegLoc()) {
4322 EVT RegVT = VA.getLocVT();
4323
4324 if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {
4325 // f64 and vector types are split up into multiple registers or
4326 // combinations of registers and stack slots.
4327 SDValue ArgValue1 =
4328 GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4329 VA = ArgLocs[++i]; // skip ahead to next loc
4330 SDValue ArgValue2;
4331 if (VA.isMemLoc()) {
4332 int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);
4333 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4334 ArgValue2 = DAG.getLoad(
4335 MVT::f64, dl, Chain, FIN,
4336 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
4337 } else {
4338 ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4339 }
4340 ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
4341 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue,
4342 ArgValue1, DAG.getIntPtrConstant(0, dl));
4343 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue,
4344 ArgValue2, DAG.getIntPtrConstant(1, dl));
4345 } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) {
4346 ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4347 } else {
4348 const TargetRegisterClass *RC;
4349
4350 if (RegVT == MVT::f16 || RegVT == MVT::bf16)
4351 RC = &ARM::HPRRegClass;
4352 else if (RegVT == MVT::f32)
4353 RC = &ARM::SPRRegClass;
4354 else if (RegVT == MVT::f64 || RegVT == MVT::v4f16 ||
4355 RegVT == MVT::v4bf16)
4356 RC = &ARM::DPRRegClass;
4357 else if (RegVT == MVT::v2f64 || RegVT == MVT::v8f16 ||
4358 RegVT == MVT::v8bf16)
4359 RC = &ARM::QPRRegClass;
4360 else if (RegVT == MVT::i32)
4361 RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
4362 : &ARM::GPRRegClass;
4363 else
4364 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering")::llvm::llvm_unreachable_internal("RegVT not supported by FORMAL_ARGUMENTS Lowering"
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/Target/ARM/ARMISelLowering.cpp"
, 4364)
;
4365
4366 // Transform the arguments in physical registers into virtual ones.
4367 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
4368 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
4369
4370 // If this value is passed in r0 and has the returned attribute (e.g.
4371 // C++ 'structors), record this fact for later use.
4372 if (VA.getLocReg() == ARM::R0 && Ins[VA.getValNo()].Flags.isReturned()) {
4373 AFI->setPreservesR0();
4374 }
4375 }
4376
4377 // If this is an 8 or 16-bit value, it is really passed promoted
4378 // to 32 bits. Insert an assert[sz]ext to capture this, then
4379 // truncate to the right size.
4380 switch (VA.getLocInfo()) {
4381 default: llvm_unreachable("Unknown loc info!");
4382 case CCValAssign::Full: break;
4383 case CCValAssign::BCvt:
4384 ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
4385 break;
4386 case CCValAssign::SExt:
4387 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
4388 DAG.getValueType(VA.getValVT()));
4389 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
4390 break;
4391 case CCValAssign::ZExt:
4392 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
4393 DAG.getValueType(VA.getValVT()));
4394 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
4395 break;
4396 }
4397
4398 // f16 arguments have their size extended to 4 bytes and passed as if they
4399 // had been copied to the LSBs of a 32-bit register.
4400 // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
4401 if (VA.needsCustom() &&
4402 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
4403 ArgValue = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), ArgValue);
4404
4405 InVals.push_back(ArgValue);
4406 } else { // !VA.isRegLoc()
4407 // sanity check
4408 assert(VA.isMemLoc());
4409 assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
4410
4411 int index = VA.getValNo();
4412
4413 // Some Ins[] entries become multiple ArgLoc[] entries.
4414 // Process them only once.
4415 if (index != lastInsIndex)
4416 {
4417 ISD::ArgFlagsTy Flags = Ins[index].Flags;
4418 // FIXME: For now, all byval parameter objects are marked mutable.
4419 // This can be changed with more analysis.
4420 // In case of tail call optimization, mark all arguments mutable, since
4421 // they could be overwritten by the lowering of arguments in case of
4422 // a tail call.
4423 if (Flags.isByVal()) {
4424 assert(Ins[index].isOrigArg() &&
4425 "Byval arguments cannot be implicit");
4426 unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
4427
4428 int FrameIndex = StoreByValRegs(
4429 CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex,
4430 VA.getLocMemOffset(), Flags.getByValSize());
4431 InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));
4432 CCInfo.nextInRegsParam();
4433 } else {
4434 unsigned FIOffset = VA.getLocMemOffset();
4435 int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
4436 FIOffset, true);
4437
4438 // Create load nodes to retrieve arguments from the stack.
4439 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4440 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
4441 MachinePointerInfo::getFixedStack(
4442 DAG.getMachineFunction(), FI)));
4443 }
4444 lastInsIndex = index;
4445 }
4446 }
4447 }
4448
4449 // varargs
4450 if (isVarArg && MFI.hasVAStart()) {
4451 VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getNextStackOffset(),
4452 TotalArgRegsSaveSize);
4453 if (AFI->isCmseNSEntryFunction()) {
4454 DiagnosticInfoUnsupported Diag(
4455 DAG.getMachineFunction().getFunction(),
4456 "secure entry function must not be variadic", dl.getDebugLoc());
4457 DAG.getContext()->diagnose(Diag);
4458 }
4459 }
4460
4461 AFI->setArgumentStackSize(CCInfo.getNextStackOffset());
4462
4463 if (CCInfo.getNextStackOffset() > 0 && AFI->isCmseNSEntryFunction()) {
4464 DiagnosticInfoUnsupported Diag(
4465 DAG.getMachineFunction().getFunction(),
4466 "secure entry function requires arguments on stack", dl.getDebugLoc());
4467 DAG.getContext()->diagnose(Diag);
4468 }
4469
4470 return Chain;
4471}
4472
4473/// isFloatingPointZero - Return true if this is +0.0.
4474static bool isFloatingPointZero(SDValue Op) {
4475 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
4476 return CFP->getValueAPF().isPosZero();
4477 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
4478 // Maybe this has already been legalized into the constant pool?
4479 if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
4480 SDValue WrapperOp = Op.getOperand(1).getOperand(0);
4481 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
4482 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
4483 return CFP->getValueAPF().isPosZero();
4484 }
4485 } else if (Op->getOpcode() == ISD::BITCAST &&
4486 Op->getValueType(0) == MVT::f64) {
4487 // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
4488 // created by LowerConstantFP().
4489 SDValue BitcastOp = Op->getOperand(0);
4490 if (BitcastOp->getOpcode() == ARMISD::VMOVIMM &&
4491 isNullConstant(BitcastOp->getOperand(0)))
4492 return true;
4493 }
4494 return false;
4495}
4496
4497/// Returns appropriate ARM CMP (cmp) and corresponding condition code for
4498/// the given operands.
4499SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
4500 SDValue &ARMcc, SelectionDAG &DAG,
4501 const SDLoc &dl) const {
4502 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
4503 unsigned C = RHSC->getZExtValue();
4504 if (!isLegalICmpImmediate((int32_t)C)) {
4505 // Constant does not fit, try adjusting it by one.
4506 switch (CC) {
4507 default: break;
4508 case ISD::SETLT:
4509 case ISD::SETGE:
4510 if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
4511 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
4512 RHS = DAG.getConstant(C - 1, dl, MVT::i32);
4513 }
4514 break;
4515 case ISD::SETULT:
4516 case ISD::SETUGE:
4517 if (C != 0 && isLegalICmpImmediate(C-1)) {
4518 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
4519 RHS = DAG.getConstant(C - 1, dl, MVT::i32);
4520 }
4521 break;
4522 case ISD::SETLE:
4523 case ISD::SETGT:
4524 if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
4525 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
4526 RHS = DAG.getConstant(C + 1, dl, MVT::i32);
4527 }
4528 break;
4529 case ISD::SETULE:
4530 case ISD::SETUGT:
4531 if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
4532 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
4533 RHS = DAG.getConstant(C + 1, dl, MVT::i32);
4534 }
4535 break;
4536 }
4537 }
4538 } else if ((ARM_AM::getShiftOpcForNode(LHS.getOpcode()) != ARM_AM::no_shift) &&
4539 (ARM_AM::getShiftOpcForNode(RHS.getOpcode()) == ARM_AM::no_shift)) {
4540 // In ARM and Thumb-2, the compare instructions can shift their second
4541 // operand.
4542 CC = ISD::getSetCCSwappedOperands(CC);
4543 std::swap(LHS, RHS);
4544 }
4545
4546 // Thumb1 has very limited immediate modes, so turning an "and" into a
4547 // shift can save multiple instructions.
4548 //
4549 // If we have (x & C1), and C1 is an appropriate mask, we can transform it
4550 // into "((x << n) >> n)". But that isn't necessarily profitable on its
4551 // own. If it's the operand to an unsigned comparison with an immediate,
4552 // we can eliminate one of the shifts: we transform
4553 // "((x << n) >> n) == C2" to "(x << n) == (C2 << n)".
4554 //
4555 // We avoid transforming cases which aren't profitable due to encoding
4556 // details:
4557 //
4558 // 1. C2 fits into the immediate field of a cmp, and the transformed version
4559 // would not; in that case, we're essentially trading one immediate load for
4560 // another.
4561 // 2. C1 is 255 or 65535, so we can use uxtb or uxth.
4562 // 3. C2 is zero; we have other code for this special case.
4563 //
4564 // FIXME: Figure out profitability for Thumb2; we usually can't save an
4565 // instruction, since the AND is always one instruction anyway, but we could
4566 // use narrow instructions in some cases.
4567 if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::AND &&
4568 LHS->hasOneUse() && isa<ConstantSDNode>(LHS.getOperand(1)) &&
4569 LHS.getValueType() == MVT::i32 && isa<ConstantSDNode>(RHS) &&
4570 !isSignedIntSetCC(CC)) {
4571 unsigned Mask = cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue();
4572 auto *RHSC = cast<ConstantSDNode>(RHS.getNode());
4573 uint64_t RHSV = RHSC->getZExtValue();
4574 if (isMask_32(Mask) && (RHSV & ~Mask) == 0 && Mask != 255 && Mask != 65535) {
4575 unsigned ShiftBits = countLeadingZeros(Mask);
4576 if (RHSV && (RHSV > 255 || (RHSV << ShiftBits) <= 255)) {
4577 SDValue ShiftAmt = DAG.getConstant(ShiftBits, dl, MVT::i32);
4578 LHS = DAG.getNode(ISD::SHL, dl, MVT::i32, LHS.getOperand(0), ShiftAmt);
4579 RHS = DAG.getConstant(RHSV << ShiftBits, dl, MVT::i32);
4580 }
4581 }
4582 }
4583
4584 // The specific comparison "(x<<c) > 0x80000000U" can be optimized to a
4585 // single "lsls x, c+1". The shift sets the "C" and "Z" flags the same
4586 // way a cmp would.
4587 // FIXME: Add support for ARM/Thumb2; this would need isel patterns, and
4588 // some tweaks to the heuristics for the previous and->shift transform.
4589 // FIXME: Optimize cases where the LHS isn't a shift.
4590 if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::SHL &&
4591 isa<ConstantSDNode>(RHS) &&
4592 cast<ConstantSDNode>(RHS)->getZExtValue() == 0x80000000U &&
4593 CC == ISD::SETUGT && isa<ConstantSDNode>(LHS.getOperand(1)) &&
4594 cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() < 31) {
4595 unsigned ShiftAmt =
4596 cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() + 1;
4597 SDValue Shift = DAG.getNode(ARMISD::LSLS, dl,
4598 DAG.getVTList(MVT::i32, MVT::i32),
4599 LHS.getOperand(0),
4600 DAG.getConstant(ShiftAmt, dl, MVT::i32));
4601 SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
4602 Shift.getValue(1), SDValue());
4603 ARMcc = DAG.getConstant(ARMCC::HI, dl, MVT::i32);
4604 return Chain.getValue(1);
4605 }
4606
4607 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
4608
4609 // If the RHS is a constant zero then the V (overflow) flag will never be
4610 // set. This can allow us to simplify GE to PL or LT to MI, which can be
4611 // simpler for other passes (like the peephole optimiser) to deal with.
4612 if (isNullConstant(RHS)) {
4613 switch (CondCode) {
4614 default: break;
4615 case ARMCC::GE:
4616 CondCode = ARMCC::PL;
4617 break;
4618 case ARMCC::LT:
4619 CondCode = ARMCC::MI;
4620 break;
4621 }
4622 }
4623
4624 ARMISD::NodeType CompareType;
4625 switch (CondCode) {
4626 default:
4627 CompareType = ARMISD::CMP;
4628 break;
4629 case ARMCC::EQ:
4630 case ARMCC::NE:
4631 // Uses only Z Flag
4632 CompareType = ARMISD::CMPZ;
4633 break;
4634 }
4635 ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
4636 return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
4637}
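
Two of the rewrites in getARMCmp are purely numeric identities. The sketch below (plain C++, not LLVM code) checks the predicate/immediate adjustment and the Thumb1 mask rewrite on concrete values; it says nothing about which immediates are actually legal on the target.

#include <cstdint>
#include <cstdio>

int main() {
  // "x < C" is equivalent to "x <= C - 1" as long as C is not INT32_MIN.
  int32_t X = 255, C = 256;
  bool Lt = X < C, LeAdjusted = X <= C - 1;

  // "((x << n) >> n) == C2" is equivalent to "(x << n) == (C2 << n)" when
  // C2 has no bits outside the mask, i.e. (C2 & ~Mask) == 0.
  uint32_t Mask = 0x00FFFFFFu;           // (~0u >> 8), so n == 8
  unsigned N = 8;
  uint32_t C2 = 42;
  bool SideConditionHolds = (C2 & ~Mask) == 0;
  uint32_t Y = 0x12000000u | C2;         // bits above the mask are discarded
  bool Orig = ((Y << N) >> N) == C2;
  bool Rewritten = (Y << N) == (C2 << N);

  std::printf("%d %d | %d %d %d\n", Lt, LeAdjusted,
              SideConditionHolds, Orig, Rewritten); // 1 1 | 1 1 1
  return 0;
}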
4638
4639 /// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
4640SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
4641 SelectionDAG &DAG, const SDLoc &dl,
4642 bool Signaling) const {
4643 assert(Subtarget->hasFP64() || RHS.getValueType() != MVT::f64);
4644 SDValue Cmp;
4645 if (!isFloatingPointZero(RHS))
4646 Cmp = DAG.getNode(Signaling ? ARMISD::CMPFPE : ARMISD::CMPFP,
4647 dl, MVT::Glue, LHS, RHS);
4648 else
4649 Cmp = DAG.getNode(Signaling ? ARMISD::CMPFPEw0 : ARMISD::CMPFPw0,
4650 dl, MVT::Glue, LHS);
4651 return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
4652}
4653
4654/// duplicateCmp - Glue values can have only one use, so this function
4655/// duplicates a comparison node.
4656SDValue
4657ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
4658 unsigned Opc = Cmp.getOpcode();
4659 SDLoc DL(Cmp);
4660 if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
4661 return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
4662
4663 assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
4664 Cmp = Cmp.getOperand(0);
4665 Opc = Cmp.getOpcode();
4666 if (Opc == ARMISD::CMPFP)
4667 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
4668 else {
4669 assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
4670 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));
4671 }
4672 return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
4673}
4674
4675// This function returns three things: the arithmetic computation itself
4676// (Value), a comparison (OverflowCmp), and a condition code (ARMcc). The
4677// comparison and the condition code define the case in which the arithmetic
4678// computation *does not* overflow.
4679std::pair<SDValue, SDValue>
4680ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
4681 SDValue &ARMcc) const {
4682 assert(Op.getValueType() == MVT::i32 && "Unsupported value type");
4683
4684 SDValue Value, OverflowCmp;
4685 SDValue LHS = Op.getOperand(0);
4686 SDValue RHS = Op.getOperand(1);
4687 SDLoc dl(Op);
4688
4689 // FIXME: We are currently always generating CMPs because we don't support
4690 // generating CMN through the backend. This is not as good as the natural
4691 // CMP case because it causes a register dependency and cannot be folded
4692 // later.
4693
4694 switch (Op.getOpcode()) {
4695 default:
4696 llvm_unreachable("Unknown overflow instruction!")::llvm::llvm_unreachable_internal("Unknown overflow instruction!"
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/Target/ARM/ARMISelLowering.cpp"
, 4696)
;
4697 case ISD::SADDO:
4698 ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
4699 Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
4700 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
4701 break;
4702 case ISD::UADDO:
4703 ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
4704 // We use ADDC here to correspond to its use in LowerUnsignedALUO.
4705 // We do not use it in the USUBO case as Value may not be used.
4706 Value = DAG.getNode(ARMISD::ADDC, dl,
4707 DAG.getVTList(Op.getValueType(), MVT::i32), LHS, RHS)
4708 .getValue(0);
4709 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
4710 break;
4711 case ISD::SSUBO:
4712 ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
4713 Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
4714 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
4715 break;
4716 case ISD::USUBO:
4717 ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
4718 Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
4719 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
4720 break;
4721 case ISD::UMULO:
4722 // We generate a UMUL_LOHI and then check if the high word is 0.
4723 ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
4724 Value = DAG.getNode(ISD::UMUL_LOHI, dl,
4725 DAG.getVTList(Op.getValueType(), Op.getValueType()),
4726 LHS, RHS);
4727 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
4728 DAG.getConstant(0, dl, MVT::i32));
4729 Value = Value.getValue(0); // We only want the low 32 bits for the result.
4730 break;
4731 case ISD::SMULO:
4732 // We generate a SMUL_LOHI and then check if all the bits of the high word
4733 // are the same as the sign bit of the low word.
4734 ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
4735 Value = DAG.getNode(ISD::SMUL_LOHI, dl,
4736 DAG.getVTList(Op.getValueType(), Op.getValueType()),
4737 LHS, RHS);
4738 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
4739 DAG.getNode(ISD::SRA, dl, Op.getValueType(),
4740 Value.getValue(0),
4741 DAG.getConstant(31, dl, MVT::i32)));
4742 Value = Value.getValue(0); // We only want the low 32 bits for the result.
4743 break;
4744 } // switch (...)
4745
4746 return std::make_pair(Value, OverflowCmp);
4747}
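
The UMULO and SMULO cases above check the high word of a double-width product. A scalar model of the same checks, assuming an arithmetic right shift for signed values (matching ISD::SRA), is sketched below; it is not LLVM code.

#include <cstdint>
#include <cstdio>

// The 32x32 multiply did NOT overflow when:
//  * unsigned: the high word of the 64-bit product is zero;
//  * signed: the high word is a copy of the low word's sign bit.
static bool umulNoOverflow(uint32_t A, uint32_t B) {
  uint64_t P = static_cast<uint64_t>(A) * B;
  return static_cast<uint32_t>(P >> 32) == 0;
}

static bool smulNoOverflow(int32_t A, int32_t B) {
  int64_t P = static_cast<int64_t>(A) * B;
  int32_t Lo = static_cast<int32_t>(P);
  int32_t Hi = static_cast<int32_t>(P >> 32);
  return Hi == (Lo >> 31); // arithmetic shift replicates the sign bit
}

int main() {
  std::printf("%d %d %d %d\n",
              umulNoOverflow(0x10000u, 0x10000u),      // 0: 2^32 overflows
              umulNoOverflow(65535u, 65535u),          // 1: fits in 32 bits
              smulNoOverflow(-46341, 46341),           // 0: below INT32_MIN
              smulNoOverflow(-2, 0x40000000));         // 1: exactly INT32_MIN
  return 0;
}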
4748
4749SDValue
4750ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const {
4751 // Let legalize expand this if it isn't a legal type yet.
4752 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
4753 return SDValue();
4754
4755 SDValue Value, OverflowCmp;
4756 SDValue ARMcc;
4757 std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
4758 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4759 SDLoc dl(Op);
4760 // We use 0 and 1 as false and true values.
4761 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
4762 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
4763 EVT VT = Op.getValueType();
4764
4765 SDValue Overflow = DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal,
4766 ARMcc, CCR, OverflowCmp);
4767
4768 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
4769 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
4770}
4771
4772static SDValue ConvertBooleanCarryToCarryFlag(SDValue BoolCarry,
4773 SelectionDAG &DAG) {
4774 SDLoc DL(BoolCarry);
4775 EVT CarryVT = BoolCarry.getValueType();
4776
4777 // This converts the boolean value carry into the carry flag by doing
4778 // ARMISD::SUBC Carry, 1
4779 SDValue Carry = DAG.getNode(ARMISD::SUBC, DL,
4780 DAG.getVTList(CarryVT, MVT::i32),
4781 BoolCarry, DAG.getConstant(1, DL, CarryVT));
4782 return Carry.getValue(1);
4783}
4784
4785static SDValue ConvertCarryFlagToBooleanCarry(SDValue Flags, EVT VT,
4786 SelectionDAG &DAG) {
4787 SDLoc DL(Flags);
4788
4789 // Now convert the carry flag into a boolean carry. We do this
4790 // using ARMISD:ADDE 0, 0, Carry
4791 return DAG.getNode(ARMISD::ADDE, DL, DAG.getVTList(VT, MVT::i32),
4792 DAG.getConstant(0, DL, MVT::i32),
4793 DAG.getConstant(0, DL, MVT::i32), Flags);
4794}
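
These two helpers rely on ARM treating the carry flag after a subtraction as the inverted borrow. A minimal model of both directions, with hypothetical helper names, is shown below; it is only an illustration of the arithmetic identities, not of the DAG nodes themselves.

#include <cstdio>

// SUBC(Bool, 1): Bool == 1 gives 1 - 1 with no borrow, so C = 1;
//                Bool == 0 gives 0 - 1 which borrows, so C = 0.
static unsigned boolToCarryFlag(unsigned Bool) {
  return Bool >= 1u ? 1u : 0u; // carry = !borrow for "Bool - 1"
}

// ADDE(0, 0, C): the result is 0 + 0 + C, i.e. the flag read back as 0 or 1.
static unsigned carryFlagToBool(unsigned CarryFlag) {
  return 0u + 0u + CarryFlag;
}

int main() {
  for (unsigned B = 0; B <= 1; ++B)
    std::printf("%u -> flag %u -> %u\n", B, boolToCarryFlag(B),
                carryFlagToBool(boolToCarryFlag(B)));
  return 0;
}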
4795
4796SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op,
4797 SelectionDAG &DAG) const {
4798 // Let legalize expand this if it isn't a legal type yet.
4799 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
4800 return SDValue();
4801
4802 SDValue LHS = Op.getOperand(0);
4803 SDValue RHS = Op.getOperand(1);
4804 SDLoc dl(Op);
4805
4806 EVT VT = Op.getValueType();
4807 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
4808 SDValue Value;
4809 SDValue Overflow;
4810 switch (Op.getOpcode()) {
4811 default:
4812 llvm_unreachable("Unknown overflow instruction!")::llvm::llvm_unreachable_internal("Unknown overflow instruction!"
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/Target/ARM/ARMISelLowering.cpp"
, 4812)
;
4813 case ISD::UADDO:
4814 Value = DAG.getNode(ARMISD::ADDC, dl, VTs, LHS, RHS);
4815 // Convert the carry flag into a boolean value.
4816 Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
4817 break;
4818 case ISD::USUBO: {
4819 Value = DAG.getNode(ARMISD::SUBC, dl, VTs, LHS, RHS);
4820 // Convert the carry flag into a boolean value.
4821 Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
4822 // ARMISD::SUBC returns 0 when we have to borrow, so turn it into an
4823 // overflow value by computing 1 - C.
4824 Overflow = DAG.getNode(ISD::SUB, dl, MVT::i32,
4825 DAG.getConstant(1, dl, MVT::i32), Overflow);
4826 break;
4827 }
4828 }
4829
4830 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
4831}
4832
4833static SDValue LowerSADDSUBSAT(SDValue Op, SelectionDAG &DAG,
4834 const ARMSubtarget *Subtarget) {
4835 EVT VT = Op.getValueType();
4836 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
4837 return SDValue();
4838 if (!VT.isSimple())
4839 return SDValue();
4840
4841 unsigned NewOpcode;
4842 bool IsAdd = Op->getOpcode() == ISD::SADDSAT;
4843 switch (VT.getSimpleVT().SimpleTy) {
4844 default:
4845 return SDValue();
4846 case MVT::i8:
4847 NewOpcode = IsAdd ? ARMISD::QADD8b : ARMISD::QSUB8b;
4848 break;
4849 case MVT::i16:
4850 NewOpcode = IsAdd ? ARMISD::QADD16b : ARMISD::QSUB16b;
4851 break;
4852 }
4853
4854 SDLoc dl(Op);
4855 SDValue Add =
4856 DAG.getNode(NewOpcode, dl, MVT::i32,
4857 DAG.getSExtOrTrunc(Op->getOperand(0), dl, MVT::i32),
4858 DAG.getSExtOrTrunc(Op->getOperand(1), dl, MVT::i32));
4859 return DAG.getNode(ISD::TRUNCATE, dl, VT, Add);
4860}
4861
4862SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
4863 SDValue Cond = Op.getOperand(0);
4864 SDValue SelectTrue = Op.getOperand(1);
4865 SDValue SelectFalse = Op.getOperand(2);
4866 SDLoc dl(Op);
4867 unsigned Opc = Cond.getOpcode();
4868
4869 if (Cond.getResNo() == 1 &&
4870 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
4871 Opc == ISD::USUBO)) {
4872 if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
4873 return SDValue();
4874
4875 SDValue Value, OverflowCmp;
4876 SDValue ARMcc;
4877 std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
4878 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4879 EVT VT = Op.getValueType();
4880
4881 return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, CCR,
4882 OverflowCmp, DAG);
4883 }
4884
4885 // Convert:
4886 //
4887 // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
4888 // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
4889 //
4890 if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
4891 const ConstantSDNode *CMOVTrue =
4892 dyn_cast<ConstantSDNode>(Cond.getOperand(0));
4893 const ConstantSDNode *CMOVFalse =
4894 dyn_cast<ConstantSDNode>(Cond.getOperand(1));
4895
4896 if (CMOVTrue && CMOVFalse) {
4897 unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
4898 unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
4899
4900 SDValue True;
4901 SDValue False;
4902 if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
4903 True = SelectTrue;
4904 False = SelectFalse;
4905 } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
4906 True = SelectFalse;
4907 False = SelectTrue;
4908 }
4909
4910 if (True.getNode() && False.getNode()) {
4911 EVT VT = Op.getValueType();
4912 SDValue ARMcc = Cond.getOperand(2);
4913 SDValue CCR = Cond.getOperand(3);
4914 SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
4915        assert(True.getValueType() == VT);
4916 return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG);
4917 }
4918 }
4919 }
4920
4921 // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
4922 // undefined bits before doing a full-word comparison with zero.
4923 Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
4924 DAG.getConstant(1, dl, Cond.getValueType()));
4925
4926 return DAG.getSelectCC(dl, Cond,
4927 DAG.getConstant(0, dl, Cond.getValueType()),
4928 SelectTrue, SelectFalse, ISD::SETNE);
4929}
4930
4931static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
4932 bool &swpCmpOps, bool &swpVselOps) {
4933 // Start by selecting the GE condition code for opcodes that return true for
4934 // 'equality'
4935 if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||
4936 CC == ISD::SETULE || CC == ISD::SETGE || CC == ISD::SETLE)
4937 CondCode = ARMCC::GE;
4938
4939 // and GT for opcodes that return false for 'equality'.
4940 else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||
4941 CC == ISD::SETULT || CC == ISD::SETGT || CC == ISD::SETLT)
4942 CondCode = ARMCC::GT;
4943
4944 // Since we are constrained to GE/GT, if the opcode contains 'less', we need
4945 // to swap the compare operands.
4946 if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||
4947 CC == ISD::SETULT || CC == ISD::SETLE || CC == ISD::SETLT)
4948 swpCmpOps = true;
4949
4950 // Both GT and GE are ordered comparisons, and return false for 'unordered'.
4951 // If we have an unordered opcode, we need to swap the operands to the VSEL
4952 // instruction (effectively negating the condition).
4953 //
4954 // This also has the effect of swapping which one of 'less' or 'greater'
4955 // returns true, so we also swap the compare operands. It also switches
4956 // whether we return true for 'equality', so we compensate by picking the
4957 // opposite condition code to our original choice.
4958 if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE ||
4959 CC == ISD::SETUGT) {
4960 swpCmpOps = !swpCmpOps;
4961 swpVselOps = !swpVselOps;
4962 CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT;
4963 }
4964
4965 // 'ordered' is 'anything but unordered', so use the VS condition code and
4966 // swap the VSEL operands.
4967 if (CC == ISD::SETO) {
4968 CondCode = ARMCC::VS;
4969 swpVselOps = true;
4970 }
4971
4972 // 'unordered or not equal' is 'anything but equal', so use the EQ condition
4973 // code and swap the VSEL operands. Also do this if we don't care about the
4974 // unordered case.
4975 if (CC == ISD::SETUNE || CC == ISD::SETNE) {
4976 CondCode = ARMCC::EQ;
4977 swpVselOps = true;
4978 }
4979}
4980
4981SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal,
4982 SDValue TrueVal, SDValue ARMcc, SDValue CCR,
4983 SDValue Cmp, SelectionDAG &DAG) const {
4984 if (!Subtarget->hasFP64() && VT == MVT::f64) {
4985 FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl,
4986 DAG.getVTList(MVT::i32, MVT::i32), FalseVal);
4987 TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl,
4988 DAG.getVTList(MVT::i32, MVT::i32), TrueVal);
4989
4990 SDValue TrueLow = TrueVal.getValue(0);
4991 SDValue TrueHigh = TrueVal.getValue(1);
4992 SDValue FalseLow = FalseVal.getValue(0);
4993 SDValue FalseHigh = FalseVal.getValue(1);
4994
4995 SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow,
4996 ARMcc, CCR, Cmp);
4997 SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh,
4998 ARMcc, CCR, duplicateCmp(Cmp, DAG));
4999
5000 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High);
5001 } else {
5002 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
5003 Cmp);
5004 }
5005}
5006
5007static bool isGTorGE(ISD::CondCode CC) {
5008 return CC == ISD::SETGT || CC == ISD::SETGE;
5009}
5010
5011static bool isLTorLE(ISD::CondCode CC) {
5012 return CC == ISD::SETLT || CC == ISD::SETLE;
5013}
5014
5015// See if a conditional (LHS CC RHS ? TrueVal : FalseVal) is lower-saturating.
5016// All of these conditions (and their <= and >= counterparts) will do:
5017// x < k ? k : x
5018// x > k ? x : k
5019// k < x ? x : k
5020// k > x ? k : x
5021static bool isLowerSaturate(const SDValue LHS, const SDValue RHS,
5022 const SDValue TrueVal, const SDValue FalseVal,
5023 const ISD::CondCode CC, const SDValue K) {
5024 return (isGTorGE(CC) &&
5025 ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))) ||
5026 (isLTorLE(CC) &&
5027 ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal)));
5028}
5029
5030// Check if two chained conditionals could be converted into SSAT or USAT.
5031//
5032// SSAT can replace a set of two conditional selectors that bound a number to an
5033// interval of type [k, ~k] when k + 1 is a power of 2. Here are some examples:
5034//
5035// x < -k ? -k : (x > k ? k : x)
5036// x < -k ? -k : (x < k ? x : k)
5037// x > -k ? (x > k ? k : x) : -k
5038// x < k ? (x < -k ? -k : x) : k
5039// etc.
5040//
5041// LLVM canonicalizes these to either a min(max()) or a max(min())
5042 // pattern. This function tries to match one of these and will return an SSAT
5043 // node if successful.
5044 //
5045 // USAT works similarly to SSAT, but bounds the value to the interval [0, k],
5046 // where k + 1 is a power of 2.
5047static SDValue LowerSaturatingConditional(SDValue Op, SelectionDAG &DAG) {
5048 EVT VT = Op.getValueType();
5049 SDValue V1 = Op.getOperand(0);
5050 SDValue K1 = Op.getOperand(1);
5051 SDValue TrueVal1 = Op.getOperand(2);
5052 SDValue FalseVal1 = Op.getOperand(3);
5053 ISD::CondCode CC1 = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5054
5055 const SDValue Op2 = isa<ConstantSDNode>(TrueVal1) ? FalseVal1 : TrueVal1;
5056 if (Op2.getOpcode() != ISD::SELECT_CC)
5057 return SDValue();
5058
5059 SDValue V2 = Op2.getOperand(0);
5060 SDValue K2 = Op2.getOperand(1);
5061 SDValue TrueVal2 = Op2.getOperand(2);
5062 SDValue FalseVal2 = Op2.getOperand(3);
5063 ISD::CondCode CC2 = cast<CondCodeSDNode>(Op2.getOperand(4))->get();
5064
5065 SDValue V1Tmp = V1;
5066 SDValue V2Tmp = V2;
5067
5068 // Check that the registers and the constants match a max(min()) or min(max())
5069 // pattern
5070 if (V1Tmp != TrueVal1 || V2Tmp != TrueVal2 || K1 != FalseVal1 ||
5071 K2 != FalseVal2 ||
5072 !((isGTorGE(CC1) && isLTorLE(CC2)) || (isLTorLE(CC1) && isGTorGE(CC2))))
5073 return SDValue();
5074
5075 // Check that the constant in the lower-bound check is
5076 // the opposite of the constant in the upper-bound check
5077 // in 1's complement.
5078 if (!isa<ConstantSDNode>(K1) || !isa<ConstantSDNode>(K2))
5079 return SDValue();
5080
5081 int64_t Val1 = cast<ConstantSDNode>(K1)->getSExtValue();
5082 int64_t Val2 = cast<ConstantSDNode>(K2)->getSExtValue();
5083 int64_t PosVal = std::max(Val1, Val2);
5084 int64_t NegVal = std::min(Val1, Val2);
5085
5086 if (!((Val1 > Val2 && isLTorLE(CC1)) || (Val1 < Val2 && isLTorLE(CC2))) ||
5087 !isPowerOf2_64(PosVal + 1))
5088 return SDValue();
5089
5090 // Handle the difference between USAT (unsigned) and SSAT (signed)
5091 // saturation
5092 // At this point, PosVal is guaranteed to be positive
5093 uint64_t K = PosVal;
5094 SDLoc dl(Op);
5095 if (Val1 == ~Val2)
5096 return DAG.getNode(ARMISD::SSAT, dl, VT, V2Tmp,
5097 DAG.getConstant(countTrailingOnes(K), dl, VT));
5098 if (NegVal == 0)
5099 return DAG.getNode(ARMISD::USAT, dl, VT, V2Tmp,
5100 DAG.getConstant(countTrailingOnes(K), dl, VT));
5101
5102 return SDValue();
5103}
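
As a concrete instance of the interval checks above: clamping a signed value to [-128, 127] gives Val1 == ~Val2 with PosVal + 1 == 128, a power of two, so the chained selects become a signed saturation; clamping to [0, 255] gives NegVal == 0 and maps to the unsigned form. A standalone sketch of just that endpoint classification (plain C++; it deliberately omits the condition-code and operand matching the real code also performs):

#include <algorithm>
#include <cassert>
#include <cstdint>

enum class SatKind { None, Signed, Unsigned };

static SatKind classifyBounds(int64_t Val1, int64_t Val2) {
  int64_t PosVal = std::max(Val1, Val2);
  int64_t NegVal = std::min(Val1, Val2);
  if (PosVal < 0 || ((PosVal + 1) & PosVal) != 0) // PosVal + 1 must be a power of two
    return SatKind::None;
  if (Val1 == ~Val2)                              // e.g. -128 and 127
    return SatKind::Signed;
  if (NegVal == 0)                                // e.g. 0 and 255
    return SatKind::Unsigned;
  return SatKind::None;
}

int main() {
  assert(classifyBounds(-128, 127) == SatKind::Signed);
  assert(classifyBounds(0, 255) == SatKind::Unsigned);
  assert(classifyBounds(-100, 100) == SatKind::None);
}
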
5104
5105// Check if a condition of the type x < k ? k : x can be converted into a
5106// bit operation instead of conditional moves.
5107// Currently this is allowed given:
5108// - The conditions and values match up
5109// - k is 0 or -1 (all ones)
5110 // This function will not check the last condition; that's up to the caller.
5111 // It returns true if the transformation can be made, and in such a case
5112 // returns x in V and k in SatK.
5113static bool isLowerSaturatingConditional(const SDValue &Op, SDValue &V,
5114 SDValue &SatK)
5115{
5116 SDValue LHS = Op.getOperand(0);
5117 SDValue RHS = Op.getOperand(1);
5118 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5119 SDValue TrueVal = Op.getOperand(2);
5120 SDValue FalseVal = Op.getOperand(3);
5121
5122 SDValue *K = isa<ConstantSDNode>(LHS) ? &LHS : isa<ConstantSDNode>(RHS)
5123 ? &RHS
5124 : nullptr;
5125
5126 // No constant operation in comparison, early out
5127 if (!K)
5128 return false;
5129
5130 SDValue KTmp = isa<ConstantSDNode>(TrueVal) ? TrueVal : FalseVal;
5131 V = (KTmp == TrueVal) ? FalseVal : TrueVal;
5132 SDValue VTmp = (K && *K == LHS) ? RHS : LHS;
5133
5134  // If the constant in the comparison does not match the constant in the select,
5135  // or the variable does not match, early out.
5136 if (*K != KTmp || V != VTmp)
5137 return false;
5138
5139 if (isLowerSaturate(LHS, RHS, TrueVal, FalseVal, CC, *K)) {
5140 SatK = *K;
5141 return true;
5142 }
5143
5144 return false;
5145}
5146
5147bool ARMTargetLowering::isUnsupportedFloatingType(EVT VT) const {
5148 if (VT == MVT::f32)
5149 return !Subtarget->hasVFP2Base();
5150 if (VT == MVT::f64)
5151 return !Subtarget->hasFP64();
5152 if (VT == MVT::f16)
5153 return !Subtarget->hasFullFP16();
5154 return false;
5155}
5156
5157SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
5158 EVT VT = Op.getValueType();
5159 SDLoc dl(Op);
5160
5161 // Try to convert two saturating conditional selects into a single SSAT
5162 if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2())
5163 if (SDValue SatValue = LowerSaturatingConditional(Op, DAG))
5164 return SatValue;
5165
5166 // Try to convert expressions of the form x < k ? k : x (and similar forms)
5167  // into more efficient bit operations, which is possible when k is 0 or -1.
5168  // On ARM and Thumb-2, which have a flexible second operand, this will result
5169  // in a single instruction. On Thumb the shift and the bit operation will be
5170  // two instructions.
5171  // Only allow this transformation on full-width (32-bit) operations.
5172 SDValue LowerSatConstant;
5173 SDValue SatValue;
5174 if (VT == MVT::i32 &&
5175 isLowerSaturatingConditional(Op, SatValue, LowerSatConstant)) {
5176 SDValue ShiftV = DAG.getNode(ISD::SRA, dl, VT, SatValue,
5177 DAG.getConstant(31, dl, VT));
5178 if (isNullConstant(LowerSatConstant)) {
5179 SDValue NotShiftV = DAG.getNode(ISD::XOR, dl, VT, ShiftV,
5180 DAG.getAllOnesConstant(dl, VT));
5181 return DAG.getNode(ISD::AND, dl, VT, SatValue, NotShiftV);
5182 } else if (isAllOnesConstant(LowerSatConstant))
5183 return DAG.getNode(ISD::OR, dl, VT, SatValue, ShiftV);
5184 }
5185
5186 SDValue LHS = Op.getOperand(0);
5187 SDValue RHS = Op.getOperand(1);
5188 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5189 SDValue TrueVal = Op.getOperand(2);
5190 SDValue FalseVal = Op.getOperand(3);
5191 ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FalseVal);
5192 ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TrueVal);
5193
5194 if (Subtarget->hasV8_1MMainlineOps() && CFVal && CTVal &&
5195 LHS.getValueType() == MVT::i32 && RHS.getValueType() == MVT::i32) {
5196 unsigned TVal = CTVal->getZExtValue();
5197 unsigned FVal = CFVal->getZExtValue();
5198 unsigned Opcode = 0;
5199
5200 if (TVal == ~FVal) {
5201 Opcode = ARMISD::CSINV;
5202 } else if (TVal == ~FVal + 1) {
5203 Opcode = ARMISD::CSNEG;
5204 } else if (TVal + 1 == FVal) {
5205 Opcode = ARMISD::CSINC;
5206 } else if (TVal == FVal + 1) {
5207 Opcode = ARMISD::CSINC;
5208 std::swap(TrueVal, FalseVal);
5209 std::swap(TVal, FVal);
5210 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5211 }
5212
5213 if (Opcode) {
5214 // If one of the constants is cheaper than another, materialise the
5215 // cheaper one and let the csel generate the other.
5216 if (Opcode != ARMISD::CSINC &&
5217 HasLowerConstantMaterializationCost(FVal, TVal, Subtarget)) {
5218 std::swap(TrueVal, FalseVal);
5219 std::swap(TVal, FVal);
5220 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5221 }
5222
5223      // Attempt to use ZR by checking whether TVal is 0, possibly inverting the
5224      // condition to get there. CSINC is not invertible like the other two
5225      // (~(~a) == a, -(-a) == a, but (a+1)+1 != a).
5226 if (FVal == 0 && Opcode != ARMISD::CSINC) {
5227 std::swap(TrueVal, FalseVal);
5228 std::swap(TVal, FVal);
5229 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5230 }
5231
5232 // Drops F's value because we can get it by inverting/negating TVal.
5233 FalseVal = TrueVal;
5234
5235 SDValue ARMcc;
5236 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5237 EVT VT = TrueVal.getValueType();
5238 return DAG.getNode(Opcode, dl, VT, TrueVal, FalseVal, ARMcc, Cmp);
5239 }
5240 }
5241
5242 if (isUnsupportedFloatingType(LHS.getValueType())) {
5243 DAG.getTargetLoweringInfo().softenSetCCOperands(
5244 DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS);
5245
5246 // If softenSetCCOperands only returned one value, we should compare it to
5247 // zero.
5248 if (!RHS.getNode()) {
5249 RHS = DAG.getConstant(0, dl, LHS.getValueType());
5250 CC = ISD::SETNE;
5251 }
5252 }
5253
5254 if (LHS.getValueType() == MVT::i32) {
5255 // Try to generate VSEL on ARMv8.
5256 // The VSEL instruction can't use all the usual ARM condition
5257 // codes: it only has two bits to select the condition code, so it's
5258 // constrained to use only GE, GT, VS and EQ.
5259 //
5260 // To implement all the various ISD::SETXXX opcodes, we sometimes need to
5261 // swap the operands of the previous compare instruction (effectively
5262 // inverting the compare condition, swapping 'less' and 'greater') and
5263 // sometimes need to swap the operands to the VSEL (which inverts the
5264 // condition in the sense of firing whenever the previous condition didn't)
5265 if (Subtarget->hasFPARMv8Base() && (TrueVal.getValueType() == MVT::f16 ||
5266 TrueVal.getValueType() == MVT::f32 ||
5267 TrueVal.getValueType() == MVT::f64)) {
5268 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
5269 if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
5270 CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
5271 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5272 std::swap(TrueVal, FalseVal);
5273 }
5274 }
5275
5276 SDValue ARMcc;
5277 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5278 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5279    // Choose GE over PL, which vsel does not support.
5280 if (cast<ConstantSDNode>(ARMcc)->getZExtValue() == ARMCC::PL)
5281 ARMcc = DAG.getConstant(ARMCC::GE, dl, MVT::i32);
5282 return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
5283 }
5284
5285 ARMCC::CondCodes CondCode, CondCode2;
5286 FPCCToARMCC(CC, CondCode, CondCode2);
5287
5288 // Normalize the fp compare. If RHS is zero we prefer to keep it there so we
5289 // match CMPFPw0 instead of CMPFP, though we don't do this for f16 because we
5290 // must use VSEL (limited condition codes), due to not having conditional f16
5291 // moves.
5292 if (Subtarget->hasFPARMv8Base() &&
5293 !(isFloatingPointZero(RHS) && TrueVal.getValueType() != MVT::f16) &&
5294 (TrueVal.getValueType() == MVT::f16 ||
5295 TrueVal.getValueType() == MVT::f32 ||
5296 TrueVal.getValueType() == MVT::f64)) {
5297 bool swpCmpOps = false;
5298 bool swpVselOps = false;
5299 checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
5300
5301 if (CondCode == ARMCC::GT || CondCode == ARMCC::GE ||
5302 CondCode == ARMCC::VS || CondCode == ARMCC::EQ) {
5303 if (swpCmpOps)
5304 std::swap(LHS, RHS);
5305 if (swpVselOps)
5306 std::swap(TrueVal, FalseVal);
5307 }
5308 }
5309
5310 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5311 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
5312 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5313 SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
5314 if (CondCode2 != ARMCC::AL) {
5315 SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32);
5316 // FIXME: Needs another CMP because flag can have but one use.
5317 SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
5318 Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG);
5319 }
5320 return Result;
5321}
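
The v8.1-M block near the top of LowerSELECT_CC keys on three relationships between the two select constants. A standalone illustration of those relationships on plain 32-bit integers (the returned strings are just labels for the matched pattern, not the lowering itself):

#include <cassert>
#include <cstdint>
#include <string>

static std::string pickCSel(uint32_t TVal, uint32_t FVal) {
  if (TVal == ~FVal) return "CSINV";         // FalseVal is the bitwise NOT of TrueVal
  if (TVal == ~FVal + 1) return "CSNEG";     // FalseVal is the negation of TrueVal
  if (TVal + 1 == FVal || TVal == FVal + 1)  // the constants differ by one
    return "CSINC";                          // (one direction also inverts the condition)
  return "CSEL";
}

int main() {
  assert(pickCSel(5, ~5u) == "CSINV");
  assert(pickCSel(7, uint32_t(-7)) == "CSNEG");
  assert(pickCSel(5, 6) == "CSINC");
}
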
5322
5323/// canChangeToInt - Given the fp compare operand, return true if it is suitable
5324/// to morph to an integer compare sequence.
5325static bool canChangeToInt(SDValue Op, bool &SeenZero,
5326 const ARMSubtarget *Subtarget) {
5327 SDNode *N = Op.getNode();
5328 if (!N->hasOneUse())
5329 // Otherwise it requires moving the value from fp to integer registers.
5330 return false;
5331 if (!N->getNumValues())
5332 return false;
5333 EVT VT = Op.getValueType();
5334 if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
5335 // f32 case is generally profitable. f64 case only makes sense when vcmpe +
5336 // vmrs are very slow, e.g. cortex-a8.
5337 return false;
5338
5339 if (isFloatingPointZero(Op)) {
5340 SeenZero = true;
5341 return true;
5342 }
5343 return ISD::isNormalLoad(N);
5344}
5345
5346static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
5347 if (isFloatingPointZero(Op))
5348 return DAG.getConstant(0, SDLoc(Op), MVT::i32);
5349
5350 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
5351 return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(),
5352 Ld->getPointerInfo(), Ld->getAlignment(),
5353 Ld->getMemOperand()->getFlags());
5354
5355  llvm_unreachable("Unknown VFP cmp argument!");
5356}
5357
5358static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
5359 SDValue &RetVal1, SDValue &RetVal2) {
5360 SDLoc dl(Op);
5361
5362 if (isFloatingPointZero(Op)) {
5363 RetVal1 = DAG.getConstant(0, dl, MVT::i32);
5364 RetVal2 = DAG.getConstant(0, dl, MVT::i32);
5365 return;
5366 }
5367
5368 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
5369 SDValue Ptr = Ld->getBasePtr();
5370 RetVal1 =
5371 DAG.getLoad(MVT::i32, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
5372 Ld->getAlignment(), Ld->getMemOperand()->getFlags());
5373
5374 EVT PtrType = Ptr.getValueType();
5375 unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
5376 SDValue NewPtr = DAG.getNode(ISD::ADD, dl,
5377 PtrType, Ptr, DAG.getConstant(4, dl, PtrType));
5378 RetVal2 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), NewPtr,
5379 Ld->getPointerInfo().getWithOffset(4), NewAlign,
5380 Ld->getMemOperand()->getFlags());
5381 return;
5382 }
5383
5384  llvm_unreachable("Unknown VFP cmp argument!");
5385}
5386
5387/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
5388/// f32 and even f64 comparisons to integer ones.
5389SDValue
5390ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
5391 SDValue Chain = Op.getOperand(0);
5392 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
5393 SDValue LHS = Op.getOperand(2);
5394 SDValue RHS = Op.getOperand(3);
5395 SDValue Dest = Op.getOperand(4);
5396 SDLoc dl(Op);
5397
5398 bool LHSSeenZero = false;
5399 bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
5400 bool RHSSeenZero = false;
5401 bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);
5402 if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
5403 // If unsafe fp math optimization is enabled and there are no other uses of
5404 // the CMP operands, and the condition code is EQ or NE, we can optimize it
5405 // to an integer comparison.
5406 if (CC == ISD::SETOEQ)
5407 CC = ISD::SETEQ;
5408 else if (CC == ISD::SETUNE)
5409 CC = ISD::SETNE;
5410
5411 SDValue Mask = DAG.getConstant(0x7fffffff, dl, MVT::i32);
5412 SDValue ARMcc;
5413 if (LHS.getValueType() == MVT::f32) {
5414 LHS = DAG.getNode(ISD::AND, dl, MVT::i32,
5415 bitcastf32Toi32(LHS, DAG), Mask);
5416 RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
5417 bitcastf32Toi32(RHS, DAG), Mask);
5418 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5419 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5420 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
5421 Chain, Dest, ARMcc, CCR, Cmp);
5422 }
5423
5424 SDValue LHS1, LHS2;
5425 SDValue RHS1, RHS2;
5426 expandf64Toi32(LHS, DAG, LHS1, LHS2);
5427 expandf64Toi32(RHS, DAG, RHS1, RHS2);
5428 LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);
5429 RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
5430 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
5431 ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5432 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
5433 SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
5434 return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops);
5435 }
5436
5437 return SDValue();
5438}
5439
5440SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
5441 SDValue Chain = Op.getOperand(0);
5442 SDValue Cond = Op.getOperand(1);
5443 SDValue Dest = Op.getOperand(2);
5444 SDLoc dl(Op);
5445
5446 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
5447 // instruction.
5448 unsigned Opc = Cond.getOpcode();
5449 bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) &&
5450 !Subtarget->isThumb1Only();
5451 if (Cond.getResNo() == 1 &&
5452 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
5453 Opc == ISD::USUBO || OptimizeMul)) {
5454 // Only lower legal XALUO ops.
5455 if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
5456 return SDValue();
5457
5458 // The actual operation with overflow check.
5459 SDValue Value, OverflowCmp;
5460 SDValue ARMcc;
5461 std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
5462
5463 // Reverse the condition code.
5464 ARMCC::CondCodes CondCode =
5465 (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
5466 CondCode = ARMCC::getOppositeCondition(CondCode);
5467 ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
5468 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5469
5470 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR,
5471 OverflowCmp);
5472 }
5473
5474 return SDValue();
5475}
5476
5477SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
5478 SDValue Chain = Op.getOperand(0);
5479 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
5480 SDValue LHS = Op.getOperand(2);
5481 SDValue RHS = Op.getOperand(3);
5482 SDValue Dest = Op.getOperand(4);
5483 SDLoc dl(Op);
5484
5485 if (isUnsupportedFloatingType(LHS.getValueType())) {
5486 DAG.getTargetLoweringInfo().softenSetCCOperands(
5487 DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS);
5488
5489 // If softenSetCCOperands only returned one value, we should compare it to
5490 // zero.
5491 if (!RHS.getNode()) {
5492 RHS = DAG.getConstant(0, dl, LHS.getValueType());
5493 CC = ISD::SETNE;
5494 }
5495 }
5496
5497 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
5498 // instruction.
5499 unsigned Opc = LHS.getOpcode();
5500 bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) &&
5501 !Subtarget->isThumb1Only();
5502 if (LHS.getResNo() == 1 && (isOneConstant(RHS) || isNullConstant(RHS)) &&
5503 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
5504 Opc == ISD::USUBO || OptimizeMul) &&
5505 (CC == ISD::SETEQ || CC == ISD::SETNE)) {
5506 // Only lower legal XALUO ops.
5507 if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
5508 return SDValue();
5509
5510 // The actual operation with overflow check.
5511 SDValue Value, OverflowCmp;
5512 SDValue ARMcc;
5513 std::tie(Value, OverflowCmp) = getARMXALUOOp(LHS.getValue(0), DAG, ARMcc);
5514
5515 if ((CC == ISD::SETNE) != isOneConstant(RHS)) {
5516 // Reverse the condition code.
5517 ARMCC::CondCodes CondCode =
5518 (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
5519 CondCode = ARMCC::getOppositeCondition(CondCode);
5520 ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
5521 }
5522 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5523
5524 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR,
5525 OverflowCmp);
5526 }
5527
5528 if (LHS.getValueType() == MVT::i32) {
5529 SDValue ARMcc;
5530 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5531 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5532 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
5533 Chain, Dest, ARMcc, CCR, Cmp);
5534 }
5535
5536 if (getTargetMachine().Options.UnsafeFPMath &&
5537 (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
5538 CC == ISD::SETNE || CC == ISD::SETUNE)) {
5539 if (SDValue Result = OptimizeVFPBrcond(Op, DAG))
5540 return Result;
5541 }
5542
5543 ARMCC::CondCodes CondCode, CondCode2;
5544 FPCCToARMCC(CC, CondCode, CondCode2);
5545
5546 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5547 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
5548 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5549 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
5550 SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
5551 SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
5552 if (CondCode2 != ARMCC::AL) {
5553 ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
5554 SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
5555 Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
5556 }
5557 return Res;
5558}
5559
5560SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
5561 SDValue Chain = Op.getOperand(0);
5562 SDValue Table = Op.getOperand(1);
5563 SDValue Index = Op.getOperand(2);
5564 SDLoc dl(Op);
5565
5566 EVT PTy = getPointerTy(DAG.getDataLayout());
5567 JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
5568 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
5569 Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI);
5570 Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, dl, PTy));
5571 SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Index);
5572 if (Subtarget->isThumb2() || (Subtarget->hasV8MBaselineOps() && Subtarget->isThumb())) {
5573    // Thumb2 and ARMv8-M use a two-level jump. That is, they jump into the jump
5574    // table, which does another jump to the destination. This also makes it easier
5575 // to translate it to TBB / TBH later (Thumb2 only).
5576 // FIXME: This might not work if the function is extremely large.
5577 return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
5578 Addr, Op.getOperand(2), JTI);
5579 }
5580 if (isPositionIndependent() || Subtarget->isROPI()) {
5581 Addr =
5582 DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
5583 MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
5584 Chain = Addr.getValue(1);
5585 Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Addr);
5586 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
5587 } else {
5588 Addr =
5589 DAG.getLoad(PTy, dl, Chain, Addr,
5590 MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
5591 Chain = Addr.getValue(1);
5592 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
5593 }
5594}
5595
5596static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
5597 EVT VT = Op.getValueType();
5598 SDLoc dl(Op);
5599
5600 if (Op.getValueType().getVectorElementType() == MVT::i32) {
5601 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
5602 return Op;
5603 return DAG.UnrollVectorOp(Op.getNode());
5604 }
5605
5606 const bool HasFullFP16 =
5607 static_cast<const ARMSubtarget&>(DAG.getSubtarget()).hasFullFP16();
5608
5609 EVT NewTy;
5610 const EVT OpTy = Op.getOperand(0).getValueType();
5611 if (OpTy == MVT::v4f32)
5612 NewTy = MVT::v4i32;
5613 else if (OpTy == MVT::v4f16 && HasFullFP16)
5614 NewTy = MVT::v4i16;
5615 else if (OpTy == MVT::v8f16 && HasFullFP16)
5616 NewTy = MVT::v8i16;
5617 else
5618    llvm_unreachable("Invalid type for custom lowering!");
5619
5620 if (VT != MVT::v4i16 && VT != MVT::v8i16)
5621 return DAG.UnrollVectorOp(Op.getNode());
5622
5623 Op = DAG.getNode(Op.getOpcode(), dl, NewTy, Op.getOperand(0));
5624 return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
5625}
5626
5627SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
5628 EVT VT = Op.getValueType();
5629 if (VT.isVector())
5630 return LowerVectorFP_TO_INT(Op, DAG);
5631
5632 bool IsStrict = Op->isStrictFPOpcode();
5633 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
5634
5635 if (isUnsupportedFloatingType(SrcVal.getValueType())) {
5636 RTLIB::Libcall LC;
5637 if (Op.getOpcode() == ISD::FP_TO_SINT ||
5638 Op.getOpcode() == ISD::STRICT_FP_TO_SINT)
5639 LC = RTLIB::getFPTOSINT(SrcVal.getValueType(),
5640 Op.getValueType());
5641 else
5642 LC = RTLIB::getFPTOUINT(SrcVal.getValueType(),
5643 Op.getValueType());
5644 SDLoc Loc(Op);
5645 MakeLibCallOptions CallOptions;
5646 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
5647 SDValue Result;
5648 std::tie(Result, Chain) = makeLibCall(DAG, LC, Op.getValueType(), SrcVal,
5649 CallOptions, Loc, Chain);
5650 return IsStrict ? DAG.getMergeValues({Result, Chain}, Loc) : Result;
5651 }
5652
5653 // FIXME: Remove this when we have strict fp instruction selection patterns
5654 if (IsStrict) {
5655 SDLoc Loc(Op);
5656 SDValue Result =
5657 DAG.getNode(Op.getOpcode() == ISD::STRICT_FP_TO_SINT ? ISD::FP_TO_SINT
5658 : ISD::FP_TO_UINT,
5659 Loc, Op.getValueType(), SrcVal);
5660 return DAG.getMergeValues({Result, Op.getOperand(0)}, Loc);
5661 }
5662
5663 return Op;
5664}
5665
5666static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
5667 EVT VT = Op.getValueType();
5668 SDLoc dl(Op);
5669
5670 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
5671 if (VT.getVectorElementType() == MVT::f32)
5672 return Op;
5673 return DAG.UnrollVectorOp(Op.getNode());
5674 }
5675
5676  assert((Op.getOperand(0).getValueType() == MVT::v4i16 ||
5677          Op.getOperand(0).getValueType() == MVT::v8i16) &&
5678         "Invalid type for custom lowering!");
5679
5680 const bool HasFullFP16 =
5681 static_cast<const ARMSubtarget&>(DAG.getSubtarget()).hasFullFP16();
5682
5683 EVT DestVecType;
5684 if (VT == MVT::v4f32)
5685 DestVecType = MVT::v4i32;
5686 else if (VT == MVT::v4f16 && HasFullFP16)
5687 DestVecType = MVT::v4i16;
5688 else if (VT == MVT::v8f16 && HasFullFP16)
5689 DestVecType = MVT::v8i16;
5690 else
5691 return DAG.UnrollVectorOp(Op.getNode());
5692
5693 unsigned CastOpc;
5694 unsigned Opc;
5695 switch (Op.getOpcode()) {
5696  default: llvm_unreachable("Invalid opcode!");
5697 case ISD::SINT_TO_FP:
5698 CastOpc = ISD::SIGN_EXTEND;
5699 Opc = ISD::SINT_TO_FP;
5700 break;
5701 case ISD::UINT_TO_FP:
5702 CastOpc = ISD::ZERO_EXTEND;
5703 Opc = ISD::UINT_TO_FP;
5704 break;
5705 }
5706
5707 Op = DAG.getNode(CastOpc, dl, DestVecType, Op.getOperand(0));
5708 return DAG.getNode(Opc, dl, VT, Op);
5709}
5710
5711SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
5712 EVT VT = Op.getValueType();
5713 if (VT.isVector())
5714 return LowerVectorINT_TO_FP(Op, DAG);
5715 if (isUnsupportedFloatingType(VT)) {
5716 RTLIB::Libcall LC;
5717 if (Op.getOpcode() == ISD::SINT_TO_FP)
5718 LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(),
5719 Op.getValueType());
5720 else
5721 LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(),
5722 Op.getValueType());
5723 MakeLibCallOptions CallOptions;
5724 return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
5725 CallOptions, SDLoc(Op)).first;
5726 }
5727
5728 return Op;
5729}
5730
5731SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
5732 // Implement fcopysign with a fabs and a conditional fneg.
5733 SDValue Tmp0 = Op.getOperand(0);
5734 SDValue Tmp1 = Op.getOperand(1);
5735 SDLoc dl(Op);
5736 EVT VT = Op.getValueType();
5737 EVT SrcVT = Tmp1.getValueType();
5738 bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
5739 Tmp0.getOpcode() == ARMISD::VMOVDRR;
5740 bool UseNEON = !InGPR && Subtarget->hasNEON();
5741
5742 if (UseNEON) {
5743 // Use VBSL to copy the sign bit.
5744 unsigned EncodedVal = ARM_AM::createVMOVModImm(0x6, 0x80);
5745 SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
5746 DAG.getTargetConstant(EncodedVal, dl, MVT::i32));
5747 EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
5748 if (VT == MVT::f64)
5749 Mask = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT,
5750 DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
5751 DAG.getConstant(32, dl, MVT::i32));
5752 else /*if (VT == MVT::f32)*/
5753 Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
5754 if (SrcVT == MVT::f32) {
5755 Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
5756 if (VT == MVT::f64)
5757 Tmp1 = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT,
5758 DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
5759 DAG.getConstant(32, dl, MVT::i32));
5760 } else if (VT == MVT::f32)
5761 Tmp1 = DAG.getNode(ARMISD::VSHRuIMM, dl, MVT::v1i64,
5762 DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
5763 DAG.getConstant(32, dl, MVT::i32));
5764 Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
5765 Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
5766
5767 SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0xff),
5768 dl, MVT::i32);
5769 AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
5770 SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
5771 DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
5772
5773 SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
5774 DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
5775 DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
5776 if (VT == MVT::f32) {
5777 Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
5778 Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
5779 DAG.getConstant(0, dl, MVT::i32));
5780 } else {
5781 Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
5782 }
5783
5784 return Res;
5785 }
5786
5787 // Bitcast operand 1 to i32.
5788 if (SrcVT == MVT::f64)
5789 Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
5790 Tmp1).getValue(1);
5791 Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
5792
5793 // Or in the signbit with integer operations.
5794 SDValue Mask1 = DAG.getConstant(0x80000000, dl, MVT::i32);
5795 SDValue Mask2 = DAG.getConstant(0x7fffffff, dl, MVT::i32);
5796 Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
5797 if (VT == MVT::f32) {
5798 Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
5799 DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
5800 return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
5801 DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
5802 }
5803
5804 // f64: Or the high part with signbit and then combine two parts.
5805 Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
5806 Tmp0);
5807 SDValue Lo = Tmp0.getValue(0);
5808 SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
5809 Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
5810 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
5811}
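
For the non-NEON f32 path this reduces to plain sign-bit masking with the two constants used above (0x80000000 and 0x7fffffff). A scalar sketch of that bit manipulation, assuming IEEE-754 binary32 and using memcpy as the bit cast:

#include <cassert>
#include <cstdint>
#include <cstring>

// copysign(Mag, Sgn) = (bits(Mag) & 0x7fffffff) | (bits(Sgn) & 0x80000000)
static float copysignBits(float Mag, float Sgn) {
  uint32_t M, S;
  std::memcpy(&M, &Mag, sizeof(M));
  std::memcpy(&S, &Sgn, sizeof(S));
  uint32_t R = (M & 0x7fffffffu) | (S & 0x80000000u);
  float Out;
  std::memcpy(&Out, &R, sizeof(Out));
  return Out;
}

int main() {
  assert(copysignBits(3.5f, -1.0f) == -3.5f);
  assert(copysignBits(-2.0f, 1.0f) == 2.0f);
}
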
5812
5813SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
5814 MachineFunction &MF = DAG.getMachineFunction();
5815 MachineFrameInfo &MFI = MF.getFrameInfo();
5816 MFI.setReturnAddressIsTaken(true);
5817
5818 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
5819 return SDValue();
5820
5821 EVT VT = Op.getValueType();
5822 SDLoc dl(Op);
5823 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
5824 if (Depth) {
5825 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
5826 SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
5827 return DAG.getLoad(VT, dl, DAG.getEntryNode(),
5828 DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
5829 MachinePointerInfo());
5830 }
5831
5832 // Return LR, which contains the return address. Mark it an implicit live-in.
5833 unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
5834 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
5835}
5836
5837SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
5838 const ARMBaseRegisterInfo &ARI =
5839 *static_cast<const ARMBaseRegisterInfo*>(RegInfo);
5840 MachineFunction &MF = DAG.getMachineFunction();
5841 MachineFrameInfo &MFI = MF.getFrameInfo();
5842 MFI.setFrameAddressIsTaken(true);
5843
5844 EVT VT = Op.getValueType();
5845 SDLoc dl(Op); // FIXME probably not meaningful
5846 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
5847 Register FrameReg = ARI.getFrameRegister(MF);
5848 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
5849 while (Depth--)
5850 FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
5851 MachinePointerInfo());
5852 return FrameAddr;
5853}
5854
5855// FIXME? Maybe this could be a TableGen attribute on some registers and
5856// this table could be generated automatically from RegInfo.
5857Register ARMTargetLowering::getRegisterByName(const char* RegName, LLT VT,
5858 const MachineFunction &MF) const {
5859 Register Reg = StringSwitch<unsigned>(RegName)
5860 .Case("sp", ARM::SP)
5861 .Default(0);
5862 if (Reg)
5863 return Reg;
5864 report_fatal_error(Twine("Invalid register name \""
5865 + StringRef(RegName) + "\"."));
5866}
5867
5868 // The result is a 64-bit value, so split it into two 32-bit values and return
5869 // them as a pair of values.
5870static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl<SDValue> &Results,
5871 SelectionDAG &DAG) {
5872 SDLoc DL(N);
5873
5874 // This function is only supposed to be called for i64 type destination.
5875  assert(N->getValueType(0) == MVT::i64
5876         && "ExpandREAD_REGISTER called for non-i64 type result.");
5877
5878 SDValue Read = DAG.getNode(ISD::READ_REGISTER, DL,
5879 DAG.getVTList(MVT::i32, MVT::i32, MVT::Other),
5880 N->getOperand(0),
5881 N->getOperand(1));
5882
5883 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Read.getValue(0),
5884 Read.getValue(1)));
5885 Results.push_back(Read.getOperand(0));
5886}
5887
5888/// \p BC is a bitcast that is about to be turned into a VMOVDRR.
5889/// When \p DstVT, the destination type of \p BC, is on the vector
5890/// register bank and the source of bitcast, \p Op, operates on the same bank,
5891/// it might be possible to combine them, such that everything stays on the
5892/// vector register bank.
5893 /// \returns The node that would replace \p BC, if the combine
5894 /// is possible.
5895static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC,
5896 SelectionDAG &DAG) {
5897 SDValue Op = BC->getOperand(0);
5898 EVT DstVT = BC->getValueType(0);
5899
5900 // The only vector instruction that can produce a scalar (remember,
5901 // since the bitcast was about to be turned into VMOVDRR, the source
5902 // type is i64) from a vector is EXTRACT_VECTOR_ELT.
5903 // Moreover, we can do this combine only if there is one use.
5904  // Finally, if the destination type is not a vector, there is not
5905  // much point in forcing everything onto the vector bank.
5906 if (!DstVT.isVector() || Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
5907 !Op.hasOneUse())
5908 return SDValue();
5909
5910 // If the index is not constant, we will introduce an additional
5911 // multiply that will stick.
5912 // Give up in that case.
5913 ConstantSDNode *Index = dyn_cast<ConstantSDNode>(Op.getOperand(1));
5914 if (!Index)
5915 return SDValue();
5916 unsigned DstNumElt = DstVT.getVectorNumElements();
5917
5918 // Compute the new index.
5919 const APInt &APIntIndex = Index->getAPIntValue();
5920 APInt NewIndex(APIntIndex.getBitWidth(), DstNumElt);
5921 NewIndex *= APIntIndex;
5922 // Check if the new constant index fits into i32.
5923 if (NewIndex.getBitWidth() > 32)
5924 return SDValue();
5925
5926 // vMTy bitcast(i64 extractelt vNi64 src, i32 index) ->
5927 // vMTy extractsubvector vNxMTy (bitcast vNi64 src), i32 index*M)
5928 SDLoc dl(Op);
5929 SDValue ExtractSrc = Op.getOperand(0);
5930 EVT VecVT = EVT::getVectorVT(
5931 *DAG.getContext(), DstVT.getScalarType(),
5932 ExtractSrc.getValueType().getVectorNumElements() * DstNumElt);
5933 SDValue BitCast = DAG.getNode(ISD::BITCAST, dl, VecVT, ExtractSrc);
5934 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, BitCast,
5935 DAG.getConstant(NewIndex.getZExtValue(), dl, MVT::i32));
5936}
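
As a concrete instance of the rewrite in the comment above (an illustrative example, not taken from a test): v2f32 bitcast(i64 extractelt(v4i64 src, i32 2)) becomes v2f32 extract_subvector(v8f32 bitcast(v4i64 src), i32 4), since DstNumElt is 2 and NewIndex = 2 * 2 = 4.
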
5937
5938/// ExpandBITCAST - If the target supports VFP, this function is called to
5939/// expand a bit convert where either the source or destination type is i64 to
5940/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
5941/// operand type is illegal (e.g., v2f32 for a target that doesn't support
5942/// vectors), since the legalizer won't know what to do with that.
5943SDValue ARMTargetLowering::ExpandBITCAST(SDNode *N, SelectionDAG &DAG,
5944 const ARMSubtarget *Subtarget) const {
5945 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5946 SDLoc dl(N);
5947 SDValue Op = N->getOperand(0);
5948
5949 // This function is only supposed to be called for i16 and i64 types, either
5950 // as the source or destination of the bit convert.
5951 EVT SrcVT = Op.getValueType();
5952 EVT DstVT = N->getValueType(0);
5953
5954 if ((SrcVT == MVT::i16 || SrcVT == MVT::i32) &&
5955 (DstVT == MVT::f16 || DstVT == MVT::bf16))
5956 return MoveToHPR(SDLoc(N), DAG, MVT::i32, DstVT.getSimpleVT(),
5957 DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), MVT::i32, Op));
5958
5959 if ((DstVT == MVT::i16 || DstVT == MVT::i32) &&
5960 (SrcVT == MVT::f16 || SrcVT == MVT::bf16))
5961 return DAG.getNode(
5962 ISD::TRUNCATE, SDLoc(N), DstVT,
5963 MoveFromHPR(SDLoc(N), DAG, MVT::i32, SrcVT.getSimpleVT(), Op));
5964
5965 if (!(SrcVT == MVT::i64 || DstVT == MVT::i64))
5966 return SDValue();
5967
5968 // Turn i64->f64 into VMOVDRR.
5969 if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
5970 // Do not force values to GPRs (this is what VMOVDRR does for the inputs)
5971 // if we can combine the bitcast with its source.
5972 if (SDValue Val = CombineVMOVDRRCandidateWithVecOp(N, DAG))
5973 return Val;
5974
5975 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
5976 DAG.getConstant(0, dl, MVT::i32));
5977 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
5978 DAG.getConstant(1, dl, MVT::i32));
5979 return DAG.getNode(ISD::BITCAST, dl, DstVT,
5980 DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
5981 }
5982
5983 // Turn f64->i64 into VMOVRRD.
5984 if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
5985 SDValue Cvt;
5986 if (DAG.getDataLayout().isBigEndian() && SrcVT.isVector() &&
5987 SrcVT.getVectorNumElements() > 1)
5988 Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
5989 DAG.getVTList(MVT::i32, MVT::i32),
5990 DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op));
5991 else
5992 Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
5993 DAG.getVTList(MVT::i32, MVT::i32), Op);
5994 // Merge the pieces into a single i64 value.
5995 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
5996 }
5997
5998 return SDValue();
5999}
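
The i64 -> f64 direction above simply routes the value through its two 32-bit halves (low word first, as VMOVDRR expects). A trivial standalone sketch of that split, outside the DAG:

#include <cassert>
#include <cstdint>
#include <utility>

static std::pair<uint32_t, uint32_t> splitI64(uint64_t V) {
  return std::make_pair(static_cast<uint32_t>(V),         // Lo: EXTRACT_ELEMENT index 0
                        static_cast<uint32_t>(V >> 32));  // Hi: EXTRACT_ELEMENT index 1
}

int main() {
  std::pair<uint32_t, uint32_t> P = splitI64(0x1122334455667788ULL);
  assert(P.first == 0x55667788u && P.second == 0x11223344u);
}
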
6000
6001/// getZeroVector - Returns a vector of specified type with all zero elements.
6002/// Zero vectors are used to represent vector negation and in those cases
6003/// will be implemented with the NEON VNEG instruction. However, VNEG does
6004/// not support i64 elements, so sometimes the zero vectors will need to be
6005/// explicitly constructed. Regardless, use a canonical VMOV to create the
6006/// zero vector.
6007static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
6008  assert(VT.isVector() && "Expected a vector type");
6009 // The canonical modified immediate encoding of a zero vector is....0!
6010 SDValue EncodedVal = DAG.getTargetConstant(0, dl, MVT::i32);
6011 EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
6012 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
6013 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
6014}
6015
6016 /// LowerShiftRightParts - Lower SRA_PARTS, which returns two
6017 /// i32 values and takes a 2 x i32 value to shift plus a shift amount.
6018SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
6019 SelectionDAG &DAG) const {
6020  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
6021 EVT VT = Op.getValueType();
6022 unsigned VTBits = VT.getSizeInBits();
6023 SDLoc dl(Op);
6024 SDValue ShOpLo = Op.getOperand(0);
6025 SDValue ShOpHi = Op.getOperand(1);
6026 SDValue ShAmt = Op.getOperand(2);
6027 SDValue ARMcc;
6028 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
6029 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
6030
6031  assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
6032
6033 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
6034 DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
6035 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
6036 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
6037 DAG.getConstant(VTBits, dl, MVT::i32));
6038 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
6039 SDValue LoSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
6040 SDValue LoBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
6041 SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6042 ISD::SETGE, ARMcc, DAG, dl);
6043 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift,
6044 ARMcc, CCR, CmpLo);
6045
6046 SDValue HiSmallShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
6047 SDValue HiBigShift = Opc == ISD::SRA
6048 ? DAG.getNode(Opc, dl, VT, ShOpHi,
6049 DAG.getConstant(VTBits - 1, dl, VT))
6050 : DAG.getConstant(0, dl, VT);
6051 SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6052 ISD::SETGE, ARMcc, DAG, dl);
6053 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
6054 ARMcc, CCR, CmpHi);
6055
6056 SDValue Ops[2] = { Lo, Hi };
6057 return DAG.getMergeValues(Ops, dl);
6058}
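
The CMOV pair above selects between a "small shift" (amount < 32, bits cross from the high word into the low word) and a "big shift" (amount >= 32, the low word comes entirely from the high word). A plain-integer sketch of the logical-shift case only (the SRA case additionally fills the high word with the sign):

#include <cassert>
#include <cstdint>

// Shift a 64-bit value held as {Lo, Hi} right logically by Amt, 0 < Amt < 64.
static void lshr64(uint32_t &Lo, uint32_t &Hi, unsigned Amt) {
  if (Amt < 32) {
    Lo = (Lo >> Amt) | (Hi << (32 - Amt)); // small shift: splice Hi bits into Lo
    Hi = Hi >> Amt;
  } else {
    Lo = Hi >> (Amt - 32);                 // big shift: Lo is a shifted copy of Hi
    Hi = 0;
  }
}

int main() {
  uint32_t Lo = 0x55667788u, Hi = 0x11223344u; // the value 0x1122334455667788
  lshr64(Lo, Hi, 8);
  assert(Lo == 0x44556677u && Hi == 0x00112233u);
  Lo = 0x55667788u; Hi = 0x11223344u;
  lshr64(Lo, Hi, 40);
  assert(Lo == 0x00112233u && Hi == 0u);
}
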
6059
6060 /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
6061 /// i32 values and takes a 2 x i32 value to shift plus a shift amount.
6062SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
6063 SelectionDAG &DAG) const {
6064  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
6065 EVT VT = Op.getValueType();
6066 unsigned VTBits = VT.getSizeInBits();
6067 SDLoc dl(Op);
6068 SDValue ShOpLo = Op.getOperand(0);
6069 SDValue ShOpHi = Op.getOperand(1);
6070 SDValue ShAmt = Op.getOperand(2);
6071 SDValue ARMcc;
6072 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
6073
6074  assert(Op.getOpcode() == ISD::SHL_PARTS);
6075 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
6076 DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
6077 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
6078 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
6079 SDValue HiSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
6080
6081 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
6082 DAG.getConstant(VTBits, dl, MVT::i32));
6083 SDValue HiBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
6084 SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6085 ISD::SETGE, ARMcc, DAG, dl);
6086 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
6087 ARMcc, CCR, CmpHi);
6088
6089 SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6090 ISD::SETGE, ARMcc, DAG, dl);
6091 SDValue LoSmallShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
6092 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift,
6093 DAG.getConstant(0, dl, VT), ARMcc, CCR, CmpLo);
6094
6095 SDValue Ops[2] = { Lo, Hi };
6096 return DAG.getMergeValues(Ops, dl);
6097}
6098
6099SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
6100 SelectionDAG &DAG) const {
6101 // The rounding mode is in bits 23:22 of the FPSCR.
6102  // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0.
6103  // The formula we use to implement this is ((FPSCR + (1 << 22)) >> 22) & 3,
6104  // so that the shift and the AND get folded into a bitfield extract.
6105 SDLoc dl(Op);
6106 SDValue Chain = Op.getOperand(0);
6107 SDValue Ops[] = {Chain,
6108 DAG.getConstant(Intrinsic::arm_get_fpscr, dl, MVT::i32)};
6109
6110 SDValue FPSCR =
6111 DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, {MVT::i32, MVT::Other}, Ops);
6112 Chain = FPSCR.getValue(1);
6113 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
6114 DAG.getConstant(1U << 22, dl, MVT::i32));
6115 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
6116 DAG.getConstant(22, dl, MVT::i32));
6117 SDValue And = DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
6118 DAG.getConstant(3, dl, MVT::i32));
6119 return DAG.getMergeValues({And, Chain}, dl);
6120}
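
The 0->1, 1->2, 2->3, 3->0 mapping above is just an add-then-mask on the two-bit field. A scalar check of the formula from the comment, using a synthetic FPSCR image:

#include <cassert>
#include <cstdint>

static unsigned fltRoundsFromFPSCR(uint32_t FPSCR) {
  return ((FPSCR + (1u << 22)) >> 22) & 3u; // same formula as the lowering comment
}

int main() {
  // Place rounding mode RM in FPSCR[23:22] and check the rotated mapping.
  for (unsigned RM = 0; RM < 4; ++RM)
    assert(fltRoundsFromFPSCR(RM << 22) == ((RM + 1) & 3));
}
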
6121
6122SDValue ARMTargetLowering::LowerSET_ROUNDING(SDValue Op,
6123 SelectionDAG &DAG) const {
6124 SDLoc DL(Op);
6125 SDValue Chain = Op->getOperand(0);
6126 SDValue RMValue = Op->getOperand(1);
6127
6128 // The rounding mode is in bits 23:22 of the FPSCR.
6129 // The llvm.set.rounding argument value to ARM rounding mode value mapping
6130  // is 0->3, 1->0, 2->1, 3->2. The formula we use to implement this is
6131  // (((arg - 1) & 3) << 22).
6132  //
6133  // The argument of llvm.set.rounding is expected to be within the range
6134  // [0, 3], so NearestTiesToAway (4) is not handled here. It is the
6135  // responsibility of the code that generates llvm.set.rounding to ensure
6136  // this condition.
6137
6138 // Calculate new value of FPSCR[23:22].
6139 RMValue = DAG.getNode(ISD::SUB, DL, MVT::i32, RMValue,
6140 DAG.getConstant(1, DL, MVT::i32));
6141 RMValue = DAG.getNode(ISD::AND, DL, MVT::i32, RMValue,
6142 DAG.getConstant(0x3, DL, MVT::i32));
6143 RMValue = DAG.getNode(ISD::SHL, DL, MVT::i32, RMValue,
6144 DAG.getConstant(ARM::RoundingBitsPos, DL, MVT::i32));
6145
6146 // Get current value of FPSCR.
6147 SDValue Ops[] = {Chain,
6148 DAG.getConstant(Intrinsic::arm_get_fpscr, DL, MVT::i32)};
6149 SDValue FPSCR =
6150 DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i32, MVT::Other}, Ops);
6151 Chain = FPSCR.getValue(1);
6152 FPSCR = FPSCR.getValue(0);
6153
6154 // Put new rounding mode into FPSCR[23:22].
6155 const unsigned RMMask = ~(ARM::Rounding::rmMask << ARM::RoundingBitsPos);
6156 FPSCR = DAG.getNode(ISD::AND, DL, MVT::i32, FPSCR,
6157 DAG.getConstant(RMMask, DL, MVT::i32));
6158 FPSCR = DAG.getNode(ISD::OR, DL, MVT::i32, FPSCR, RMValue);
6159 SDValue Ops2[] = {
6160 Chain, DAG.getConstant(Intrinsic::arm_set_fpscr, DL, MVT::i32), FPSCR};
6161 return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
6162}
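
The inverse mapping (argument 0->3, 1->0, 2->1, 3->2) and the FPSCR read-modify-write above can be checked with the same kind of scalar sketch, again on a synthetic FPSCR image:

#include <cassert>
#include <cstdint>

static uint32_t setRoundingField(uint32_t FPSCR, uint32_t Arg) {
  uint32_t Field = ((Arg - 1u) & 3u) << 22;   // new FPSCR[23:22]
  return (FPSCR & ~(3u << 22)) | Field;       // clear the old field, OR in the new one
}

int main() {
  const unsigned Expected[4] = {3, 0, 1, 2};  // ARM rounding mode per argument 0..3
  for (unsigned Arg = 0; Arg < 4; ++Arg)
    assert(((setRoundingField(0xFFFFFFFFu, Arg) >> 22) & 3u) == Expected[Arg]);
}
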
6163
6164static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
6165 const ARMSubtarget *ST) {
6166 SDLoc dl(N);
6167 EVT VT = N->getValueType(0);
6168 if (VT.isVector() && ST->hasNEON()) {
6169
6170 // Compute the least significant set bit: LSB = X & -X
6171 SDValue X = N->getOperand(0);
6172 SDValue NX = DAG.getNode(ISD::SUB, dl, VT, getZeroVector(VT, DAG, dl), X);
6173 SDValue LSB = DAG.getNode(ISD::AND, dl, VT, X, NX);
6174
6175 EVT ElemTy = VT.getVectorElementType();
6176
6177 if (ElemTy == MVT::i8) {
6178 // Compute with: cttz(x) = ctpop(lsb - 1)
6179 SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6180 DAG.getTargetConstant(1, dl, ElemTy));
6181 SDValue Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
6182 return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
6183 }
6184
6185 if ((ElemTy == MVT::i16 || ElemTy == MVT::i32) &&
6186 (N->getOpcode() == ISD::CTTZ_ZERO_UNDEF)) {
6187 // Compute with: cttz(x) = (width - 1) - ctlz(lsb), if x != 0
6188 unsigned NumBits = ElemTy.getSizeInBits();
6189 SDValue WidthMinus1 =
6190 DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6191 DAG.getTargetConstant(NumBits - 1, dl, ElemTy));
6192 SDValue CTLZ = DAG.getNode(ISD::CTLZ, dl, VT, LSB);
6193 return DAG.getNode(ISD::SUB, dl, VT, WidthMinus1, CTLZ);
6194 }
6195
6196 // Compute with: cttz(x) = ctpop(lsb - 1)
6197
6198 // Compute LSB - 1.
6199 SDValue Bits;
6200 if (ElemTy == MVT::i64) {
6201 // Load constant 0xffff'ffff'ffff'ffff to register.
6202 SDValue FF = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6203 DAG.getTargetConstant(0x1eff, dl, MVT::i32));
6204 Bits = DAG.getNode(ISD::ADD, dl, VT, LSB, FF);
6205 } else {
6206 SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6207 DAG.getTargetConstant(1, dl, ElemTy));
6208 Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
6209 }
6210 return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
6211 }
6212
6213 if (!ST->hasV6T2Ops())
6214 return SDValue();
6215
6216 SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, VT, N->getOperand(0));
6217 return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
6218}
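A brief scalar illustration (not from the source; it uses GCC/Clang builtins and hypothetical helper names) of the two identities LowerCTTZ applies lane-wise: cttz(x) = ctpop((x & -x) - 1), and for x != 0, cttz(x) = (width - 1) - ctlz(x & -x).

#include <cassert>
#include <cstdint>

// Scalar versions of the identities the vector lowering above relies on.
static unsigned cttzViaCtpop(uint32_t X) {
  uint32_t LSB = X & -X;              // isolate the least significant set bit
  return __builtin_popcount(LSB - 1); // ones below the LSB == trailing zeros
}

static unsigned cttzViaCtlz(uint32_t X) {
  assert(X != 0 && "identity only holds for non-zero inputs");
  uint32_t LSB = X & -X;
  return 31 - __builtin_clz(LSB);     // position of the isolated bit
}

int main() {
  for (uint32_t X : {1u, 2u, 12u, 0x80000000u, 0xA0u}) {
    assert(cttzViaCtpop(X) == (unsigned)__builtin_ctz(X));
    assert(cttzViaCtlz(X) == (unsigned)__builtin_ctz(X));
  }
  // The ctpop form also yields the defined CTTZ(0) == 32 for i32.
  assert(cttzViaCtpop(0) == 32);
  return 0;
}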
6219
6220static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
6221 const ARMSubtarget *ST) {
6222 EVT VT = N->getValueType(0);
6223 SDLoc DL(N);
6224
6225 assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
6226 assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
6227 VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
6228 "Unexpected type for custom ctpop lowering");
6229
6230 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6231 EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
6232 SDValue Res = DAG.getBitcast(VT8Bit, N->getOperand(0));
6233 Res = DAG.getNode(ISD::CTPOP, DL, VT8Bit, Res);
6234
6235 // Widen v8i8/v16i8 CTPOP result to VT by repeatedly widening pairwise adds.
6236 unsigned EltSize = 8;
6237 unsigned NumElts = VT.is64BitVector() ? 8 : 16;
6238 while (EltSize != VT.getScalarSizeInBits()) {
6239 SmallVector<SDValue, 8> Ops;
6240 Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddlu, DL,
6241 TLI.getPointerTy(DAG.getDataLayout())));
6242 Ops.push_back(Res);
6243
6244 EltSize *= 2;
6245 NumElts /= 2;
6246 MVT WidenVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize), NumElts);
6247 Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, WidenVT, Ops);
6248 }
6249
6250 return Res;
6251}
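The loop above widens an 8-bit per-lane popcount up to the requested element size with vpaddl (pairwise add long). A standalone scalar sketch of the same idea for one 32-bit element (illustrative only; uses a GCC/Clang builtin as the reference):

#include <cassert>
#include <cstdint>

// Popcount of a 32-bit value by summing per-byte popcounts pairwise,
// mirroring the v16i8 CTPOP + repeated vpaddlu widening used above.
static unsigned popcount32ViaBytes(uint32_t X) {
  unsigned B[4];
  for (int i = 0; i < 4; ++i)
    B[i] = __builtin_popcount((X >> (8 * i)) & 0xff); // 8-bit lane popcounts
  unsigned H[2] = {B[0] + B[1], B[2] + B[3]};         // pairwise add to 16-bit lanes
  return H[0] + H[1];                                 // pairwise add to a 32-bit lane
}

int main() {
  for (uint32_t X : {0u, 1u, 0xffffffffu, 0x12345678u})
    assert(popcount32ViaBytes(X) == (unsigned)__builtin_popcount(X));
  return 0;
}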
6252
6253/// getVShiftImm - Check if this is a valid build_vector for the immediate
6254/// operand of a vector shift operation, where all the elements of the
6255/// build_vector must have the same constant integer value.
6256static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
6257 // Ignore bit_converts.
6258 while (Op.getOpcode() == ISD::BITCAST)
6259 Op = Op.getOperand(0);
6260 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
6261 APInt SplatBits, SplatUndef;
6262 unsigned SplatBitSize;
6263 bool HasAnyUndefs;
6264 if (!BVN ||
6265 !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
6266 ElementBits) ||
6267 SplatBitSize > ElementBits)
6268 return false;
6269 Cnt = SplatBits.getSExtValue();
6270 return true;
6271}
6272
6273/// isVShiftLImm - Check if this is a valid build_vector for the immediate
6274/// operand of a vector shift left operation. That value must be in the range:
6275/// 0 <= Value < ElementBits for a left shift; or
6276/// 0 <= Value <= ElementBits for a long left shift.
6277static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
6278 assert(VT.isVector() && "vector shift count is not a vector type");
6279 int64_t ElementBits = VT.getScalarSizeInBits();
6280 if (!getVShiftImm(Op, ElementBits, Cnt))
6281 return false;
6282 return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
6283}
6284
6285/// isVShiftRImm - Check if this is a valid build_vector for the immediate
6286/// operand of a vector shift right operation. For a shift opcode, the value
6287/// is positive, but for an intrinsic the value count must be negative. The
6288/// absolute value must be in the range:
6289/// 1 <= |Value| <= ElementBits for a right shift; or
6290/// 1 <= |Value| <= ElementBits/2 for a narrow right shift.
6291static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
6292 int64_t &Cnt) {
6293 assert(VT.isVector() && "vector shift count is not a vector type");
6294 int64_t ElementBits = VT.getScalarSizeInBits();
6295 if (!getVShiftImm(Op, ElementBits, Cnt))
6296 return false;
6297 if (!isIntrinsic)
6298 return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
6299 if (Cnt >= -(isNarrow ? ElementBits / 2 : ElementBits) && Cnt <= -1) {
6300 Cnt = -Cnt;
6301 return true;
6302 }
6303 return false;
6304}
6305
6306static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
6307 const ARMSubtarget *ST) {
6308 EVT VT = N->getValueType(0);
6309 SDLoc dl(N);
6310 int64_t Cnt;
6311
6312 if (!VT.isVector())
6313 return SDValue();
6314
6315 // We essentially have two forms here. Shift by an immediate and shift by a
6316 // vector register (there is also a shift by a GPR, but that is just handled
6317 // with a tablegen pattern). We cannot easily match shift by an immediate in
6318 // tablegen so we do that here and generate a VSHLIMM/VSHRsIMM/VSHRuIMM.
6319 // For shifting by a vector, we don't have VSHR, only VSHL (which can be
6320 // signed or unsigned, and a negative shift indicates a shift right).
6321 if (N->getOpcode() == ISD::SHL) {
6322 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
6323 return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),
6324 DAG.getConstant(Cnt, dl, MVT::i32));
6325 return DAG.getNode(ARMISD::VSHLu, dl, VT, N->getOperand(0),
6326 N->getOperand(1));
6327 }
6328
6329 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
6330 "unexpected vector shift opcode");
6331
6332 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
6333 unsigned VShiftOpc =
6334 (N->getOpcode() == ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);
6335 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
6336 DAG.getConstant(Cnt, dl, MVT::i32));
6337 }
6338
6339 // Other right shifts we don't have operations for (we use a shift left by a
6340 // negative number).
6341 EVT ShiftVT = N->getOperand(1).getValueType();
6342 SDValue NegatedCount = DAG.getNode(
6343 ISD::SUB, dl, ShiftVT, getZeroVector(ShiftVT, DAG, dl), N->getOperand(1));
6344 unsigned VShiftOpc =
6345 (N->getOpcode() == ISD::SRA ? ARMISD::VSHLs : ARMISD::VSHLu);
6346 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0), NegatedCount);
6347}
6348
6349static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
6350 const ARMSubtarget *ST) {
6351 EVT VT = N->getValueType(0);
6352 SDLoc dl(N);
6353
6354 // We can get here for a node like i32 = ISD::SHL i32, i64
6355 if (VT != MVT::i64)
6356 return SDValue();
6357
6358 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA ||
6359 N->getOpcode() == ISD::SHL) &&
6360 "Unknown shift to lower!");
6361
6362 unsigned ShOpc = N->getOpcode();
6363 if (ST->hasMVEIntegerOps()) {
6364 SDValue ShAmt = N->getOperand(1);
6365 unsigned ShPartsOpc = ARMISD::LSLL;
6366 ConstantSDNode *Con = dyn_cast<ConstantSDNode>(ShAmt);
6367
6368 // If the shift amount is greater than 32 or has a greater bitwidth than 64
6369 // then do the default optimisation
6370 if (ShAmt->getValueType(0).getSizeInBits() > 64 ||
6371 (Con && (Con->getZExtValue() == 0 || Con->getZExtValue() >= 32)))
6372 return SDValue();
6373
6374 // Extract the lower 32 bits of the shift amount if it's not an i32
6375 if (ShAmt->getValueType(0) != MVT::i32)
6376 ShAmt = DAG.getZExtOrTrunc(ShAmt, dl, MVT::i32);
6377
6378 if (ShOpc == ISD::SRL) {
6379 if (!Con)
6380 // There is no t2LSRLr instruction so negate and perform an lsll if the
6381 // shift amount is in a register, emulating a right shift.
6382 ShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
6383 DAG.getConstant(0, dl, MVT::i32), ShAmt);
6384 else
6385 // Else generate an lsrl on the immediate shift amount
6386 ShPartsOpc = ARMISD::LSRL;
6387 } else if (ShOpc == ISD::SRA)
6388 ShPartsOpc = ARMISD::ASRL;
6389
6390 // Lower 32 bits of the destination/source
6391 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
6392 DAG.getConstant(0, dl, MVT::i32));
6393 // Upper 32 bits of the destination/source
6394 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
6395 DAG.getConstant(1, dl, MVT::i32));
6396
6397 // Generate the shift operation as computed above
6398 Lo = DAG.getNode(ShPartsOpc, dl, DAG.getVTList(MVT::i32, MVT::i32), Lo, Hi,
6399 ShAmt);
6400 // The upper 32 bits come from the second return value of lsll
6401 Hi = SDValue(Lo.getNode(), 1);
6402 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6403 }
6404
6405 // We only lower SRA, SRL of 1 here, all others use generic lowering.
6406 if (!isOneConstant(N->getOperand(1)) || N->getOpcode() == ISD::SHL)
6407 return SDValue();
6408
6409 // If we are in thumb mode, we don't have RRX.
6410 if (ST->isThumb1Only())
6411 return SDValue();
6412
6413 // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
6414 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
6415 DAG.getConstant(0, dl, MVT::i32));
6416 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
6417 DAG.getConstant(1, dl, MVT::i32));
6418
6419 // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
6420 // captures the result into a carry flag.
6421 unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
6422 Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi);
6423
6424 // The low part is an ARMISD::RRX operand, which shifts the carry in.
6425 Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
6426
6427 // Merge the pieces into a single i64 value.
6428 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6429}
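For the non-MVE path at the end of Expand64BitShift, a scalar sketch (hypothetical helper, not from the file) of what the SRL_FLAG + RRX pair computes for a 64-bit logical shift right by one: the high word shifts right and its dropped bit re-enters the low word through the carry.

#include <cassert>
#include <cstdint>

// Emulate "64-bit SRL by 1" in terms of two 32-bit halves, the way the
// SRL_FLAG (shift hi, capture carry) + RRX (rotate carry into lo) pair does.
static uint64_t lshr64By1(uint32_t Lo, uint32_t Hi) {
  unsigned Carry = Hi & 1;                    // bit shifted out of the high word
  uint32_t NewHi = Hi >> 1;                   // SRL_FLAG result
  uint32_t NewLo = (Lo >> 1) | (Carry << 31); // RRX: carry becomes the new top bit
  return ((uint64_t)NewHi << 32) | NewLo;
}

int main() {
  uint64_t V = 0x123456789abcdef1ull;
  assert(lshr64By1((uint32_t)V, (uint32_t)(V >> 32)) == (V >> 1));
  return 0;
}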
6430
6431static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG,
6432 const ARMSubtarget *ST) {
6433 bool Invert = false;
6434 bool Swap = false;
6435 unsigned Opc = ARMCC::AL;
6436
6437 SDValue Op0 = Op.getOperand(0);
6438 SDValue Op1 = Op.getOperand(1);
6439 SDValue CC = Op.getOperand(2);
6440 EVT VT = Op.getValueType();
6441 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
6442 SDLoc dl(Op);
6443
6444 EVT CmpVT;
6445 if (ST->hasNEON())
6446 CmpVT = Op0.getValueType().changeVectorElementTypeToInteger();
6447 else {
6448 assert(ST->hasMVEIntegerOps() &&
6449 "No hardware support for integer vector comparison!");
6450
6451 if (Op.getValueType().getVectorElementType() != MVT::i1)
6452 return SDValue();
6453
6454 // Make sure we expand floating point setcc to scalar if we do not have
6455 // mve.fp, so that we can handle them from there.
6456 if (Op0.getValueType().isFloatingPoint() && !ST->hasMVEFloatOps())
6457 return SDValue();
6458
6459 CmpVT = VT;
6460 }
6461
6462 if (Op0.getValueType().getVectorElementType() == MVT::i64 &&
6463 (SetCCOpcode == ISD::SETEQ || SetCCOpcode == ISD::SETNE)) {
6464 // Special-case integer 64-bit equality comparisons. They aren't legal,
6465 // but they can be lowered with a few vector instructions.
6466 unsigned CmpElements = CmpVT.getVectorNumElements() * 2;
6467 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, CmpElements);
6468 SDValue CastOp0 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op0);
6469 SDValue CastOp1 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op1);
6470 SDValue Cmp = DAG.getNode(ISD::SETCC, dl, SplitVT, CastOp0, CastOp1,
6471 DAG.getCondCode(ISD::SETEQ));
6472 SDValue Reversed = DAG.getNode(ARMISD::VREV64, dl, SplitVT, Cmp);
6473 SDValue Merged = DAG.getNode(ISD::AND, dl, SplitVT, Cmp, Reversed);
6474 Merged = DAG.getNode(ISD::BITCAST, dl, CmpVT, Merged);
6475 if (SetCCOpcode == ISD::SETNE)
6476 Merged = DAG.getNOT(dl, Merged, CmpVT);
6477 Merged = DAG.getSExtOrTrunc(Merged, dl, VT);
6478 return Merged;
6479 }
6480
6481 if (CmpVT.getVectorElementType() == MVT::i64)
6482 // 64-bit comparisons are not legal in general.
6483 return SDValue();
6484
6485 if (Op1.getValueType().isFloatingPoint()) {
6486 switch (SetCCOpcode) {
6487 default: llvm_unreachable("Illegal FP comparison");
6488 case ISD::SETUNE:
6489 case ISD::SETNE:
6490 if (ST->hasMVEFloatOps()) {
6491 Opc = ARMCC::NE; break;
6492 } else {
6493 Invert = true; LLVM_FALLTHROUGH;
6494 }
6495 case ISD::SETOEQ:
6496 case ISD::SETEQ: Opc = ARMCC::EQ; break;
6497 case ISD::SETOLT:
6498 case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH;
6499 case ISD::SETOGT:
6500 case ISD::SETGT: Opc = ARMCC::GT; break;
6501 case ISD::SETOLE:
6502 case ISD::SETLE: Swap = true; LLVM_FALLTHROUGH;
6503 case ISD::SETOGE:
6504 case ISD::SETGE: Opc = ARMCC::GE; break;
6505 case ISD::SETUGE: Swap = true; LLVM_FALLTHROUGH;
6506 case ISD::SETULE: Invert = true; Opc = ARMCC::GT; break;
6507 case ISD::SETUGT: Swap = true; LLVM_FALLTHROUGH;
6508 case ISD::SETULT: Invert = true; Opc = ARMCC::GE; break;
6509 case ISD::SETUEQ: Invert = true; LLVM_FALLTHROUGH;
6510 case ISD::SETONE: {
6511 // Expand this to (OLT | OGT).
6512 SDValue TmpOp0 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op1, Op0,
6513 DAG.getConstant(ARMCC::GT, dl, MVT::i32));
6514 SDValue TmpOp1 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
6515 DAG.getConstant(ARMCC::GT, dl, MVT::i32));
6516 SDValue Result = DAG.getNode(ISD::OR, dl, CmpVT, TmpOp0, TmpOp1);
6517 if (Invert)
6518 Result = DAG.getNOT(dl, Result, VT);
6519 return Result;
6520 }
6521 case ISD::SETUO: Invert = true; LLVM_FALLTHROUGH;
6522 case ISD::SETO: {
6523 // Expand this to (OLT | OGE).
6524 SDValue TmpOp0 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op1, Op0,
6525 DAG.getConstant(ARMCC::GT, dl, MVT::i32));
6526 SDValue TmpOp1 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
6527 DAG.getConstant(ARMCC::GE, dl, MVT::i32));
6528 SDValue Result = DAG.getNode(ISD::OR, dl, CmpVT, TmpOp0, TmpOp1);
6529 if (Invert)
6530 Result = DAG.getNOT(dl, Result, VT);
6531 return Result;
6532 }
6533 }
6534 } else {
6535 // Integer comparisons.
6536 switch (SetCCOpcode) {
6537 default: llvm_unreachable("Illegal integer comparison");
6538 case ISD::SETNE:
6539 if (ST->hasMVEIntegerOps()) {
6540 Opc = ARMCC::NE; break;
6541 } else {
6542 Invert = true; LLVM_FALLTHROUGH;
6543 }
6544 case ISD::SETEQ: Opc = ARMCC::EQ; break;
6545 case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH;
6546 case ISD::SETGT: Opc = ARMCC::GT; break;
6547 case ISD::SETLE: Swap = true; LLVM_FALLTHROUGH;
6548 case ISD::SETGE: Opc = ARMCC::GE; break;
6549 case ISD::SETULT: Swap = true; LLVM_FALLTHROUGH;
6550 case ISD::SETUGT: Opc = ARMCC::HI; break;
6551 case ISD::SETULE: Swap = true; LLVM_FALLTHROUGH;
6552 case ISD::SETUGE: Opc = ARMCC::HS; break;
6553 }
6554
6555 // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
6556 if (ST->hasNEON() && Opc == ARMCC::EQ) {
6557 SDValue AndOp;
6558 if (ISD::isBuildVectorAllZeros(Op1.getNode()))
6559 AndOp = Op0;
6560 else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
6561 AndOp = Op1;
6562
6563 // Ignore bitconvert.
6564 if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
6565 AndOp = AndOp.getOperand(0);
6566
6567 if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
6568 Op0 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(0));
6569 Op1 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(1));
6570 SDValue Result = DAG.getNode(ARMISD::VTST, dl, CmpVT, Op0, Op1);
6571 if (!Invert)
6572 Result = DAG.getNOT(dl, Result, VT);
6573 return Result;
6574 }
6575 }
6576 }
6577
6578 if (Swap)
6579 std::swap(Op0, Op1);
6580
6581 // If one of the operands is a constant vector zero, attempt to fold the
6582 // comparison to a specialized compare-against-zero form.
6583 SDValue SingleOp;
6584 if (ISD::isBuildVectorAllZeros(Op1.getNode()))
6585 SingleOp = Op0;
6586 else if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
6587 if (Opc == ARMCC::GE)
6588 Opc = ARMCC::LE;
6589 else if (Opc == ARMCC::GT)
6590 Opc = ARMCC::LT;
6591 SingleOp = Op1;
6592 }
6593
6594 SDValue Result;
6595 if (SingleOp.getNode()) {
6596 Result = DAG.getNode(ARMISD::VCMPZ, dl, CmpVT, SingleOp,
6597 DAG.getConstant(Opc, dl, MVT::i32));
6598 } else {
6599 Result = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
6600 DAG.getConstant(Opc, dl, MVT::i32));
6601 }
6602
6603 Result = DAG.getSExtOrTrunc(Result, dl, VT);
6604
6605 if (Invert)
6606 Result = DAG.getNOT(dl, Result, VT);
6607
6608 return Result;
6609}
6610
6611static SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) {
6612 SDValue LHS = Op.getOperand(0);
6613 SDValue RHS = Op.getOperand(1);
6614 SDValue Carry = Op.getOperand(2);
6615 SDValue Cond = Op.getOperand(3);
6616 SDLoc DL(Op);
6617
6618 assert(LHS.getSimpleValueType().isInteger() && "SETCCCARRY is integer only.");
6619
6620 // ARMISD::SUBE expects a carry not a borrow like ISD::SUBCARRY so we
6621 // have to invert the carry first.
6622 Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
6623 DAG.getConstant(1, DL, MVT::i32), Carry);
6624 // This converts the boolean value carry into the carry flag.
6625 Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
6626
6627 SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
6628 SDValue Cmp = DAG.getNode(ARMISD::SUBE, DL, VTs, LHS, RHS, Carry);
6629
6630 SDValue FVal = DAG.getConstant(0, DL, MVT::i32);
6631 SDValue TVal = DAG.getConstant(1, DL, MVT::i32);
6632 SDValue ARMcc = DAG.getConstant(
6633 IntCCToARMCC(cast<CondCodeSDNode>(Cond)->get()), DL, MVT::i32);
6634 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
6635 SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, ARM::CPSR,
6636 Cmp.getValue(1), SDValue());
6637 return DAG.getNode(ARMISD::CMOV, DL, Op.getValueType(), FVal, TVal, ARMcc,
6638 CCR, Chain.getValue(1));
6639}
6640
6641/// isVMOVModifiedImm - Check if the specified splat value corresponds to a
6642/// valid vector constant for a NEON or MVE instruction with a "modified
6643/// immediate" operand (e.g., VMOV). If so, return the encoded value.
6644static SDValue isVMOVModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
6645 unsigned SplatBitSize, SelectionDAG &DAG,
6646 const SDLoc &dl, EVT &VT, EVT VectorVT,
6647 VMOVModImmType type) {
6648 unsigned OpCmode, Imm;
6649 bool is128Bits = VectorVT.is128BitVector();
6650
6651 // SplatBitSize is set to the smallest size that splats the vector, so a
6652 // zero vector will always have SplatBitSize == 8. However, NEON modified
6653 // immediate instructions other than VMOV do not support the 8-bit encoding
6654 // of a zero vector, and the default encoding of zero is supposed to be the
6655 // 32-bit version.
6656 if (SplatBits == 0)
6657 SplatBitSize = 32;
6658
6659 switch (SplatBitSize) {
6660 case 8:
6661 if (type != VMOVModImm)
6662 return SDValue();
6663 // Any 1-byte value is OK. Op=0, Cmode=1110.
6664 assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
6665 OpCmode = 0xe;
6666 Imm = SplatBits;
6667 VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
6668 break;
6669
6670 case 16:
6671 // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
6672 VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
6673 if ((SplatBits & ~0xff) == 0) {
6674 // Value = 0x00nn: Op=x, Cmode=100x.
6675 OpCmode = 0x8;
6676 Imm = SplatBits;
6677 break;
6678 }
6679 if ((SplatBits & ~0xff00) == 0) {
6680 // Value = 0xnn00: Op=x, Cmode=101x.
6681 OpCmode = 0xa;
6682 Imm = SplatBits >> 8;
6683 break;
6684 }
6685 return SDValue();
6686
6687 case 32:
6688 // NEON's 32-bit VMOV supports splat values where:
6689 // * only one byte is nonzero, or
6690 // * the least significant byte is 0xff and the second byte is nonzero, or
6691 // * the least significant 2 bytes are 0xff and the third is nonzero.
6692 VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
6693 if ((SplatBits & ~0xff) == 0) {
6694 // Value = 0x000000nn: Op=x, Cmode=000x.
6695 OpCmode = 0;
6696 Imm = SplatBits;
6697 break;
6698 }
6699 if ((SplatBits & ~0xff00) == 0) {
6700 // Value = 0x0000nn00: Op=x, Cmode=001x.
6701 OpCmode = 0x2;
6702 Imm = SplatBits >> 8;
6703 break;
6704 }
6705 if ((SplatBits & ~0xff0000) == 0) {
6706 // Value = 0x00nn0000: Op=x, Cmode=010x.
6707 OpCmode = 0x4;
6708 Imm = SplatBits >> 16;
6709 break;
6710 }
6711 if ((SplatBits & ~0xff000000) == 0) {
6712 // Value = 0xnn000000: Op=x, Cmode=011x.
6713 OpCmode = 0x6;
6714 Imm = SplatBits >> 24;
6715 break;
6716 }
6717
6718 // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC
6719 if (type == OtherModImm) return SDValue();
6720
6721 if ((SplatBits & ~0xffff) == 0 &&
6722 ((SplatBits | SplatUndef) & 0xff) == 0xff) {
6723 // Value = 0x0000nnff: Op=x, Cmode=1100.
6724 OpCmode = 0xc;
6725 Imm = SplatBits >> 8;
6726 break;
6727 }
6728
6729 // cmode == 0b1101 is not supported for MVE VMVN
6730 if (type == MVEVMVNModImm)
6731 return SDValue();
6732
6733 if ((SplatBits & ~0xffffff) == 0 &&
6734 ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
6735 // Value = 0x00nnffff: Op=x, Cmode=1101.
6736 OpCmode = 0xd;
6737 Imm = SplatBits >> 16;
6738 break;
6739 }
6740
6741 // Note: there are a few 32-bit splat values (specifically: 00ffff00,
6742 // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
6743 // VMOV.I32. A (very) minor optimization would be to replicate the value
6744 // and fall through here to test for a valid 64-bit splat. But, then the
6745 // caller would also need to check and handle the change in size.
6746 return SDValue();
6747
6748 case 64: {
6749 if (type != VMOVModImm)
6750 return SDValue();
6751 // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
6752 uint64_t BitMask = 0xff;
6753 uint64_t Val = 0;
6754 unsigned ImmMask = 1;
6755 Imm = 0;
6756 for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
6757 if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
6758 Val |= BitMask;
6759 Imm |= ImmMask;
6760 } else if ((SplatBits & BitMask) != 0) {
6761 return SDValue();
6762 }
6763 BitMask <<= 8;
6764 ImmMask <<= 1;
6765 }
6766
6767 if (DAG.getDataLayout().isBigEndian()) {
6768 // Reverse the order of elements within the vector.
6769 unsigned BytesPerElem = VectorVT.getScalarSizeInBits() / 8;
6770 unsigned Mask = (1 << BytesPerElem) - 1;
6771 unsigned NumElems = 8 / BytesPerElem;
6772 unsigned NewImm = 0;
6773 for (unsigned ElemNum = 0; ElemNum < NumElems; ++ElemNum) {
6774 unsigned Elem = ((Imm >> ElemNum * BytesPerElem) & Mask);
6775 NewImm |= Elem << (NumElems - ElemNum - 1) * BytesPerElem;
6776 }
6777 Imm = NewImm;
6778 }
6779
6780 // Op=1, Cmode=1110.
6781 OpCmode = 0x1e;
6782 VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
6783 break;
6784 }
6785
6786 default:
6787 llvm_unreachable("unexpected size for isVMOVModifiedImm")::llvm::llvm_unreachable_internal("unexpected size for isVMOVModifiedImm"
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/Target/ARM/ARMISelLowering.cpp"
, 6787)
;
6788 }
6789
6790 unsigned EncodedVal = ARM_AM::createVMOVModImm(OpCmode, Imm);
6791 return DAG.getTargetConstant(EncodedVal, dl, MVT::i32);
6792}
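As a reading aid, a standalone predicate (an illustrative sketch with a hypothetical name, not the file's API) capturing which 32-bit splat constants the case-32 logic above accepts for a plain VMOV modified immediate, with SplatUndef assumed to be 0:

#include <cassert>
#include <cstdint>

// True if Splat is representable as a 32-bit VMOV modified immediate:
// exactly one nonzero byte, or 0x0000nnff (cmode 1100), or 0x00nnffff (cmode 1101).
static bool isVMOV32ModImm(uint32_t Splat) {
  if ((Splat & ~0xffu) == 0 || (Splat & ~0xff00u) == 0 ||
      (Splat & ~0xff0000u) == 0 || (Splat & ~0xff000000u) == 0)
    return true;                                    // one nonzero byte
  if ((Splat & ~0xffffu) == 0 && (Splat & 0xff) == 0xff)
    return true;                                    // 0x0000nnff
  if ((Splat & ~0xffffffu) == 0 && (Splat & 0xffff) == 0xffff)
    return true;                                    // 0x00nnffff
  return false;
}

int main() {
  assert(isVMOV32ModImm(0x000000ab));
  assert(isVMOV32ModImm(0x00ab0000));
  assert(isVMOV32ModImm(0x0000abff));
  assert(isVMOV32ModImm(0x00abffff));
  assert(!isVMOV32ModImm(0x00ffff00)); // valid for VMOV.I64 but not VMOV.I32 (see note above)
  assert(!isVMOV32ModImm(0x12345678));
  return 0;
}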
6793
6794SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
6795 const ARMSubtarget *ST) const {
6796 EVT VT = Op.getValueType();
6797 bool IsDouble = (VT == MVT::f64);
6798 ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
6799 const APFloat &FPVal = CFP->getValueAPF();
6800
6801 // Prevent floating-point constants from using literal loads
6802 // when execute-only is enabled.
6803 if (ST->genExecuteOnly()) {
6804 // If we can represent the constant as an immediate, don't lower it
6805 if (isFPImmLegal(FPVal, VT))
6806 return Op;
6807 // Otherwise, construct as integer, and move to float register
6808 APInt INTVal = FPVal.bitcastToAPInt();
6809 SDLoc DL(CFP);
6810 switch (VT.getSimpleVT().SimpleTy) {
6811 default:
6812 llvm_unreachable("Unknown floating point type!")::llvm::llvm_unreachable_internal("Unknown floating point type!"
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/Target/ARM/ARMISelLowering.cpp"
, 6812)
;
6813 break;
6814 case MVT::f64: {
6815 SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
6816 SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
6817 return DAG.getNode(ARMISD::VMOVDRR, DL, MVT::f64, Lo, Hi);
6818 }
6819 case MVT::f32:
6820 return DAG.getNode(ARMISD::VMOVSR, DL, VT,
6821 DAG.getConstant(INTVal, DL, MVT::i32));
6822 }
6823 }
6824
6825 if (!ST->hasVFP3Base())
6826 return SDValue();
6827
6828 // Use the default (constant pool) lowering for double constants when we have
6829 // an SP-only FPU
6830 if (IsDouble && !Subtarget->hasFP64())
6831 return SDValue();
6832
6833 // Try splatting with a VMOV.f32...
6834 int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
6835
6836 if (ImmVal != -1) {
6837 if (IsDouble || !ST->useNEONForSinglePrecisionFP()) {
6838 // We have code in place to select a valid ConstantFP already, no need to
6839 // do any mangling.
6840 return Op;
6841 }
6842
6843 // It's a float and we are trying to use NEON operations where
6844 // possible. Lower it to a splat followed by an extract.
6845 SDLoc DL(Op);
6846 SDValue NewVal = DAG.getTargetConstant(ImmVal, DL, MVT::i32);
6847 SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
6848 NewVal);
6849 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant,
6850 DAG.getConstant(0, DL, MVT::i32));
6851 }
6852
6853 // The rest of our options are NEON only, make sure that's allowed before
6854 // proceeding.
6855 if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP()))
6856 return SDValue();
6857
6858 EVT VMovVT;
6859 uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue();
6860
6861 // It wouldn't really be worth bothering for doubles except for one very
6862 // important value, which does happen to match: 0.0. So make sure we don't do
6863 // anything stupid.
6864 if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32))
6865 return SDValue();
6866
6867 // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
6868 SDValue NewVal = isVMOVModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op),
6869 VMovVT, VT, VMOVModImm);
6870 if (NewVal != SDValue()) {
6871 SDLoc DL(Op);
6872 SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
6873 NewVal);
6874 if (IsDouble)
6875 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
6876
6877 // It's a float: cast and extract a vector element.
6878 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
6879 VecConstant);
6880 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
6881 DAG.getConstant(0, DL, MVT::i32));
6882 }
6883
6884 // Finally, try a VMVN.i32
6885 NewVal = isVMOVModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT,
6886 VT, VMVNModImm);
6887 if (NewVal != SDValue()) {
6888 SDLoc DL(Op);
6889 SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
6890
6891 if (IsDouble)
6892 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
6893
6894 // It's a float: cast and extract a vector element.
6895 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
6896 VecConstant);
6897 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
6898 DAG.getConstant(0, DL, MVT::i32));
6899 }
6900
6901 return SDValue();
6902}
6903
6904// check if a VEXT instruction can handle the shuffle mask when the
6905// vector sources of the shuffle are the same.
6906static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
6907 unsigned NumElts = VT.getVectorNumElements();
6908
6909 // Assume that the first shuffle index is not UNDEF. Fail if it is.
6910 if (M[0] < 0)
6911 return false;
6912
6913 Imm = M[0];
6914
6915 // If this is a VEXT shuffle, the immediate value is the index of the first
6916 // element. The other shuffle indices must be the successive elements after
6917 // the first one.
6918 unsigned ExpectedElt = Imm;
6919 for (unsigned i = 1; i < NumElts; ++i) {
6920 // Increment the expected index. If it wraps around, just follow it
6921 // back to index zero and keep going.
6922 ++ExpectedElt;
6923 if (ExpectedElt == NumElts)
6924 ExpectedElt = 0;
6925
6926 if (M[i] < 0) continue; // ignore UNDEF indices
6927 if (ExpectedElt != static_cast<unsigned>(M[i]))
6928 return false;
6929 }
6930
6931 return true;
6932}
6933
6934static bool isVEXTMask(ArrayRef<int> M, EVT VT,
6935 bool &ReverseVEXT, unsigned &Imm) {
6936 unsigned NumElts = VT.getVectorNumElements();
6937 ReverseVEXT = false;
6938
6939 // Assume that the first shuffle index is not UNDEF. Fail if it is.
6940 if (M[0] < 0)
6941 return false;
6942
6943 Imm = M[0];
6944
6945 // If this is a VEXT shuffle, the immediate value is the index of the first
6946 // element. The other shuffle indices must be the successive elements after
6947 // the first one.
6948 unsigned ExpectedElt = Imm;
6949 for (unsigned i = 1; i < NumElts; ++i) {
6950 // Increment the expected index. If it wraps around, it may still be
6951 // a VEXT but the source vectors must be swapped.
6952 ExpectedElt += 1;
6953 if (ExpectedElt == NumElts * 2) {
6954 ExpectedElt = 0;
6955 ReverseVEXT = true;
6956 }
6957
6958 if (M[i] < 0) continue; // ignore UNDEF indices
6959 if (ExpectedElt != static_cast<unsigned>(M[i]))
6960 return false;
6961 }
6962
6963 // Adjust the index value if the source operands will be swapped.
6964 if (ReverseVEXT)
6965 Imm -= NumElts;
6966
6967 return true;
6968}
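To make the two predicates above concrete, a small standalone check (illustrative only; the helper is hypothetical) of what a VEXT mask looks like for a v8i8 shuffle: the mask is a rotated identity over the concatenated inputs, and once the start index wraps past 2 * NumElts the source operands have to be swapped.

#include <cassert>
#include <vector>

// A VEXT-style mask for two NumElts-element inputs concatenated as
// elements 0 .. 2*NumElts-1: consecutive indices starting at Imm, wrapping.
static std::vector<int> makeVEXTMask(unsigned NumElts, unsigned Imm) {
  std::vector<int> M(NumElts);
  for (unsigned i = 0; i < NumElts; ++i)
    M[i] = (Imm + i) % (2 * NumElts);
  return M;
}

int main() {
  // Imm = 3 stays inside the first source plus the start of the second:
  std::vector<int> M = makeVEXTMask(8, 3);
  assert(M.front() == 3 && M.back() == 10);  // <3,4,5,6,7,8,9,10>
  // Imm = 11 wraps around; isVEXTMask reports this as ReverseVEXT
  // (operands swapped) with Imm reduced by NumElts to 3.
  std::vector<int> W = makeVEXTMask(8, 11);
  assert(W.front() == 11 && W.back() == 2);
  return 0;
}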
6969
6970static bool isVTBLMask(ArrayRef<int> M, EVT VT) {
6971 // We can handle <8 x i8> vector shuffles. If the index in the mask is out of
6972 // range, then 0 is placed into the resulting vector. So pretty much any mask
6973 // of 8 elements can work here.
6974 return VT == MVT::v8i8 && M.size() == 8;
6975}
6976
6977static unsigned SelectPairHalf(unsigned Elements, ArrayRef<int> Mask,
6978 unsigned Index) {
6979 if (Mask.size() == Elements * 2)
6980 return Index / Elements;
6981 return Mask[Index] == 0 ? 0 : 1;
6982}
6983
6984// Checks whether the shuffle mask represents a vector transpose (VTRN) by
6985// checking that pairs of elements in the shuffle mask represent the same index
6986// in each vector, incrementing the expected index by 2 at each step.
6987// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 2, 6]
6988// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,c,g}
6989// v2={e,f,g,h}
6990// WhichResult gives the offset for each element in the mask based on which
6991// of the two results it belongs to.
6992//
6993// The transpose can be represented either as:
6994// result1 = shufflevector v1, v2, result1_shuffle_mask
6995// result2 = shufflevector v1, v2, result2_shuffle_mask
6996// where v1/v2 and the shuffle masks have the same number of elements
6997// (here WhichResult (see below) indicates which result is being checked)
6998//
6999// or as:
7000// results = shufflevector v1, v2, shuffle_mask
7001// where both results are returned in one vector and the shuffle mask has twice
7002// as many elements as v1/v2 (here WhichResult will always be 0 if true) here we
7003// want to check the low half and high half of the shuffle mask as if it were
7004// the other case
7005static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
7006 unsigned EltSz = VT.getScalarSizeInBits();
7007 if (EltSz == 64)
7008 return false;
7009
7010 unsigned NumElts = VT.getVectorNumElements();
7011 if (M.size() != NumElts && M.size() != NumElts*2)
7012 return false;
7013
7014 // If the mask is twice as long as the input vector then we need to check the
7015 // upper and lower parts of the mask with a matching value for WhichResult
7016 // FIXME: A mask with only even values will be rejected in case the first
7017 // element is undefined, e.g. [-1, 4, 2, 6] will be rejected, because only
7018 // M[0] is used to determine WhichResult
7019 for (unsigned i = 0; i < M.size(); i += NumElts) {
7020 WhichResult = SelectPairHalf(NumElts, M, i);
7021 for (unsigned j = 0; j < NumElts; j += 2) {
7022 if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
7023 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + NumElts + WhichResult))
7024 return false;
7025 }
7026 }
7027
7028 if (M.size() == NumElts*2)
7029 WhichResult = 0;
7030
7031 return true;
7032}
7033
7034/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
7035/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
7036/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
7037static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
7038 unsigned EltSz = VT.getScalarSizeInBits();
7039 if (EltSz == 64)
7040 return false;
7041
7042 unsigned NumElts = VT.getVectorNumElements();
7043 if (M.size() != NumElts && M.size() != NumElts*2)
7044 return false;
7045
7046 for (unsigned i = 0; i < M.size(); i += NumElts) {
7047 WhichResult = SelectPairHalf(NumElts, M, i);
7048 for (unsigned j = 0; j < NumElts; j += 2) {
7049 if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
7050 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + WhichResult))
7051 return false;
7052 }
7053 }
7054
7055 if (M.size() == NumElts*2)
7056 WhichResult = 0;
7057
7058 return true;
7059}
7060
7061// Checks whether the shuffle mask represents a vector unzip (VUZP) by checking
7062// that the mask elements are either all even and in steps of size 2 or all odd
7063// and in steps of size 2.
7064// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 2, 4, 6]
7065// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,c,e,g}
7066// v2={e,f,g,h}
7067// Requires similar checks to those of isVTRNMask with
7068// respect to how the results are returned.
7069static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
7070 unsigned EltSz = VT.getScalarSizeInBits();
7071 if (EltSz == 64)
7072 return false;
7073
7074 unsigned NumElts = VT.getVectorNumElements();
7075 if (M.size() != NumElts && M.size() != NumElts*2)
7076 return false;
7077
7078 for (unsigned i = 0; i < M.size(); i += NumElts) {
7079 WhichResult = SelectPairHalf(NumElts, M, i);
7080 for (unsigned j = 0; j < NumElts; ++j) {
7081 if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult)
7082 return false;
7083 }
7084 }
7085
7086 if (M.size() == NumElts*2)
7087 WhichResult = 0;
7088
7089 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7090 if (VT.is64BitVector() && EltSz == 32)
7091 return false;
7092
7093 return true;
7094}
7095
7096/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
7097/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
7098/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
7099static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
7100 unsigned EltSz = VT.getScalarSizeInBits();
7101 if (EltSz == 64)
7102 return false;
7103
7104 unsigned NumElts = VT.getVectorNumElements();
7105 if (M.size() != NumElts && M.size() != NumElts*2)
7106 return false;
7107
7108 unsigned Half = NumElts / 2;
7109 for (unsigned i = 0; i < M.size(); i += NumElts) {
7110 WhichResult = SelectPairHalf(NumElts, M, i);
7111 for (unsigned j = 0; j < NumElts; j += Half) {
7112 unsigned Idx = WhichResult;
7113 for (unsigned k = 0; k < Half; ++k) {
7114 int MIdx = M[i + j + k];
7115 if (MIdx >= 0 && (unsigned) MIdx != Idx)
7116 return false;
7117 Idx += 2;
7118 }
7119 }
7120 }
7121
7122 if (M.size() == NumElts*2)
7123 WhichResult = 0;
7124
7125 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7126 if (VT.is64BitVector() && EltSz == 32)
7127 return false;
7128
7129 return true;
7130}
7131
7132// Checks whether the shuffle mask represents a vector zip (VZIP) by checking
7133// that pairs of elements of the shufflemask represent the same index in each
7134// vector incrementing sequentially through the vectors.
7135// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 1, 5]
7136// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,b,f}
7137// v2={e,f,g,h}
7138// Requires similar checks to those of isVTRNMask with respect to how the
7139// results are returned.
7140static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
7141 unsigned EltSz = VT.getScalarSizeInBits();
7142 if (EltSz == 64)
7143 return false;
7144
7145 unsigned NumElts = VT.getVectorNumElements();
7146 if (M.size() != NumElts && M.size() != NumElts*2)
7147 return false;
7148
7149 for (unsigned i = 0; i < M.size(); i += NumElts) {
7150 WhichResult = SelectPairHalf(NumElts, M, i);
7151 unsigned Idx = WhichResult * NumElts / 2;
7152 for (unsigned j = 0; j < NumElts; j += 2) {
7153 if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
7154 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx + NumElts))
7155 return false;
7156 Idx += 1;
7157 }
7158 }
7159
7160 if (M.size() == NumElts*2)
7161 WhichResult = 0;
7162
7163 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7164 if (VT.is64BitVector() && EltSz == 32)
7165 return false;
7166
7167 return true;
7168}
7169
7170/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
7171/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
7172/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
7173static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
7174 unsigned EltSz = VT.getScalarSizeInBits();
7175 if (EltSz == 64)
7176 return false;
7177
7178 unsigned NumElts = VT.getVectorNumElements();
7179 if (M.size() != NumElts && M.size() != NumElts*2)
7180 return false;
7181
7182 for (unsigned i = 0; i < M.size(); i += NumElts) {
7183 WhichResult = SelectPairHalf(NumElts, M, i);
7184 unsigned Idx = WhichResult * NumElts / 2;
7185 for (unsigned j = 0; j < NumElts; j += 2) {
7186 if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
7187 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx))
7188 return false;
7189 Idx += 1;
7190 }
7191 }
7192
7193 if (M.size() == NumElts*2)
7194 WhichResult = 0;
7195
7196 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7197 if (VT.is64BitVector() && EltSz == 32)
7198 return false;
7199
7200 return true;
7201}
7202
7203/// Check if \p ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN),
7204/// and return the corresponding ARMISD opcode if it is, or 0 if it isn't.
7205static unsigned isNEONTwoResultShuffleMask(ArrayRef<int> ShuffleMask, EVT VT,
7206 unsigned &WhichResult,
7207 bool &isV_UNDEF) {
7208 isV_UNDEF = false;
7209 if (isVTRNMask(ShuffleMask, VT, WhichResult))
7210 return ARMISD::VTRN;
7211 if (isVUZPMask(ShuffleMask, VT, WhichResult))
7212 return ARMISD::VUZP;
7213 if (isVZIPMask(ShuffleMask, VT, WhichResult))
7214 return ARMISD::VZIP;
7215
7216 isV_UNDEF = true;
7217 if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
7218 return ARMISD::VTRN;
7219 if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
7220 return ARMISD::VUZP;
7221 if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
7222 return ARMISD::VZIP;
7223
7224 return 0;
7225}
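A concrete reference for the three mask shapes the helpers above classify (a standalone sketch, not the file's code; it just reproduces the rules the predicates check for the two-input, mask-length == NumElts form):

#include <cassert>
#include <vector>

// Build the WhichResult'th mask of each NEON two-result shuffle for a
// vector of NumElts elements, following isVTRNMask/isVUZPMask/isVZIPMask.
static std::vector<int> trnMask(unsigned NumElts, unsigned Which) {
  std::vector<int> M;
  for (unsigned j = 0; j < NumElts; j += 2) {
    M.push_back(j + Which);
    M.push_back(j + NumElts + Which);
  }
  return M;
}
static std::vector<int> uzpMask(unsigned NumElts, unsigned Which) {
  std::vector<int> M;
  for (unsigned j = 0; j < NumElts; ++j)
    M.push_back(2 * j + Which);
  return M;
}
static std::vector<int> zipMask(unsigned NumElts, unsigned Which) {
  std::vector<int> M;
  unsigned Idx = Which * NumElts / 2;
  for (unsigned j = 0; j < NumElts; j += 2) {
    M.push_back(Idx);
    M.push_back(Idx + NumElts);
    ++Idx;
  }
  return M;
}

int main() {
  // For v4i32 with v1 = {a,b,c,d} and v2 = {e,f,g,h}:
  assert((trnMask(4, 0) == std::vector<int>{0, 4, 2, 6})); // {a,e,c,g}
  assert((uzpMask(4, 0) == std::vector<int>{0, 2, 4, 6})); // {a,c,e,g}
  assert((zipMask(4, 0) == std::vector<int>{0, 4, 1, 5})); // {a,e,b,f}
  return 0;
}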
7226
7227/// \return true if this is a reverse operation on a vector.
7228static bool isReverseMask(ArrayRef<int> M, EVT VT) {
7229 unsigned NumElts = VT.getVectorNumElements();
7230 // Make sure the mask has the right size.
7231 if (NumElts != M.size())
7232 return false;
7233
7234 // Look for <15, ..., 3, -1, 1, 0>.
7235 for (unsigned i = 0; i != NumElts; ++i)
7236 if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))
7237 return false;
7238
7239 return true;
7240}
7241
7242static bool isVMOVNMask(ArrayRef<int> M, EVT VT, bool Top, bool SingleSource) {
7243 unsigned NumElts = VT.getVectorNumElements();
7244 // Make sure the mask has the right size.
7245 if (NumElts != M.size() || (VT != MVT::v8i16 && VT != MVT::v16i8))
7246 return false;
7247
7248 // If Top
7249 // Look for <0, N, 2, N+2, 4, N+4, ..>.
7250 // This inserts Input2 into Input1
7251 // else if not Top
7252 // Look for <0, N+1, 2, N+3, 4, N+5, ..>
7253 // This inserts Input1 into Input2
7254 unsigned Offset = Top ? 0 : 1;
7255 unsigned N = SingleSource ? 0 : NumElts;
7256 for (unsigned i = 0; i < NumElts; i += 2) {
7257 if (M[i] >= 0 && M[i] != (int)i)
7258 return false;
7259 if (M[i + 1] >= 0 && M[i + 1] != (int)(N + i + Offset))
7260 return false;
7261 }
7262
7263 return true;
7264}
7265
7266// Reconstruct an MVE VCVT from a BuildVector of scalar fptrunc, all extracted
7267// from a pair of inputs. For example:
7268// BUILDVECTOR(FP_ROUND(EXTRACT_ELT(X, 0),
7269// FP_ROUND(EXTRACT_ELT(Y, 0),
7270// FP_ROUND(EXTRACT_ELT(X, 1),
7271// FP_ROUND(EXTRACT_ELT(Y, 1), ...)
7272static SDValue LowerBuildVectorOfFPTrunc(SDValue BV, SelectionDAG &DAG,
7273 const ARMSubtarget *ST) {
7274 assert(BV.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
7275 if (!ST->hasMVEFloatOps())
7276 return SDValue();
7277
7278 SDLoc dl(BV);
7279 EVT VT = BV.getValueType();
7280 if (VT != MVT::v8f16)
7281 return SDValue();
7282
7283 // We are looking for a buildvector of fptrunc elements, where all the
7284 // elements are extracted alternately from two sources. Check the first two
7285 // items are valid enough and extract some info from them (they are checked
7286 // properly in the loop below).
7287 if (BV.getOperand(0).getOpcode() != ISD::FP_ROUND ||
7288 BV.getOperand(0).getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
7289 BV.getOperand(0).getOperand(0).getConstantOperandVal(1) != 0)
7290 return SDValue();
7291 if (BV.getOperand(1).getOpcode() != ISD::FP_ROUND ||
7292 BV.getOperand(1).getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
7293 BV.getOperand(1).getOperand(0).getConstantOperandVal(1) != 0)
7294 return SDValue();
7295 SDValue Op0 = BV.getOperand(0).getOperand(0).getOperand(0);
7296 SDValue Op1 = BV.getOperand(1).getOperand(0).getOperand(0);
7297 if (Op0.getValueType() != MVT::v4f32 || Op1.getValueType() != MVT::v4f32)
7298 return SDValue();
7299
7300 // Check all the values in the BuildVector line up with our expectations.
7301 for (unsigned i = 1; i < 4; i++) {
7302 auto Check = [](SDValue Trunc, SDValue Op, unsigned Idx) {
7303 return Trunc.getOpcode() == ISD::FP_ROUND &&
7304 Trunc.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7305 Trunc.getOperand(0).getOperand(0) == Op &&
7306 Trunc.getOperand(0).getConstantOperandVal(1) == Idx;
7307 };
7308 if (!Check(BV.getOperand(i * 2 + 0), Op0, i))
7309 return SDValue();
7310 if (!Check(BV.getOperand(i * 2 + 1), Op1, i))
7311 return SDValue();
7312 }
7313
7314 SDValue N1 = DAG.getNode(ARMISD::VCVTN, dl, VT, DAG.getUNDEF(VT), Op0,
7315 DAG.getConstant(0, dl, MVT::i32));
7316 return DAG.getNode(ARMISD::VCVTN, dl, VT, N1, Op1,
7317 DAG.getConstant(1, dl, MVT::i32));
7318}
7319
7320// Reconstruct an MVE VCVT from a BuildVector of scalar fpext, all extracted
7321// from a single input on alternating lanes. For example:
7322// BUILDVECTOR(FP_EXTEND(EXTRACT_ELT(X, 0),
7323// FP_EXTEND(EXTRACT_ELT(X, 2),
7324// FP_EXTEND(EXTRACT_ELT(X, 4), ...)
7325static SDValue LowerBuildVectorOfFPExt(SDValue BV, SelectionDAG &DAG,
7326 const ARMSubtarget *ST) {
7327 assert(BV.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
7328 if (!ST->hasMVEFloatOps())
7329 return SDValue();
7330
7331 SDLoc dl(BV);
7332 EVT VT = BV.getValueType();
7333 if (VT != MVT::v4f32)
7334 return SDValue();
7335
7336 // We are looking for a buildvector of fpext elements, where all the
7337 // elements are alternating lanes from a single source. For example <0,2,4,6>
7338 // or <1,3,5,7>. Check the first two items are valid enough and extract some
7339 // info from them (they are checked properly in the loop below).
7340 if (BV.getOperand(0).getOpcode() != ISD::FP_EXTEND ||
7341 BV.getOperand(0).getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT)
7342 return SDValue();
7343 SDValue Op0 = BV.getOperand(0).getOperand(0).getOperand(0);
7344 int Offset = BV.getOperand(0).getOperand(0).getConstantOperandVal(1);
7345 if (Op0.getValueType() != MVT::v8f16 || (Offset != 0 && Offset != 1))
7346 return SDValue();
7347
7348 // Check all the values in the BuildVector line up with our expectations.
7349 for (unsigned i = 1; i < 4; i++) {
7350 auto Check = [](SDValue Trunc, SDValue Op, unsigned Idx) {
7351 return Trunc.getOpcode() == ISD::FP_EXTEND &&
7352 Trunc.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7353 Trunc.getOperand(0).getOperand(0) == Op &&
7354 Trunc.getOperand(0).getConstantOperandVal(1) == Idx;
7355 };
7356 if (!Check(BV.getOperand(i), Op0, 2 * i + Offset))
7357 return SDValue();
7358 }
7359
7360 return DAG.getNode(ARMISD::VCVTL, dl, VT, Op0,
7361 DAG.getConstant(Offset, dl, MVT::i32));
7362}
7363
7364// If N is an integer constant that can be moved into a register in one
7365// instruction, return an SDValue of such a constant (will become a MOV
7366// instruction). Otherwise return null.
7367static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
7368 const ARMSubtarget *ST, const SDLoc &dl) {
7369 uint64_t Val;
7370 if (!isa<ConstantSDNode>(N))
7371 return SDValue();
7372 Val = cast<ConstantSDNode>(N)->getZExtValue();
7373
7374 if (ST->isThumb1Only()) {
7375 if (Val <= 255 || ~Val <= 255)
7376 return DAG.getConstant(Val, dl, MVT::i32);
7377 } else {
7378 if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
7379 return DAG.getConstant(Val, dl, MVT::i32);
7380 }
7381 return SDValue();
7382}
7383
7384static SDValue LowerBUILD_VECTOR_i1(SDValue Op, SelectionDAG &DAG,
7385 const ARMSubtarget *ST) {
7386 SDLoc dl(Op);
7387 EVT VT = Op.getValueType();
7388
7389 assert(ST->hasMVEIntegerOps() && "LowerBUILD_VECTOR_i1 called without MVE!");
7390
7391 unsigned NumElts = VT.getVectorNumElements();
7392 unsigned BoolMask;
7393 unsigned BitsPerBool;
7394 if (NumElts == 4) {
7395 BitsPerBool = 4;
7396 BoolMask = 0xf;
7397 } else if (NumElts == 8) {
7398 BitsPerBool = 2;
7399 BoolMask = 0x3;
7400 } else if (NumElts == 16) {
7401 BitsPerBool = 1;
7402 BoolMask = 0x1;
7403 } else
7404 return SDValue();
7405
7406 // If this is a single value copied into all lanes (a splat), we can just sign
7407 // extend that single value
7408 SDValue FirstOp = Op.getOperand(0);
7409 if (!isa<ConstantSDNode>(FirstOp) &&
7410 std::all_of(std::next(Op->op_begin()), Op->op_end(),
7411 [&FirstOp](SDUse &U) {
7412 return U.get().isUndef() || U.get() == FirstOp;
7413 })) {
7414 SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i32, FirstOp,
7415 DAG.getValueType(MVT::i1));
7416 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, Op.getValueType(), Ext);
7417 }
7418
7419 // First create base with bits set where known
7420 unsigned Bits32 = 0;
7421 for (unsigned i = 0; i < NumElts; ++i) {
7422 SDValue V = Op.getOperand(i);
7423 if (!isa<ConstantSDNode>(V) && !V.isUndef())
7424 continue;
7425 bool BitSet = V.isUndef() ? false : cast<ConstantSDNode>(V)->getZExtValue();
7426 if (BitSet)
7427 Bits32 |= BoolMask << (i * BitsPerBool);
7428 }
7429
7430 // Add in unknown nodes
7431 SDValue Base = DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT,
7432 DAG.getConstant(Bits32, dl, MVT::i32));
7433 for (unsigned i = 0; i < NumElts; ++i) {
7434 SDValue V = Op.getOperand(i);
7435 if (isa<ConstantSDNode>(V) || V.isUndef())
7436 continue;
7437 Base = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Base, V,
7438 DAG.getConstant(i, dl, MVT::i32));
7439 }
7440
7441 return Base;
7442}
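A sketch of the bit packing LowerBUILD_VECTOR_i1 performs for the constant lanes (standalone illustration with a hypothetical helper; BitsPerBool and BoolMask follow the NumElts == 4/8/16 cases above): each boolean lane owns 16/NumElts bits of the MVE predicate, replicated across its field.

#include <cassert>
#include <cstdint>
#include <vector>

// Pack known boolean lanes into the 32-bit immediate used for PREDICATE_CAST,
// with BitsPerBool = 16 / NumElts and BoolMask = (1 << BitsPerBool) - 1.
static uint32_t packPredicateBits(const std::vector<bool> &Lanes) {
  unsigned NumElts = Lanes.size();
  unsigned BitsPerBool = 16 / NumElts;
  uint32_t BoolMask = (1u << BitsPerBool) - 1;
  uint32_t Bits32 = 0;
  for (unsigned i = 0; i < NumElts; ++i)
    if (Lanes[i])
      Bits32 |= BoolMask << (i * BitsPerBool);
  return Bits32;
}

int main() {
  // v4i1 <1,0,1,1>: each lane replicated across a nibble of the predicate.
  assert(packPredicateBits({true, false, true, true}) == 0xff0f);
  // v8i1 <1,0,0,0,0,0,0,1>: two bits per lane.
  assert(packPredicateBits({true, false, false, false,
                            false, false, false, true}) == 0xc003);
  return 0;
}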
7443
7444// If this is a case we can't handle, return null and let the default
7445// expansion code take care of it.
7446SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
7447 const ARMSubtarget *ST) const {
7448 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
7449 SDLoc dl(Op);
7450 EVT VT = Op.getValueType();
7451
7452 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
7453 return LowerBUILD_VECTOR_i1(Op, DAG, ST);
7454
7455 APInt SplatBits, SplatUndef;
7456 unsigned SplatBitSize;
7457 bool HasAnyUndefs;
7458 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
7459 if (SplatUndef.isAllOnesValue())
7460 return DAG.getUNDEF(VT);
7461
7462 if ((ST->hasNEON() && SplatBitSize <= 64) ||
7463 (ST->hasMVEIntegerOps() && SplatBitSize <= 64)) {
7464 // Check if an immediate VMOV works.
7465 EVT VmovVT;
7466 SDValue Val =
7467 isVMOVModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
7468 SplatBitSize, DAG, dl, VmovVT, VT, VMOVModImm);
7469
7470 if (Val.getNode()) {
7471 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
7472 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
7473 }
7474
7475 // Try an immediate VMVN.
7476 uint64_t NegatedImm = (~SplatBits).getZExtValue();
7477 Val = isVMOVModifiedImm(
7478 NegatedImm, SplatUndef.getZExtValue(), SplatBitSize, DAG, dl, VmovVT,
7479 VT, ST->hasMVEIntegerOps() ? MVEVMVNModImm : VMVNModImm);
7480 if (Val.getNode()) {
7481 SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
7482 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
7483 }
7484
7485 // Use vmov.f32 to materialize other v2f32 and v4f32 splats.
7486 if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {
7487 int ImmVal = ARM_AM::getFP32Imm(SplatBits);
7488 if (ImmVal != -1) {
7489 SDValue Val = DAG.getTargetConstant(ImmVal, dl, MVT::i32);
7490 return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
7491 }
7492 }
7493 }
7494 }
7495
7496 // Scan through the operands to see if only one value is used.
7497 //
7498 // As an optimisation, even if more than one value is used it may be more
7499 // profitable to splat with one value then change some lanes.
7500 //
7501 // Heuristically we decide to do this if the vector has a "dominant" value,
7502 // defined as splatted to more than half of the lanes.
7503 unsigned NumElts = VT.getVectorNumElements();
7504 bool isOnlyLowElement = true;
7505 bool usesOnlyOneValue = true;
7506 bool hasDominantValue = false;
7507 bool isConstant = true;
7508
7509 // Map of the number of times a particular SDValue appears in the
7510 // element list.
7511 DenseMap<SDValue, unsigned> ValueCounts;
7512 SDValue Value;
7513 for (unsigned i = 0; i < NumElts; ++i) {
7514 SDValue V = Op.getOperand(i);
7515 if (V.isUndef())
7516 continue;
7517 if (i > 0)
7518 isOnlyLowElement = false;
7519 if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
7520 isConstant = false;
7521
7522 ValueCounts.insert(std::make_pair(V, 0));
7523 unsigned &Count = ValueCounts[V];
7524
7525 // Is this value dominant? (takes up more than half of the lanes)
7526 if (++Count > (NumElts / 2)) {
7527 hasDominantValue = true;
7528 Value = V;
7529 }
7530 }
7531 if (ValueCounts.size() != 1)
7532 usesOnlyOneValue = false;
7533 if (!Value.getNode() && !ValueCounts.empty())
7534 Value = ValueCounts.begin()->first;
7535
7536 if (ValueCounts.empty())
7537 return DAG.getUNDEF(VT);
7538
7539 // Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR.
7540 // Keep going if we are hitting this case.
7541 if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode()))
7542 return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
7543
7544 unsigned EltSize = VT.getScalarSizeInBits();
7545
7546 // Use VDUP for non-constant splats. For f32 constant splats, reduce to
7547 // i32 and try again.
7548 if (hasDominantValue && EltSize <= 32) {
7549 if (!isConstant) {
7550 SDValue N;
7551
7552 // If we are VDUPing a value that comes directly from a vector, that will
7553 // cause an unnecessary move to and from a GPR, where instead we could
7554 // just use VDUPLANE. We can only do this if the lane being extracted
7555 // is at a constant index, as the VDUP from lane instructions only have
7556 // constant-index forms.
7557 ConstantSDNode *constIndex;
7558 if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7559 (constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1)))) {
7560 // We need to create a new undef vector to use for the VDUPLANE if the
7561 // size of the vector from which we get the value is different than the
7562 // size of the vector that we need to create. We will insert the element
7563 // such that the register coalescer will remove unnecessary copies.
7564 if (VT != Value->getOperand(0).getValueType()) {
7565 unsigned index = constIndex->getAPIntValue().getLimitedValue() %
7566 VT.getVectorNumElements();
7567 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
7568 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),
7569 Value, DAG.getConstant(index, dl, MVT::i32)),
7570 DAG.getConstant(index, dl, MVT::i32));
7571 } else
7572 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
7573 Value->getOperand(0), Value->getOperand(1));
7574 } else
7575 N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);
7576
7577 if (!usesOnlyOneValue) {
7578 // The dominant value was splatted as 'N', but we now have to insert
7579 // all differing elements.
7580 for (unsigned I = 0; I < NumElts; ++I) {
7581 if (Op.getOperand(I) == Value)
7582 continue;
7583 SmallVector<SDValue, 3> Ops;
7584 Ops.push_back(N);
7585 Ops.push_back(Op.getOperand(I));
7586 Ops.push_back(DAG.getConstant(I, dl, MVT::i32));
7587 N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ops);
7588 }
7589 }
7590 return N;
7591 }
7592 if (VT.getVectorElementType().isFloatingPoint()) {
7593 SmallVector<SDValue, 8> Ops;
7594 MVT FVT = VT.getVectorElementType().getSimpleVT();
7595 assert(FVT == MVT::f32 || FVT == MVT::f16);
7596 MVT IVT = (FVT == MVT::f32) ? MVT::i32 : MVT::i16;
7597 for (unsigned i = 0; i < NumElts; ++i)
7598 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, IVT,
7599 Op.getOperand(i)));
7600 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), IVT, NumElts);
7601 SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
7602 Val = LowerBUILD_VECTOR(Val, DAG, ST);
7603 if (Val.getNode())
7604 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
7605 }
7606 if (usesOnlyOneValue) {
7607 SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
7608 if (isConstant && Val.getNode())
7609 return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
7610 }
7611 }
7612
7613 // If all elements are constants and the case above didn't get hit, fall back
7614 // to the default expansion, which will generate a load from the constant
7615 // pool.
7616 if (isConstant)
7617 return SDValue();
7618
7619 // Reconstruct the BUILDVECTOR to one of the legal shuffles (such as vext and
7620 // vmovn). Empirical tests suggest this is rarely worth it for vectors of
7621 // length <= 2.
7622 if (NumElts >= 4)
7623 if (SDValue shuffle = ReconstructShuffle(Op, DAG))
7624 return shuffle;
7625
7626 // Attempt to turn a buildvector of scalar fptrunc's or fpext's back into
7627 // VCVT's
7628 if (SDValue VCVT = LowerBuildVectorOfFPTrunc(Op, DAG, Subtarget))
7629 return VCVT;
7630 if (SDValue VCVT = LowerBuildVectorOfFPExt(Op, DAG, Subtarget))
7631 return VCVT;
7632
7633 if (ST->hasNEON() && VT.is128BitVector() && VT != MVT::v2f64 && VT != MVT::v4f32) {
7634 // If we haven't found an efficient lowering, try splitting a 128-bit vector
7635 // into two 64-bit vectors; we might discover a better way to lower it.
7636 SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElts);
7637 EVT ExtVT = VT.getVectorElementType();
7638 EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElts / 2);
7639 SDValue Lower =
7640 DAG.getBuildVector(HVT, dl, makeArrayRef(&Ops[0], NumElts / 2));
7641 if (Lower.getOpcode() == ISD::BUILD_VECTOR)
7642 Lower = LowerBUILD_VECTOR(Lower, DAG, ST);
7643 SDValue Upper = DAG.getBuildVector(
7644 HVT, dl, makeArrayRef(&Ops[NumElts / 2], NumElts / 2));
7645 if (Upper.getOpcode() == ISD::BUILD_VECTOR)
7646 Upper = LowerBUILD_VECTOR(Upper, DAG, ST);
7647 if (Lower && Upper)
7648 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lower, Upper);
7649 }
7650
7651 // Vectors with 32- or 64-bit elements can be built by directly assigning
7652 // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands
7653 // will be legalized.
7654 if (EltSize >= 32) {
7655 // Do the expansion with floating-point types, since that is what the VFP
7656 // registers are defined to use, and since i64 is not legal.
7657 EVT EltVT = EVT::getFloatingPointVT(EltSize);
7658 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
7659 SmallVector<SDValue, 8> Ops;
7660 for (unsigned i = 0; i < NumElts; ++i)
7661 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
7662 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
7663 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
7664 }
7665
7666 // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
7667 // know the default expansion would otherwise fall back on something even
7668 // worse. For a vector with one or two non-undef values, that's
7669 // scalar_to_vector for the elements followed by a shuffle (provided the
7670 // shuffle is valid for the target) and materialization element by element
7671 // on the stack followed by a load for everything else.
7672 if (!isConstant && !usesOnlyOneValue) {
7673 SDValue Vec = DAG.getUNDEF(VT);
7674 for (unsigned i = 0 ; i < NumElts; ++i) {
7675 SDValue V = Op.getOperand(i);
7676 if (V.isUndef())
7677 continue;
7678 SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i32);
7679 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
7680 }
7681 return Vec;
7682 }
7683
7684 return SDValue();
7685}
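
The splat heuristic in LowerBUILD_VECTOR above treats a value as dominant once it fills more than half of the lanes, and then builds the vector as a VDUP of that value followed by per-lane inserts for the differing elements. Below is a standalone sketch of just the counting step, with plain ints standing in for SDValues; hasDominantValue and the sample lane values are illustrative.

#include <cassert>
#include <map>
#include <vector>

// Sketch of the "dominant value" test: a value is dominant when it occupies
// more than half of the lanes.
static bool hasDominantValue(const std::vector<int> &Lanes, int &Dominant) {
  std::map<int, unsigned> Counts;
  for (int V : Lanes)
    if (++Counts[V] > Lanes.size() / 2) {
      Dominant = V;
      return true;
    }
  return false;
}

int main() {
  int D = 0;
  assert(hasDominantValue({7, 7, 7, 2}, D) && D == 7); // 3 of 4 lanes -> VDUP 7
  assert(!hasDominantValue({1, 2, 3, 4}, D));          // no value repeats enough
  return 0;
}
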
7686
7687// Gather data to see if the operation can be modelled as a
7688// shuffle in combination with VEXTs.
7689SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
7690 SelectionDAG &DAG) const {
7691 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
7692 SDLoc dl(Op);
7693 EVT VT = Op.getValueType();
7694 unsigned NumElts = VT.getVectorNumElements();
7695
7696 struct ShuffleSourceInfo {
7697 SDValue Vec;
7698 unsigned MinElt = std::numeric_limits<unsigned>::max();
7699 unsigned MaxElt = 0;
7700
7701 // We may insert some combination of BITCASTs and VEXT nodes to force Vec to
7702 // be compatible with the shuffle we intend to construct. As a result
7703 // ShuffleVec will be some sliding window into the original Vec.
7704 SDValue ShuffleVec;
7705
7706 // Code should guarantee that element i in Vec starts at element "WindowBase
7707 // + i * WindowScale in ShuffleVec".
7708 int WindowBase = 0;
7709 int WindowScale = 1;
7710
7711 ShuffleSourceInfo(SDValue Vec) : Vec(Vec), ShuffleVec(Vec) {}
7712
7713 bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
7714 };
7715
7716 // First gather all vectors used as an immediate source for this BUILD_VECTOR
7717 // node.
7718 SmallVector<ShuffleSourceInfo, 2> Sources;
7719 for (unsigned i = 0; i < NumElts; ++i) {
7720 SDValue V = Op.getOperand(i);
7721 if (V.isUndef())
7722 continue;
7723 else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
7724 // A shuffle can only come from building a vector from various
7725 // elements of other vectors.
7726 return SDValue();
7727 } else if (!isa<ConstantSDNode>(V.getOperand(1))) {
7728 // Furthermore, shuffles require a constant mask, whereas extractelts
7729 // accept variable indices.
7730 return SDValue();
7731 }
7732
7733 // Add this element source to the list if it's not already there.
7734 SDValue SourceVec = V.getOperand(0);
7735 auto Source = llvm::find(Sources, SourceVec);
7736 if (Source == Sources.end())
7737 Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
7738
7739 // Update the minimum and maximum lane number seen.
7740 unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
7741 Source->MinElt = std::min(Source->MinElt, EltNo);
7742 Source->MaxElt = std::max(Source->MaxElt, EltNo);
7743 }
7744
7745 // Currently only do something sane when at most two source vectors
7746 // are involved.
7747 if (Sources.size() > 2)
7748 return SDValue();
7749
7750 // Find out the smallest element size among result and two sources, and use
7751 // it as element size to build the shuffle_vector.
7752 EVT SmallestEltTy = VT.getVectorElementType();
7753 for (auto &Source : Sources) {
7754 EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
7755 if (SrcEltTy.bitsLT(SmallestEltTy))
7756 SmallestEltTy = SrcEltTy;
7757 }
7758 unsigned ResMultiplier =
7759 VT.getScalarSizeInBits() / SmallestEltTy.getSizeInBits();
7760 NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
7761 EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
7762
7763 // If the source vector is too wide or too narrow, we may nevertheless be able
7764 // to construct a compatible shuffle either by concatenating it with UNDEF or
7765 // extracting a suitable range of elements.
7766 for (auto &Src : Sources) {
7767 EVT SrcVT = Src.ShuffleVec.getValueType();
7768
7769 uint64_t SrcVTSize = SrcVT.getFixedSizeInBits();
7770 uint64_t VTSize = VT.getFixedSizeInBits();
7771 if (SrcVTSize == VTSize)
7772 continue;
7773
7774 // This stage of the search produces a source with the same element type as
7775 // the original, but with a total width matching the BUILD_VECTOR output.
7776 EVT EltVT = SrcVT.getVectorElementType();
7777 unsigned NumSrcElts = VTSize / EltVT.getFixedSizeInBits();
7778 EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
7779
7780 if (SrcVTSize < VTSize) {
7781 if (2 * SrcVTSize != VTSize)
7782 return SDValue();
7783 // We can pad out the smaller vector for free, so if it's part of a
7784 // shuffle...
7785 Src.ShuffleVec =
7786 DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
7787 DAG.getUNDEF(Src.ShuffleVec.getValueType()));
7788 continue;
7789 }
7790
7791 if (SrcVTSize != 2 * VTSize)
7792 return SDValue();
7793
7794 if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
7795 // Span too large for a VEXT to cope
7796 return SDValue();
7797 }
7798
7799 if (Src.MinElt >= NumSrcElts) {
7800 // The extraction can just take the second half
7801 Src.ShuffleVec =
7802 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
7803 DAG.getConstant(NumSrcElts, dl, MVT::i32));
7804 Src.WindowBase = -NumSrcElts;
7805 } else if (Src.MaxElt < NumSrcElts) {
7806 // The extraction can just take the first half
7807 Src.ShuffleVec =
7808 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
7809 DAG.getConstant(0, dl, MVT::i32));
7810 } else {
7811 // An actual VEXT is needed
7812 SDValue VEXTSrc1 =
7813 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
7814 DAG.getConstant(0, dl, MVT::i32));
7815 SDValue VEXTSrc2 =
7816 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
7817 DAG.getConstant(NumSrcElts, dl, MVT::i32));
7818
7819 Src.ShuffleVec = DAG.getNode(ARMISD::VEXT, dl, DestVT, VEXTSrc1,
7820 VEXTSrc2,
7821 DAG.getConstant(Src.MinElt, dl, MVT::i32));
7822 Src.WindowBase = -Src.MinElt;
7823 }
7824 }
7825
7826 // Another possible incompatibility occurs from the vector element types. We
7827 // can fix this by bitcasting the source vectors to the same type we intend
7828 // for the shuffle.
7829 for (auto &Src : Sources) {
7830 EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
7831 if (SrcEltTy == SmallestEltTy)
7832 continue;
7833 assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
7834 Src.ShuffleVec = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, ShuffleVT, Src.ShuffleVec);
7835 Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits();
7836 Src.WindowBase *= Src.WindowScale;
7837 }
7838
7839 // Final sanity check before we try to actually produce a shuffle.
7840 LLVM_DEBUG(for (auto Src
7841 : Sources)
7842 assert(Src.ShuffleVec.getValueType() == ShuffleVT););
7843
7844 // The stars all align, our next step is to produce the mask for the shuffle.
7845 SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
7846 int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits();
7847 for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
7848 SDValue Entry = Op.getOperand(i);
7849 if (Entry.isUndef())
7850 continue;
7851
7852 auto Src = llvm::find(Sources, Entry.getOperand(0));
7853 int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
7854
7855 // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
7856 // trunc. So only std::min(SrcBits, DestBits) actually get defined in this
7857 // segment.
7858 EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
7859 int BitsDefined = std::min(OrigEltTy.getScalarSizeInBits(),
7860 VT.getScalarSizeInBits());
7861 int LanesDefined = BitsDefined / BitsPerShuffleLane;
7862
7863 // This source is expected to fill ResMultiplier lanes of the final shuffle,
7864 // starting at the appropriate offset.
7865 int *LaneMask = &Mask[i * ResMultiplier];
7866
7867 int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
7868 ExtractBase += NumElts * (Src - Sources.begin());
7869 for (int j = 0; j < LanesDefined; ++j)
7870 LaneMask[j] = ExtractBase + j;
7871 }
7872
7873
7874 // We can't handle more than two sources. This should have already
7875 // been checked before this point.
7876 assert(Sources.size() <= 2 && "Too many sources!");
7877
7878 SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
7879 for (unsigned i = 0; i < Sources.size(); ++i)
7880 ShuffleOps[i] = Sources[i].ShuffleVec;
7881
7882 SDValue Shuffle = buildLegalVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
7883 ShuffleOps[1], Mask, DAG);
7884 if (!Shuffle)
7885 return SDValue();
7886 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Shuffle);
7887}
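
ReconstructShuffle above tracks, for each source, a WindowBase shift (introduced by the EXTRACT_SUBVECTOR/VEXT step) and a WindowScale factor (introduced by the cast to the smallest element type), and the mask index of an extracted element is EltNo * WindowScale + WindowBase, plus NumElts when the element comes from the second source. A small sketch of that index arithmetic under assumed values follows; the Window struct, maskIndex and the numbers are illustrative, and the real code additionally fills several consecutive lanes per element when the scale is greater than one.

#include <cassert>

// Sketch of the window bookkeeping: where does original element EltNo of a
// source land in the final shuffle mask?
struct Window {
  int Base;   // shift applied by EXTRACT_SUBVECTOR / VEXT
  int Scale;  // shuffle lanes per original element after the recast
};

static int maskIndex(int EltNo, const Window &W, int SourceIdx, int NumElts) {
  return EltNo * W.Scale + W.Base + SourceIdx * NumElts;
}

int main() {
  // Source 0: a VEXT starting at element 2 of a double-width vector
  // (WindowBase = -2), no recast (scale 1). Its element 3 becomes lane 1.
  Window W1{-2, 1};
  assert(maskIndex(/*EltNo=*/3, W1, /*SourceIdx=*/0, /*NumElts=*/4) == 1);
  // Source 1: recast to lanes half the size (scale 2), no shift; its
  // element 1 starts at lane 2 + 4 = 6 of the concatenated mask space.
  Window W2{0, 2};
  assert(maskIndex(/*EltNo=*/1, W2, /*SourceIdx=*/1, /*NumElts=*/4) == 6);
  return 0;
}
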
7888
7889enum ShuffleOpCodes {
7890 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
7891 OP_VREV,
7892 OP_VDUP0,
7893 OP_VDUP1,
7894 OP_VDUP2,
7895 OP_VDUP3,
7896 OP_VEXT1,
7897 OP_VEXT2,
7898 OP_VEXT3,
7899 OP_VUZPL, // VUZP, left result
7900 OP_VUZPR, // VUZP, right result
7901 OP_VZIPL, // VZIP, left result
7902 OP_VZIPR, // VZIP, right result
7903 OP_VTRNL, // VTRN, left result
7904 OP_VTRNR // VTRN, right result
7905};
7906
7907static bool isLegalMVEShuffleOp(unsigned PFEntry) {
7908 unsigned OpNum = (PFEntry >> 26) & 0x0F;
7909 switch (OpNum) {
7910 case OP_COPY:
7911 case OP_VREV:
7912 case OP_VDUP0:
7913 case OP_VDUP1:
7914 case OP_VDUP2:
7915 case OP_VDUP3:
7916 return true;
7917 }
7918 return false;
7919}
7920
7921/// isShuffleMaskLegal - Targets can use this to indicate that they only
7922/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
7923/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
7924/// are assumed to be legal.
7925bool ARMTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
7926 if (VT.getVectorNumElements() == 4 &&
7927 (VT.is128BitVector() || VT.is64BitVector())) {
7928 unsigned PFIndexes[4];
7929 for (unsigned i = 0; i != 4; ++i) {
7930 if (M[i] < 0)
7931 PFIndexes[i] = 8;
7932 else
7933 PFIndexes[i] = M[i];
7934 }
7935
7936 // Compute the index in the perfect shuffle table.
7937 unsigned PFTableIndex =
7938 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
7939 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
7940 unsigned Cost = (PFEntry >> 30);
7941
7942 if (Cost <= 4 && (Subtarget->hasNEON() || isLegalMVEShuffleOp(PFEntry)))
7943 return true;
7944 }
7945
7946 bool ReverseVEXT, isV_UNDEF;
7947 unsigned Imm, WhichResult;
7948
7949 unsigned EltSize = VT.getScalarSizeInBits();
7950 if (EltSize >= 32 ||
7951 ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
7952 ShuffleVectorInst::isIdentityMask(M) ||
7953 isVREVMask(M, VT, 64) ||
7954 isVREVMask(M, VT, 32) ||
7955 isVREVMask(M, VT, 16))
7956 return true;
7957 else if (Subtarget->hasNEON() &&
7958 (isVEXTMask(M, VT, ReverseVEXT, Imm) ||
7959 isVTBLMask(M, VT) ||
7960 isNEONTwoResultShuffleMask(M, VT, WhichResult, isV_UNDEF)))
7961 return true;
7962 else if (Subtarget->hasNEON() && (VT == MVT::v8i16 || VT == MVT::v16i8) &&
7963 isReverseMask(M, VT))
7964 return true;
7965 else if (Subtarget->hasMVEIntegerOps() &&
7966 (isVMOVNMask(M, VT, true, false) ||
7967 isVMOVNMask(M, VT, false, false) || isVMOVNMask(M, VT, true, true)))
7968 return true;
7969 else
7970 return false;
7971}
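
The perfect-shuffle lookup above encodes a 4-element mask as four base-9 digits, using 8 as the sentinel for an undef lane, and reads the cost of the resulting entry from its top two bits. Below is a standalone sketch of the index computation; perfectShuffleIndex and the sample masks are illustrative.

#include <cassert>

// Sketch of the perfect-shuffle table indexing: four base-9 digits, so the
// table has 9*9*9*9 = 6561 entries.
static unsigned perfectShuffleIndex(const int M[4]) {
  unsigned Idx = 0;
  for (int i = 0; i < 4; ++i) {
    unsigned Digit = M[i] < 0 ? 8u : (unsigned)M[i]; // 8 encodes an undef lane
    Idx = Idx * 9 + Digit;
  }
  return Idx;
}

int main() {
  const int Identity[4] = {0, 1, 2, 3};
  assert(perfectShuffleIndex(Identity) == ((0 * 9 + 1) * 9 + 2) * 9 + 3); // 102
  const int AllUndef[4] = {-1, -1, -1, -1};
  assert(perfectShuffleIndex(AllUndef) == 9 * 9 * 9 * 9 - 1);             // 6560
  return 0;
}
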
7972
7973/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
7974/// the specified operations to build the shuffle.
7975static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
7976 SDValue RHS, SelectionDAG &DAG,
7977 const SDLoc &dl) {
7978 unsigned OpNum = (PFEntry >> 26) & 0x0F;
7979 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
7980 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
7981
7982 if (OpNum == OP_COPY) {
7983 if (LHSID == (1*9+2)*9+3) return LHS;
7984 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
7985 return RHS;
7986 }
7987
7988 SDValue OpLHS, OpRHS;
7989 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
7990 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
7991 EVT VT = OpLHS.getValueType();
7992
7993 switch (OpNum) {
7994 default: llvm_unreachable("Unknown shuffle opcode!");
7995 case OP_VREV:
7996 // VREV divides the vector in half and swaps within the half.
7997 if (VT.getVectorElementType() == MVT::i32 ||
7998 VT.getVectorElementType() == MVT::f32)
7999 return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
8000 // vrev <4 x i16> -> VREV32
8001 if (VT.getVectorElementType() == MVT::i16 ||
8002 VT.getVectorElementType() == MVT::f16)
8003 return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS);
8004 // vrev <4 x i8> -> VREV16
8005 assert(VT.getVectorElementType() == MVT::i8);
8006 return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS);
8007 case OP_VDUP0:
8008 case OP_VDUP1:
8009 case OP_VDUP2:
8010 case OP_VDUP3:
8011 return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
8012 OpLHS, DAG.getConstant(OpNum-OP_VDUP0, dl, MVT::i32));
8013 case OP_VEXT1:
8014 case OP_VEXT2:
8015 case OP_VEXT3:
8016 return DAG.getNode(ARMISD::VEXT, dl, VT,
8017 OpLHS, OpRHS,
8018 DAG.getConstant(OpNum - OP_VEXT1 + 1, dl, MVT::i32));
8019 case OP_VUZPL:
8020 case OP_VUZPR:
8021 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
8022 OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
8023 case OP_VZIPL:
8024 case OP_VZIPR:
8025 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
8026 OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
8027 case OP_VTRNL:
8028 case OP_VTRNR:
8029 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
8030 OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
8031 }
8032}
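
GeneratePerfectShuffle above unpacks a PerfectShuffleTable entry as a cost in bits 31..30, an opcode in bits 29..26 and two 13-bit sub-entry ids. Below is a small sketch that round-trips that layout; decodePFEntry and the made-up entry value are illustrative, while the shifts and masks mirror the ones used above.

#include <cassert>
#include <cstdint>

// Sketch of the PerfectShuffleTable entry layout:
// [31:30] cost  [29:26] opcode  [25:13] LHS entry id  [12:0] RHS entry id.
struct PFEntryFields {
  uint32_t Cost, OpNum, LHSID, RHSID;
};

static PFEntryFields decodePFEntry(uint32_t PFEntry) {
  return {PFEntry >> 30, (PFEntry >> 26) & 0x0F,
          (PFEntry >> 13) & ((1u << 13) - 1), PFEntry & ((1u << 13) - 1)};
}

int main() {
  // Re-encode a made-up entry and check it round-trips.
  uint32_t E = (2u << 30) | (5u << 26) | (102u << 13) | 4000u;
  PFEntryFields F = decodePFEntry(E);
  assert(F.Cost == 2 && F.OpNum == 5 && F.LHSID == 102 && F.RHSID == 4000);
  return 0;
}
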
8033
8034static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
8035 ArrayRef<int> ShuffleMask,
8036 SelectionDAG &DAG) {
8037 // Check to see if we can use the VTBL instruction.
8038 SDValue V1 = Op.getOperand(0);
8039 SDValue V2 = Op.getOperand(1);
8040 SDLoc DL(Op);
8041
8042 SmallVector<SDValue, 8> VTBLMask;
8043 for (ArrayRef<int>::iterator
8044 I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I)
8045 VTBLMask.push_back(DAG.getConstant(*I, DL, MVT::i32));
8046
8047 if (V2.getNode()->isUndef())
8048 return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
8049 DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
8050
8051 return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
8052 DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
8053}
8054
8055static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op,
8056 SelectionDAG &DAG) {
8057 SDLoc DL(Op);
8058 SDValue OpLHS = Op.getOperand(0);
8059 EVT VT = OpLHS.getValueType();
8060
8061 assert((VT == MVT::v8i16 || VT == MVT::v16i8) &&
8062 "Expect an v8i16/v16i8 type");
8063 OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, OpLHS);
8064 // For a v16i8 type: After the VREV, we have got <8, ...15, 8, ..., 0>. Now,
8065 // extract the first 8 bytes into the top double word and the last 8 bytes
8066 // into the bottom double word. The v8i16 case is similar.
8067 unsigned ExtractNum = (VT == MVT::v16i8) ? 8 : 4;
8068 return DAG.getNode(ARMISD::VEXT, DL, VT, OpLHS, OpLHS,
8069 DAG.getConstant(ExtractNum, DL, MVT::i32));
8070}
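
The reverse lowering above relies on VREV64 reversing the bytes inside each 64-bit half and a VEXT of half the vector rotating the two halves into place. Below is a standalone model of those two steps on plain arrays, assuming the usual NEON semantics; vrev64, vext and the byte values are illustrative.

#include <array>
#include <cassert>
#include <cstddef>

using V16 = std::array<int, 16>;

// VREV64 on a v16i8: reverse the bytes within each 8-byte half.
static V16 vrev64(const V16 &In) {
  V16 Out{};
  for (size_t i = 0; i < 16; ++i)
    Out[i] = In[(i & ~7u) + (7 - (i & 7))];
  return Out;
}

// VEXT #Imm: take bytes starting at Imm from A, wrapping into B.
static V16 vext(const V16 &A, const V16 &B, unsigned Imm) {
  V16 Out{};
  for (size_t i = 0; i < 16; ++i)
    Out[i] = (i + Imm < 16) ? A[i + Imm] : B[i + Imm - 16];
  return Out;
}

int main() {
  V16 In, Reversed;
  for (int i = 0; i < 16; ++i) {
    In[i] = i;
    Reversed[i] = 15 - i;
  }
  V16 Rev = vrev64(In);              // <7..0, 15..8>
  assert(vext(Rev, Rev, 8) == Reversed); // rotate halves -> full reverse
  return 0;
}
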
8071
8072static EVT getVectorTyFromPredicateVector(EVT VT) {
8073 switch (VT.getSimpleVT().SimpleTy) {
8074 case MVT::v4i1:
8075 return MVT::v4i32;
8076 case MVT::v8i1:
8077 return MVT::v8i16;
8078 case MVT::v16i1:
8079 return MVT::v16i8;
8080 default:
8081 llvm_unreachable("Unexpected vector predicate type");
8083}
8084
8085static SDValue PromoteMVEPredVector(SDLoc dl, SDValue Pred, EVT VT,
8086 SelectionDAG &DAG) {
8087 // Converting from boolean predicates to integers involves creating a vector
8088 // of all ones or all zeroes and selecting the lanes based upon the real
8089 // predicate.
8090 SDValue AllOnes =
8091 DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0xff), dl, MVT::i32);
8092 AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v16i8, AllOnes);
8093
8094 SDValue AllZeroes =
8095 DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0x0), dl, MVT::i32);
8096 AllZeroes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v16i8, AllZeroes);
8097
8098 // Get full vector type from predicate type
8099 EVT NewVT = getVectorTyFromPredicateVector(VT);
8100
8101 SDValue RecastV1;
8102 // If the real predicate is an v8i1 or v4i1 (not v16i1) then we need to recast
8103 // this to a v16i1. This cannot be done with an ordinary bitcast because the
8104 // sizes are not the same. We have to use a MVE specific PREDICATE_CAST node,
8105 // since we know in hardware the sizes are really the same.
8106 if (VT != MVT::v16i1)
8107 RecastV1 = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v16i1, Pred);
8108 else
8109 RecastV1 = Pred;
8110
8111 // Select either all ones or zeroes depending upon the real predicate bits.
8112 SDValue PredAsVector =
8113 DAG.getNode(ISD::VSELECT, dl, MVT::v16i8, RecastV1, AllOnes, AllZeroes);
8114
8115 // Recast our new predicate-as-integer v16i8 vector into something
8116 // appropriate for the shuffle, i.e. v4i32 for a real v4i1 predicate.
8117 return DAG.getNode(ISD::BITCAST, dl, NewVT, PredAsVector);
8118}
8119
8120static SDValue LowerVECTOR_SHUFFLE_i1(SDValue Op, SelectionDAG &DAG,
8121 const ARMSubtarget *ST) {
8122 EVT VT = Op.getValueType();
8123 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
8124 ArrayRef<int> ShuffleMask = SVN->getMask();
8125
8126 assert(ST->hasMVEIntegerOps() &&
8127 "No support for vector shuffle of boolean predicates");
8128
8129 SDValue V1 = Op.getOperand(0);
8130 SDLoc dl(Op);
8131 if (isReverseMask(ShuffleMask, VT)) {
8132 SDValue cast = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, V1);
8133 SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, cast);
8134 SDValue srl = DAG.getNode(ISD::SRL, dl, MVT::i32, rbit,
8135 DAG.getConstant(16, dl, MVT::i32));
8136 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, srl);
8137 }
8138
8139 // Until we can come up with optimised cases for every single vector
8140 // shuffle in existence, we have chosen the least painful strategy. This is
8141 // to essentially promote the boolean predicate to an 8-bit integer, where
8142 // each predicate represents a byte. Then we fall back on a normal integer
8143 // vector shuffle and convert the result back into a predicate vector. In
8144 // many cases the generated code might be even better than scalar code
8145 // operating on bits. Just imagine trying to shuffle 8 arbitrary 2-bit
8146 // fields in a register into 8 other arbitrary 2-bit fields!
8147 SDValue PredAsVector = PromoteMVEPredVector(dl, V1, VT, DAG);
8148 EVT NewVT = PredAsVector.getValueType();
8149
8150 // Do the shuffle!
8151 SDValue Shuffled = DAG.getVectorShuffle(NewVT, dl, PredAsVector,
8152 DAG.getUNDEF(NewVT), ShuffleMask);
8153
8154 // Now return the result of comparing the shuffled vector with zero,
8155 // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1.
8156 return DAG.getNode(ARMISD::VCMPZ, dl, VT, Shuffled,
8157 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
8158}
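
The isReverseMask path above reverses an MVE predicate by casting it to an i32, bit-reversing it and shifting right by 16, since the predicate only occupies the low 16 bits. Below is a standalone sketch on plain integers; bitreverse32, reversePredicate and the test values are illustrative.

#include <cassert>
#include <cstdint>

// Portable stand-in for a 32-bit bit reverse.
static uint32_t bitreverse32(uint32_t V) {
  uint32_t R = 0;
  for (int i = 0; i < 32; ++i)
    R |= ((V >> i) & 1u) << (31 - i);
  return R;
}

// Reverse the lanes of a 16-bit predicate held in an i32: bit-reverse the
// whole word, then shift the (now reversed) predicate back to the low half.
static uint16_t reversePredicate(uint16_t P) {
  return (uint16_t)(bitreverse32(P) >> 16);
}

int main() {
  // v16i1 with only lane 0 set reverses to only lane 15 set.
  assert(reversePredicate(0x0001) == 0x8000);
  // Lanes 0 and 1 set reverse to lanes 14 and 15.
  assert(reversePredicate(0x0003) == 0xC000);
  return 0;
}
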
8159
8160static SDValue LowerVECTOR_SHUFFLEUsingMovs(SDValue Op,
8161 ArrayRef<int> ShuffleMask,
8162 SelectionDAG &DAG) {
8163 // Attempt to lower the vector shuffle using as many whole register movs as
8164 // possible. This is useful for types smaller than 32 bits, which would
8165 // otherwise often become a series of GPR movs.
8166 SDLoc dl(Op);
8167 EVT VT = Op.getValueType();
8168 if (VT.getScalarSizeInBits() >= 32)
8169 return SDValue();
8170
8171 assert((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8172 "Unexpected vector type");
8173 int NumElts = VT.getVectorNumElements();
8174 int QuarterSize = NumElts / 4;
8175 // The four final parts of the vector, as i32's
8176 SDValue Parts[4];
8177
8178 // Look for full lane vmovs like <0,1,2,3> or <u,5,6,7> etc, (but not
8179 // <u,u,u,u>), returning the vmov lane index
8180 auto getMovIdx = [](ArrayRef<int> ShuffleMask, int Start, int Length) {
8181 // Detect which mov lane this would be from the first non-undef element.
8182 int MovIdx = -1;
8183 for (int i = 0; i < Length; i++) {
8184 if (ShuffleMask[Start + i] >= 0) {
8185 if (ShuffleMask[Start + i] % Length != i)
8186 return -1;
8187 MovIdx = ShuffleMask[Start + i] / Length;
8188 break;
8189 }
8190 }
8191 // If all items are undef, leave this for other combines
8192 if (MovIdx == -1)
8193 return -1;
8194 // Check the remaining values are the correct part of the same mov
8195 for (int i = 1; i < Length; i++) {
8196 if (ShuffleMask[Start + i] >= 0 &&
8197 (ShuffleMask[Start + i] / Length != MovIdx ||
8198 ShuffleMask[Start + i] % Length != i))
8199 return -1;
8200 }
8201 return MovIdx;
8202 };
8203
8204 for (int Part = 0; Part < 4; ++Part) {
8205 // Does this part look like a mov
8206 int Elt = getMovIdx(ShuffleMask, Part * QuarterSize, QuarterSize);
8207 if (Elt != -1) {
8208 SDValue Input = Op->getOperand(0);
8209 if (Elt >= 4) {
8210 Input = Op->getOperand(1);
8211 Elt -= 4;
8212 }
8213 SDValue BitCast = DAG.getBitcast(MVT::v4f32, Input);
8214 Parts[Part] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, BitCast,
8215 DAG.getConstant(Elt, dl, MVT::i32));
8216 }
8217 }
8218
8219 // Nothing interesting found, just return
8220 if (!Parts[0] && !Parts[1] && !Parts[2] && !Parts[3])
8221 return SDValue();
8222
8223 // The other parts need to be built with the old shuffle vector, cast to a
8224 // v4i32 and extract_vector_elts
8225 if (!Parts[0] || !Parts[1] || !Parts[2] || !Parts[3]) {
8226 SmallVector<int, 16> NewShuffleMask;
8227 for (int Part = 0; Part < 4; ++Part)
8228 for (int i = 0; i < QuarterSize; i++)
8229 NewShuffleMask.push_back(
8230 Parts[Part] ? -1 : ShuffleMask[Part * QuarterSize + i]);
8231 SDValue NewShuffle = DAG.getVectorShuffle(
8232 VT, dl, Op->getOperand(0), Op->getOperand(1), NewShuffleMask);
8233 SDValue BitCast = DAG.getBitcast(MVT::v4f32, NewShuffle);
8234
8235 for (int Part = 0; Part < 4; ++Part)
8236 if (!Parts[Part])
8237 Parts[Part] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32,
8238 BitCast, DAG.getConstant(Part, dl, MVT::i32));
8239 }
8240 // Build a vector out of the various parts and bitcast it back to the original
8241 // type.
8242 SDValue NewVec = DAG.getNode(ARMISD::BUILD_VECTOR, dl, MVT::v4f32, Parts);
8243 return DAG.getBitcast(VT, NewVec);
8244}
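
The getMovIdx lambda above decides whether one quarter of the mask can be copied as a whole 32-bit lane: every defined entry must select consecutive elements of the same lane of an input. Below is a standalone sketch of that check with an example v8i16 mask; the values are illustrative and the loop structure is condensed relative to the original.

#include <cassert>
#include <vector>

// Sketch: return the 32-bit source lane this quarter of the mask copies,
// or -1 if it is not a whole-lane copy (or is entirely undef).
static int getMovIdx(const std::vector<int> &Mask, int Start, int Length) {
  int MovIdx = -1;
  for (int i = 0; i < Length; i++) {
    if (Mask[Start + i] < 0)
      continue;
    if (Mask[Start + i] % Length != i)
      return -1;
    if (MovIdx == -1)
      MovIdx = Mask[Start + i] / Length;
    else if (Mask[Start + i] / Length != MovIdx)
      return -1;
  }
  return MovIdx;
}

int main() {
  // v8i16 mask <4,5,u,u,0,1,2,3>, quarters of 2 elements (32 bits each).
  std::vector<int> Mask = {4, 5, -1, -1, 0, 1, 2, 3};
  assert(getMovIdx(Mask, 0, 2) == 2);   // elements 4,5 -> 32-bit lane 2
  assert(getMovIdx(Mask, 2, 2) == -1);  // all undef: leave for other combines
  assert(getMovIdx(Mask, 4, 2) == 0);   // elements 0,1 -> lane 0
  assert(getMovIdx(Mask, 6, 2) == 1);   // elements 2,3 -> lane 1
  return 0;
}
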
8245
8246static SDValue LowerVECTOR_SHUFFLEUsingOneOff(SDValue Op,
8247 ArrayRef<int> ShuffleMask,
8248 SelectionDAG &DAG) {
8249 SDValue V1 = Op.getOperand(0);
8250 SDValue V2 = Op.getOperand(1);
8251 EVT VT = Op.getValueType();
8252 unsigned NumElts = VT.getVectorNumElements();
8253
8254 // A One-Off Identity mask is one that is mostly an identity mask from a
8255 // single source but contains a single element out-of-place, either from a
8256 // different vector or from another position in the same vector. As opposed to
8257 // lowering this via a ARMISD::BUILD_VECTOR we can generate an extract/insert
8258 // pair directly.
8259 auto isOneOffIdentityMask = [](ArrayRef<int> Mask, EVT VT, int BaseOffset,
8260 int &OffElement) {
8261 OffElement = -1;
8262 int NonUndef = 0;
8263 for (int i = 0, NumMaskElts = Mask.size(); i < NumMaskElts; ++i) {
8264 if (Mask[i] == -1)
8265 continue;
8266 NonUndef++;
8267 if (Mask[i] != i + BaseOffset) {
8268 if (OffElement == -1)
8269 OffElement = i;
8270 else
8271 return false;
8272 }
8273 }
8274 return NonUndef > 2 && OffElement != -1;
8275 };
8276 int OffElement;
8277 SDValue VInput;
8278 if (isOneOffIdentityMask(ShuffleMask, VT, 0, OffElement))
8279 VInput = V1;
8280 else if (isOneOffIdentityMask(ShuffleMask, VT, NumElts, OffElement))
8281 VInput = V2;
8282 else
8283 return SDValue();
8284
8285 SDLoc dl(Op);
8286 EVT SVT = VT.getScalarType() == MVT::i8 || VT.getScalarType() == MVT::i16
8287 ? MVT::i32
8288 : VT.getScalarType();
8289 SDValue Elt = DAG.getNode(
8290 ISD::EXTRACT_VECTOR_ELT, dl, SVT,
8291 ShuffleMask[OffElement] < (int)NumElts ? V1 : V2,
8292 DAG.getVectorIdxConstant(ShuffleMask[OffElement] % NumElts, dl));
8293 return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, VInput, Elt,
8294 DAG.getVectorIdxConstant(OffElement % NumElts, dl));
8295}
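
isOneOffIdentityMask above accepts a mask that is an identity on one source except for a single defined element, which can then be lowered as one extract/insert pair. Below is a standalone sketch of the test; the sample masks are illustrative.

#include <cassert>
#include <vector>

// Sketch: identity on one source except for exactly one defined element,
// with at least three elements defined overall.
static bool isOneOffIdentityMask(const std::vector<int> &Mask, int BaseOffset,
                                 int &OffElement) {
  OffElement = -1;
  int NonUndef = 0;
  for (int i = 0, e = Mask.size(); i < e; ++i) {
    if (Mask[i] == -1)
      continue;
    NonUndef++;
    if (Mask[i] != i + BaseOffset) {
      if (OffElement == -1)
        OffElement = i;
      else
        return false;
    }
  }
  return NonUndef > 2 && OffElement != -1;
}

int main() {
  int Off;
  // v4i32 mask <0,7,2,3>: identity on the first source except element 1,
  // which comes from the second source -> insert lane 3 of V2 into V1.
  assert(isOneOffIdentityMask({0, 7, 2, 3}, /*BaseOffset=*/0, Off) && Off == 1);
  // <0,1,6,7> has two off elements, so it is not a one-off identity.
  assert(!isOneOffIdentityMask({0, 1, 6, 7}, 0, Off));
  return 0;
}
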
8296
8297static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
8298 const ARMSubtarget *ST) {
8299 SDValue V1 = Op.getOperand(0);
8300 SDValue V2 = Op.getOperand(1);
8301 SDLoc dl(Op);
8302 EVT VT = Op.getValueType();
8303 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
8304 unsigned EltSize = VT.getScalarSizeInBits();
8305
8306 if (ST->hasMVEIntegerOps() && EltSize == 1)
8307 return LowerVECTOR_SHUFFLE_i1(Op, DAG, ST);
8308
8309 // Convert shuffles that are directly supported on NEON to target-specific
8310 // DAG nodes, instead of keeping them as shuffles and matching them again
8311 // during code selection. This is more efficient and avoids the possibility
8312 // of inconsistencies between legalization and selection.
8313 // FIXME: floating-point vectors should be canonicalized to integer vectors
8314 // of the same type so that they get CSEd properly.
8315 ArrayRef<int> ShuffleMask = SVN->getMask();
8316
8317 if (EltSize <= 32) {
8318 if (SVN->isSplat()) {
8319 int Lane = SVN->getSplatIndex();
8320 // If this is undef splat, generate it via "just" vdup, if possible.
8321 if (Lane == -1) Lane = 0;
8322
8323 // Test if V1 is a SCALAR_TO_VECTOR.
8324 if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
8325 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
8326 }
8327 // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
8328 // (and probably will turn into a SCALAR_TO_VECTOR once legalization
8329 // reaches it).
8330 if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
8331 !isa<ConstantSDNode>(V1.getOperand(0))) {
8332 bool IsScalarToVector = true;
8333 for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
8334 if (!V1.getOperand(i).isUndef()) {
8335 IsScalarToVector = false;
8336 break;
8337 }
8338 if (IsScalarToVector)
8339 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
8340 }
8341 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
8342 DAG.getConstant(Lane, dl, MVT::i32));
8343 }
8344
8345 bool ReverseVEXT = false;
8346 unsigned Imm = 0;
8347 if (ST->hasNEON() && isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
8348 if (ReverseVEXT)
8349 std::swap(V1, V2);
8350 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
8351 DAG.getConstant(Imm, dl, MVT::i32));
8352 }
8353
8354 if (isVREVMask(ShuffleMask, VT, 64))
8355 return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
8356 if (isVREVMask(ShuffleMask, VT, 32))
8357 return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
8358 if (isVREVMask(ShuffleMask, VT, 16))
8359 return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
8360
8361 if (ST->hasNEON() && V2->isUndef() && isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
8362 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1,
8363 DAG.getConstant(Imm, dl, MVT::i32));
8364 }
8365
8366 // Check for Neon shuffles that modify both input vectors in place.
8367 // If both results are used, i.e., if there are two shuffles with the same
8368 // source operands and with masks corresponding to both results of one of
8369 // these operations, DAG memoization will ensure that a single node is
8370 // used for both shuffles.
8371 unsigned WhichResult = 0;
8372 bool isV_UNDEF = false;
8373 if (ST->hasNEON()) {
8374 if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
8375 ShuffleMask, VT, WhichResult, isV_UNDEF)) {
8376 if (isV_UNDEF)
8377 V2 = V1;
8378 return DAG.getNode(ShuffleOpc, dl, DAG.getVTList(VT, VT), V1, V2)
8379 .getValue(WhichResult);
8380 }
8381 }
8382 if (ST->hasMVEIntegerOps()) {
8383 if (isVMOVNMask(ShuffleMask, VT, false, false))
8384 return DAG.getNode(ARMISD::VMOVN, dl, VT, V2, V1,
8385 DAG.getConstant(0, dl, MVT::i32));
8386 if (isVMOVNMask(ShuffleMask, VT, true, false))
8387 return DAG.getNode(ARMISD::VMOVN, dl, VT, V1, V2,
8388 DAG.getConstant(1, dl, MVT::i32));
8389 if (isVMOVNMask(ShuffleMask, VT, true, true))
8390 return DAG.getNode(ARMISD::VMOVN, dl, VT, V1, V1,
8391 DAG.getConstant(1, dl, MVT::i32));
8392 }
8393
8394 // Also check for these shuffles through CONCAT_VECTORS: we canonicalize
8395 // shuffles that produce a result larger than their operands with:
8396 // shuffle(concat(v1, undef), concat(v2, undef))
8397 // ->
8398 // shuffle(concat(v1, v2), undef)
8399 // because we can access quad vectors (see PerformVECTOR_SHUFFLECombine).
8400 //
8401 // This is useful in the general case, but there are special cases where
8402 // native shuffles produce larger results: the two-result ops.
8403 //
8404 // Look through the concat when lowering them:
8405 // shuffle(concat(v1, v2), undef)
8406 // ->
8407 // concat(VZIP(v1, v2):0, :1)
8408 //
8409 if (ST->hasNEON() && V1->getOpcode() == ISD::CONCAT_VECTORS && V2->isUndef()) {
8410 SDValue SubV1 = V1->getOperand(0);
8411 SDValue SubV2 = V1->getOperand(1);
8412 EVT SubVT = SubV1.getValueType();
8413
8414 // We expect these to have been canonicalized to -1.
8415 assert(llvm::all_of(ShuffleMask, [&](int i) {
8416 return i < (int)VT.getVectorNumElements();
8417 }) && "Unexpected shuffle index into UNDEF operand!");
8418
8419 if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
8420 ShuffleMask, SubVT, WhichResult, isV_UNDEF)) {
8421 if (isV_UNDEF)
8422 SubV2 = SubV1;
8423 assert((WhichResult == 0) &&
8424 "In-place shuffle of concat can only have one result!");
8425 SDValue Res = DAG.getNode(ShuffleOpc, dl, DAG.getVTList(SubVT, SubVT),
8426 SubV1, SubV2);
8427 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Res.getValue(0),
8428 Res.getValue(1));
8429 }
8430 }
8431 }
8432
8433 if (ST->hasMVEIntegerOps() && EltSize <= 32)
8434 if (SDValue V = LowerVECTOR_SHUFFLEUsingOneOff(Op, ShuffleMask, DAG))
8435 return V;
8436
8437 // If the shuffle is not directly supported and it has 4 elements, use
8438 // the PerfectShuffle-generated table to synthesize it from other shuffles.
8439 unsigned NumElts = VT.getVectorNumElements();
8440 if (NumElts == 4) {
8441 unsigned PFIndexes[4];
8442 for (unsigned i = 0; i != 4; ++i) {
8443 if (ShuffleMask[i] < 0)
8444 PFIndexes[i] = 8;
8445 else
8446 PFIndexes[i] = ShuffleMask[i];
8447 }
8448
8449 // Compute the index in the perfect shuffle table.
8450 unsigned PFTableIndex =
8451 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
8452 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
8453 unsigned Cost = (PFEntry >> 30);
8454
8455 if (Cost <= 4) {
8456 if (ST->hasNEON())
8457 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
8458 else if (isLegalMVEShuffleOp(PFEntry)) {
8459 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
8460 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
8461 unsigned PFEntryLHS = PerfectShuffleTable[LHSID];
8462 unsigned PFEntryRHS = PerfectShuffleTable[RHSID];
8463 if (isLegalMVEShuffleOp(PFEntryLHS) && isLegalMVEShuffleOp(PFEntryRHS))
8464 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
8465 }
8466 }
8467 }
8468
8469 // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
8470 if (EltSize >= 32) {
8471 // Do the expansion with floating-point types, since that is what the VFP
8472 // registers are defined to use, and since i64 is not legal.
8473 EVT EltVT = EVT::getFloatingPointVT(EltSize);
8474 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
8475 V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1);
8476 V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2);
8477 SmallVector<SDValue, 8> Ops;
8478 for (unsigned i = 0; i < NumElts; ++i) {
8479 if (ShuffleMask[i] < 0)
8480 Ops.push_back(DAG.getUNDEF(EltVT));
8481 else
8482 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
8483 ShuffleMask[i] < (int)NumElts ? V1 : V2,
8484 DAG.getConstant(ShuffleMask[i] & (NumElts-1),
8485 dl, MVT::i32)));
8486 }
8487 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
8488 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
8489 }
8490
8491 if (ST->hasNEON() && (VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT))
8492 return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG);
8493
8494 if (ST->hasNEON() && VT == MVT::v8i8)
8495 if (SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG))
8496 return NewOp;
8497
8498 if (ST->hasMVEIntegerOps())
8499 if (SDValue NewOp = LowerVECTOR_SHUFFLEUsingMovs(Op, ShuffleMask, DAG))
8500 return NewOp;
8501
8502 return SDValue();
8503}
8504
8505static SDValue LowerINSERT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG,
8506 const ARMSubtarget *ST) {
8507 EVT VecVT = Op.getOperand(0).getValueType();
8508 SDLoc dl(Op);
8509
8510 assert(ST->hasMVEIntegerOps() &&
8511 "LowerINSERT_VECTOR_ELT_i1 called without MVE!");
8512
8513 SDValue Conv =
8514 DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0));
8515 unsigned Lane = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
8516 unsigned LaneWidth =
8517 getVectorTyFromPredicateVector(VecVT).getScalarSizeInBits() / 8;
8518 unsigned Mask = ((1 << LaneWidth) - 1) << Lane * LaneWidth;
8519 SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i32,
8520 Op.getOperand(1), DAG.getValueType(MVT::i1));
8521 SDValue BFI = DAG.getNode(ARMISD::BFI, dl, MVT::i32, Conv, Ext,
8522 DAG.getConstant(~Mask, dl, MVT::i32));
8523 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, Op.getValueType(), BFI);
8524}
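
The insert lowering above works on the packed 16-bit predicate: LaneWidth is the promoted element size in bytes (4, 2 or 1), which is also the number of predicate bits a lane owns, and the BFI keeps everything outside the lane's mask while filling the lane from the sign-extended boolean. Below is a standalone sketch of that masking on plain integers; laneMask, insertLane and the test values are illustrative.

#include <cassert>
#include <cstdint>

// Each v4i1 lane owns 4 predicate bits, each v8i1 lane 2 and each v16i1 lane 1.
static uint32_t laneMask(unsigned NumElts, unsigned Lane) {
  unsigned LaneWidth = 16 / NumElts; // predicate bits per lane: 4, 2 or 1
  return ((1u << LaneWidth) - 1) << (Lane * LaneWidth);
}

// Insert a boolean into one lane of the packed predicate: clear the lane,
// then fill it when the new value is true (what the BFI above does).
static uint32_t insertLane(uint32_t Pred, unsigned NumElts, unsigned Lane,
                           bool Val) {
  uint32_t M = laneMask(NumElts, Lane);
  return (Pred & ~M) | (Val ? M : 0);
}

int main() {
  assert(laneMask(/*v4i1*/ 4, /*Lane=*/1) == 0x00F0);
  assert(laneMask(/*v16i1*/ 16, /*Lane=*/5) == 0x0020);
  // Setting lane 1 of an all-zero v4i1 predicate gives 0x00F0.
  assert(insertLane(0x0000, 4, 1, true) == 0x00F0);
  // Clearing lane 0 of an all-ones v4i1 predicate gives 0xFFF0.
  assert(insertLane(0xFFFF, 4, 0, false) == 0xFFF0);
  return 0;
}
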
8525
8526SDValue ARMTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
8527 SelectionDAG &DAG) const {
8528 // INSERT_VECTOR_ELT is legal only for immediate indexes.
8529 SDValue Lane = Op.getOperand(2);
8530 if (!isa<ConstantSDNode>(Lane))
8531 return SDValue();
8532
8533 SDValue Elt = Op.getOperand(1);
8534 EVT EltVT = Elt.getValueType();
8535
8536 if (Subtarget->hasMVEIntegerOps() &&
8537 Op.getValueType().getScalarSizeInBits() == 1)
8538 return LowerINSERT_VECTOR_ELT_i1(Op, DAG, Subtarget);
8539
8540 if (getTypeAction(*DAG.getContext(), EltVT) ==
8541 TargetLowering::TypePromoteFloat) {
8542 // INSERT_VECTOR_ELT doesn't want f16 operands promoting to f32,
8543 // but the type system will try to do that if we don't intervene.
8544 // Reinterpret any such vector-element insertion as one with the
8545 // corresponding integer types.
8546
8547 SDLoc dl(Op);
8548
8549 EVT IEltVT = MVT::getIntegerVT(EltVT.getScalarSizeInBits());
8550 assert(getTypeAction(*DAG.getContext(), IEltVT) !=
8551 TargetLowering::TypePromoteFloat);
8552
8553 SDValue VecIn = Op.getOperand(0);
8554 EVT VecVT = VecIn.getValueType();
8555 EVT IVecVT = EVT::getVectorVT(*DAG.getContext(), IEltVT,
8556 VecVT.getVectorNumElements());
8557
8558 SDValue IElt = DAG.getNode(ISD::BITCAST, dl, IEltVT, Elt);
8559 SDValue IVecIn = DAG.getNode(ISD::BITCAST, dl, IVecVT, VecIn);
8560 SDValue IVecOut = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, IVecVT,
8561 IVecIn, IElt, Lane);
8562 return DAG.getNode(ISD::BITCAST, dl, VecVT, IVecOut);
8563 }
8564
8565 return Op;
8566}
8567
8568static SDValue LowerEXTRACT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG,
8569 const ARMSubtarget *ST) {
8570 EVT VecVT = Op.getOperand(0).getValueType();
8571 SDLoc dl(Op);
8572
8573 assert(ST->hasMVEIntegerOps() &&
8574 "LowerINSERT_VECTOR_ELT_i1 called without MVE!");
8575
8576 SDValue Conv =
8577 DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0));
8578 unsigned Lane = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
8579 unsigned LaneWidth =
8580 getVectorTyFromPredicateVector(VecVT).getScalarSizeInBits() / 8;
8581 SDValue Shift = DAG.getNode(ISD::SRL, dl, MVT::i32, Conv,
8582 DAG.getConstant(Lane * LaneWidth, dl, MVT::i32));
8583 return Shift;
8584}
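
The matching extract above simply shifts the packed predicate right by Lane * LaneWidth so that the wanted lane ends up in the low bits. Below is a standalone sketch; extractLane and the sample predicate are illustrative, and only bit 0 of the shifted value matters to callers.

#include <cassert>
#include <cstdint>

// Sketch of the i1 extract: shift the lane's field down to bit 0.
static bool extractLane(uint32_t Pred, unsigned NumElts, unsigned Lane) {
  unsigned LaneWidth = 16 / NumElts; // predicate bits per lane: 4, 2 or 1
  return (Pred >> (Lane * LaneWidth)) & 1u;
}

int main() {
  // v4i1 predicate 0x0F0F has lanes <1,0,1,0>.
  assert(extractLane(0x0F0F, 4, 0) == true);
  assert(extractLane(0x0F0F, 4, 1) == false);
  assert(extractLane(0x0F0F, 4, 2) == true);
  assert(extractLane(0x0F0F, 4, 3) == false);
  return 0;
}
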
8585
8586static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG,
8587 const ARMSubtarget *ST) {
8588 // EXTRACT_VECTOR_ELT is legal only for immediate indexes.
8589 SDValue Lane = Op.getOperand(1);
8590 if (!isa<ConstantSDNode>(Lane))
8591 return SDValue();
8592
8593 SDValue Vec = Op.getOperand(0);
8594 EVT VT = Vec.getValueType();
8595
8596 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
8597 return LowerEXTRACT_VECTOR_ELT_i1(Op, DAG, ST);
8598
8599 if (Op.getValueType() == MVT::i32 && Vec.getScalarValueSizeInBits() < 32) {
8600 SDLoc dl(Op);
8601 return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
8602 }
8603
8604 return Op;
8605}
8606
8607static SDValue LowerCONCAT_VECTORS_i1(SDValue Op, SelectionDAG &DAG,
8608 const ARMSubtarget *ST) {
8609 SDValue V1 = Op.getOperand(0);
8610 SDValue V2 = Op.getOperand(1);
8611 SDLoc dl(Op);
8612 EVT VT = Op.getValueType();
8613 EVT Op1VT = V1.getValueType();
8614 EVT Op2VT = V2.getValueType();
8615 unsigned NumElts = VT.getVectorNumElements();
8616
8617 assert(Op1VT == Op2VT && "Operand types don't match!");
8618 assert(VT.getScalarSizeInBits() == 1 &&
8619 "Unexpected custom CONCAT_VECTORS lowering");
8620 assert(ST->hasMVEIntegerOps() &&
8621 "CONCAT_VECTORS lowering only supported for MVE");
8622
8623 SDValue NewV1 = PromoteMVEPredVector(dl, V1, Op1VT, DAG);
8624 SDValue NewV2 = PromoteMVEPredVector(dl, V2, Op2VT, DAG);
8625
8626 // We now have Op1 + Op2 promoted to vectors of integers, where v8i1 gets
8627 // promoted to v8i16, etc.
8628
8629 MVT ElType = getVectorTyFromPredicateVector(VT).getScalarType().getSimpleVT();
8630
8631 // Extract the vector elements from Op1 and Op2 one by one and truncate them
8632 // to be the right size for the destination. For example, if Op1 is v4i1 then
8633 // the promoted vector is v4i32. The result of concatenation gives a v8i1,
8634 // which when promoted is v8i16. That means each i32 element from Op1 needs
8635 // truncating to i16 and inserting in the result.
8636 EVT ConcatVT = MVT::getVectorVT(ElType, NumElts);
8637 SDValue ConVec = DAG.getNode(ISD::UNDEF, dl, ConcatVT);
8638 auto ExractInto = [&DAG, &dl](SDValue NewV, SDValue ConVec, unsigned &j) {
8639 EVT NewVT = NewV.getValueType();
8640 EVT ConcatVT = ConVec.getValueType();
8641 for (unsigned i = 0, e = NewVT.getVectorNumElements(); i < e; i++, j++) {
8642 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV,
8643 DAG.getIntPtrConstant(i, dl));
8644 ConVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ConcatVT, ConVec, Elt,
8645 DAG.getConstant(j, dl, MVT::i32));
8646 }
8647 return ConVec;
8648 };
8649 unsigned j = 0;
8650 ConVec = ExractInto(NewV1, ConVec, j);
8651 ConVec = ExractInto(NewV2, ConVec, j);
8652
8653 // Now return the result of comparing the subvector with zero,
8654 // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1.
8655 return DAG.getNode(ARMISD::VCMPZ, dl, VT, ConVec,
8656 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
8657}
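
As a rough illustration of the strategy above (a host-side analogy, not DAG code; the values are invented for the example), the lane values survive the promote/insert steps and the final compare-with-zero rebuilds the concatenated predicate:

#include <array>
#include <cassert>
#include <cstdint>

int main() {
  // Two "v4i1" predicates, promoted lane by lane into one wider integer
  // vector (v4i1 -> v4i32 style), mirroring the extract/insert loop above.
  std::array<bool, 4> A = {true, false, true, true};
  std::array<bool, 4> B = {false, true, false, true};
  std::array<int32_t, 8> Widened{};
  unsigned j = 0;
  for (bool a : A) Widened[j++] = a ? -1 : 0;
  for (bool b : B) Widened[j++] = b ? -1 : 0;

  // The VCMPZ(NE) step: any nonzero lane becomes true in the result.
  std::array<bool, 8> Concat{};
  for (unsigned i = 0; i < 8; ++i) Concat[i] = Widened[i] != 0;

  assert(Concat[0] && !Concat[1] && !Concat[4] && Concat[5]);
  return 0;
}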
8658
8659static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG,
8660 const ARMSubtarget *ST) {
8661 EVT VT = Op->getValueType(0);
8662 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
8663 return LowerCONCAT_VECTORS_i1(Op, DAG, ST);
8664
8665 // The only time a CONCAT_VECTORS operation can have legal types is when
8666 // two 64-bit vectors are concatenated to a 128-bit vector.
8667 assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
8668 "unexpected CONCAT_VECTORS");
8669 SDLoc dl(Op);
8670 SDValue Val = DAG.getUNDEF(MVT::v2f64);
8671 SDValue Op0 = Op.getOperand(0);
8672 SDValue Op1 = Op.getOperand(1);
8673 if (!Op0.isUndef())
8674 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
8675 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),
8676 DAG.getIntPtrConstant(0, dl));
8677 if (!Op1.isUndef())
8678 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
8679 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),
8680 DAG.getIntPtrConstant(1, dl));
8681 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val);
8682}
8683
8684static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG,
8685 const ARMSubtarget *ST) {
8686 SDValue V1 = Op.getOperand(0);
8687 SDValue V2 = Op.getOperand(1);
8688 SDLoc dl(Op);
8689 EVT VT = Op.getValueType();
8690 EVT Op1VT = V1.getValueType();
8691 unsigned NumElts = VT.getVectorNumElements();
8692 unsigned Index = cast<ConstantSDNode>(V2)->getZExtValue();
8693
8694 assert(VT.getScalarSizeInBits() == 1 &&
8695 "Unexpected custom EXTRACT_SUBVECTOR lowering");
8696 assert(ST->hasMVEIntegerOps() &&
8697 "EXTRACT_SUBVECTOR lowering only supported for MVE");
8698
8699 SDValue NewV1 = PromoteMVEPredVector(dl, V1, Op1VT, DAG);
8700
8701 // We now have Op1 promoted to a vector of integers, where v8i1 gets
8702 // promoted to v8i16, etc.
8703
8704 MVT ElType = getVectorTyFromPredicateVector(VT).getScalarType().getSimpleVT();
8705
8706 EVT SubVT = MVT::getVectorVT(ElType, NumElts);
8707 SDValue SubVec = DAG.getNode(ISD::UNDEF, dl, SubVT);
8708 for (unsigned i = Index, j = 0; i < (Index + NumElts); i++, j++) {
8709 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV1,
8710 DAG.getIntPtrConstant(i, dl));
8711 SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,
8712 DAG.getConstant(j, dl, MVT::i32));
8713 }
8714
8715 // Now return the result of comparing the subvector with zero,
8716 // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1.
8717 return DAG.getNode(ARMISD::VCMPZ, dl, VT, SubVec,
8718 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
8719}
8720
8721// Turn a truncate into a predicate (an i1 vector) into icmp(and(x, 1), 0).
8722static SDValue LowerTruncatei1(SDValue N, SelectionDAG &DAG,
8723 const ARMSubtarget *ST) {
8724 assert(ST->hasMVEIntegerOps() && "Expected MVE!");
8725 EVT VT = N.getValueType();
8726 assert((VT == MVT::v16i1 || VT == MVT::v8i1 || VT == MVT::v4i1) &&
8727 "Expected a vector i1 type!");
8728 SDValue Op = N.getOperand(0);
8729 EVT FromVT = Op.getValueType();
8730 SDLoc DL(N);
8731
8732 SDValue And =
8733 DAG.getNode(ISD::AND, DL, FromVT, Op, DAG.getConstant(1, DL, FromVT));
8734 return DAG.getNode(ISD::SETCC, DL, VT, And, DAG.getConstant(0, DL, FromVT),
8735 DAG.getCondCode(ISD::SETNE));
8736}
8737
8738/// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
8739/// element has been zero/sign-extended, depending on the isSigned parameter,
8740/// from an integer type half its size.
8741static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
8742 bool isSigned) {
8743 // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32.
8744 EVT VT = N->getValueType(0);
8745 if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) {
8746 SDNode *BVN = N->getOperand(0).getNode();
8747 if (BVN->getValueType(0) != MVT::v4i32 ||
8748 BVN->getOpcode() != ISD::BUILD_VECTOR)
8749 return false;
8750 unsigned LoElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
8751 unsigned HiElt = 1 - LoElt;
8752 ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));
8753 ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));
8754 ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2));
8755 ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2));
8756 if (!Lo0 || !Hi0 || !Lo1 || !Hi1)
8757 return false;
8758 if (isSigned) {
8759 if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 &&
8760 Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)
8761 return true;
8762 } else {
8763 if (Hi0->isNullValue() && Hi1->isNullValue())
8764 return true;
8765 }
8766 return false;
8767 }
8768
8769 if (N->getOpcode() != ISD::BUILD_VECTOR)
8770 return false;
8771
8772 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
8773 SDNode *Elt = N->getOperand(i).getNode();
8774 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
8775 unsigned EltSize = VT.getScalarSizeInBits();
8776 unsigned HalfSize = EltSize / 2;
8777 if (isSigned) {
8778 if (!isIntN(HalfSize, C->getSExtValue()))
8779 return false;
8780 } else {
8781 if (!isUIntN(HalfSize, C->getZExtValue()))
8782 return false;
8783 }
8784 continue;
8785 }
8786 return false;
8787 }
8788
8789 return true;
8790}
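
To make the lo/hi check above concrete (the value here is illustrative): a 64-bit constant that is just the sign extension of its low 32 bits has a high half equal to the sign-extended low half shifted right by 32, which is what the Hi0/Lo0 comparison tests.

#include <cassert>
#include <cstdint>

int main() {
  int64_t V = -5;                              // fits in i32 after sign extension
  int32_t Lo = static_cast<int32_t>(V);        // 0xFFFFFFFB
  int32_t Hi = static_cast<int32_t>(V >> 32);  // 0xFFFFFFFF (all sign bits)
  // Mirrors Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 above.
  assert(static_cast<int64_t>(Hi) == (static_cast<int64_t>(Lo) >> 32));
  return 0;
}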
8791
8792/// isSignExtended - Check if a node is a vector value that is sign-extended
8793/// or a constant BUILD_VECTOR with sign-extended elements.
8794static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
8795 if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N))
8796 return true;
8797 if (isExtendedBUILD_VECTOR(N, DAG, true))
8798 return true;
8799 return false;
8800}
8801
8802/// isZeroExtended - Check if a node is a vector value that is zero-extended (or
8803/// any-extended) or a constant BUILD_VECTOR with zero-extended elements.
8804static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
8805 if (N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND ||
8806 ISD::isZEXTLoad(N))
8807 return true;
8808 if (isExtendedBUILD_VECTOR(N, DAG, false))
8809 return true;
8810 return false;
8811}
8812
8813static EVT getExtensionTo64Bits(const EVT &OrigVT) {
8814 if (OrigVT.getSizeInBits() >= 64)
8815 return OrigVT;
8816
8817 assert(OrigVT.isSimple() && "Expecting a simple value type");
8818
8819 MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
8820 switch (OrigSimpleTy) {
8821 default: llvm_unreachable("Unexpected Vector Type");
8822 case MVT::v2i8:
8823 case MVT::v2i16:
8824 return MVT::v2i32;
8825 case MVT::v4i8:
8826 return MVT::v4i16;
8827 }
8828}
8829
8830/// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total
8831/// value size to 64 bits. We need a 64-bit D register as an operand to VMULL.
8832/// We insert the required extension here to get the vector to fill a D register.
8833static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG,
8834 const EVT &OrigTy,
8835 const EVT &ExtTy,
8836 unsigned ExtOpcode) {
8837 // The vector originally had a size of OrigTy. It was then extended to ExtTy.
8838 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
8839 // 64-bits we need to insert a new extension so that it will be 64-bits.
8840 assert(ExtTy.is128BitVector() && "Unexpected extension size");
8841 if (OrigTy.getSizeInBits() >= 64)
8842 return N;
8843
8844 // Must extend size to at least 64 bits to be used as an operand for VMULL.
8845 EVT NewVT = getExtensionTo64Bits(OrigTy);
8846
8847 return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
8848}
8849
8850/// SkipLoadExtensionForVMULL - return a load of the original vector size that
8851/// does not do any sign/zero extension. If the original vector is less
8852/// than 64 bits, an appropriate extension will be added after the load to
8853/// reach a total size of 64 bits. We have to add the extension separately
8854/// because ARM does not have a sign/zero extending load for vectors.
8855static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) {
8856 EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT());
8857
8858 // The load already has the right type.
8859 if (ExtendedTy == LD->getMemoryVT())
8860 return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(),
8861 LD->getBasePtr(), LD->getPointerInfo(),
8862 LD->getAlignment(), LD->getMemOperand()->getFlags());
8863
8864 // We need to create a zextload/sextload. We cannot just create a load
8865 // followed by a sext/zext node because LowerMUL is also run during normal
8866 // operation legalization where we can't create illegal types.
8867 return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy,
8868 LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
8869 LD->getMemoryVT(), LD->getAlignment(),
8870 LD->getMemOperand()->getFlags());
8871}
8872
8873/// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,
8874/// ANY_EXTEND, extending load, or BUILD_VECTOR with extended elements, return
8875/// the unextended value. The unextended vector should be 64 bits so that it can
8876/// be used as an operand to a VMULL instruction. If the original vector size
8877/// before extension is less than 64 bits we add an extension to resize
8878/// the vector to 64 bits.
8879static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
8880 if (N->getOpcode() == ISD::SIGN_EXTEND ||
8881 N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND)
8882 return AddRequiredExtensionForVMULL(N->getOperand(0), DAG,
8883 N->getOperand(0)->getValueType(0),
8884 N->getValueType(0),
8885 N->getOpcode());
8886
8887 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
8888 assert((ISD::isSEXTLoad(LD) || ISD::isZEXTLoad(LD)) &&
8889 "Expected extending load");
8890
8891 SDValue newLoad = SkipLoadExtensionForVMULL(LD, DAG);
8892 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), newLoad.getValue(1));
8893 unsigned Opcode = ISD::isSEXTLoad(LD) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
8894 SDValue extLoad =
8895 DAG.getNode(Opcode, SDLoc(newLoad), LD->getValueType(0), newLoad);
8896 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 0), extLoad);
8897
8898 return newLoad;
8899 }
8900
8901 // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will
8902 // have been legalized as a BITCAST from v4i32.
8903 if (N->getOpcode() == ISD::BITCAST) {
8904 SDNode *BVN = N->getOperand(0).getNode();
8905 assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
8906 BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
8907 unsigned LowElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
8908 return DAG.getBuildVector(
8909 MVT::v2i32, SDLoc(N),
8910 {BVN->getOperand(LowElt), BVN->getOperand(LowElt + 2)});
8911 }
8912 // Construct a new BUILD_VECTOR with elements truncated to half the size.
8913 assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
8914 EVT VT = N->getValueType(0);
8915 unsigned EltSize = VT.getScalarSizeInBits() / 2;
8916 unsigned NumElts = VT.getVectorNumElements();
8917 MVT TruncVT = MVT::getIntegerVT(EltSize);
8918 SmallVector<SDValue, 8> Ops;
8919 SDLoc dl(N);
8920 for (unsigned i = 0; i != NumElts; ++i) {
8921 ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
8922 const APInt &CInt = C->getAPIntValue();
8923 // Element types smaller than 32 bits are not legal, so use i32 elements.
8924 // The values are implicitly truncated so sext vs. zext doesn't matter.
8925 Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
8926 }
8927 return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
8928}
8929
8930static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
8931 unsigned Opcode = N->getOpcode();
8932 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
8933 SDNode *N0 = N->getOperand(0).getNode();
8934 SDNode *N1 = N->getOperand(1).getNode();
8935 return N0->hasOneUse() && N1->hasOneUse() &&
8936 isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
8937 }
8938 return false;
8939}
8940
8941static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
8942 unsigned Opcode = N->getOpcode();
8943 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
8944 SDNode *N0 = N->getOperand(0).getNode();
8945 SDNode *N1 = N->getOperand(1).getNode();
8946 return N0->hasOneUse() && N1->hasOneUse() &&
8947 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
8948 }
8949 return false;
8950}
8951
8952static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
8953 // Multiplications are only custom-lowered for 128-bit vectors so that
8954 // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
8955 EVT VT = Op.getValueType();
8956 assert(VT.is128BitVector() && VT.isInteger() &&
8957 "unexpected type for custom-lowering ISD::MUL");
8958 SDNode *N0 = Op.getOperand(0).getNode();
8959 SDNode *N1 = Op.getOperand(1).getNode();
8960 unsigned NewOpc = 0;
8961 bool isMLA = false;
8962 bool isN0SExt = isSignExtended(N0, DAG);
8963 bool isN1SExt = isSignExtended(N1, DAG);
8964 if (isN0SExt && isN1SExt)
8965 NewOpc = ARMISD::VMULLs;
8966 else {
8967 bool isN0ZExt = isZeroExtended(N0, DAG);
8968 bool isN1ZExt = isZeroExtended(N1, DAG);
8969 if (isN0ZExt && isN1ZExt)
8970 NewOpc = ARMISD::VMULLu;
8971 else if (isN1SExt || isN1ZExt) {
8972 // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
8973 // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
8974 if (isN1SExt && isAddSubSExt(N0, DAG)) {
8975 NewOpc = ARMISD::VMULLs;
8976 isMLA = true;
8977 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
8978 NewOpc = ARMISD::VMULLu;
8979 isMLA = true;
8980 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
8981 std::swap(N0, N1);
8982 NewOpc = ARMISD::VMULLu;
8983 isMLA = true;
8984 }
8985 }
8986
8987 if (!NewOpc) {
8988 if (VT == MVT::v2i64)
8989 // Fall through to expand this. It is not legal.
8990 return SDValue();
8991 else
8992 // Other vector multiplications are legal.
8993 return Op;
8994 }
8995 }
8996
8997 // Legalize to a VMULL instruction.
8998 SDLoc DL(Op);
8999 SDValue Op0;
9000 SDValue Op1 = SkipExtensionForVMULL(N1, DAG);
9001 if (!isMLA) {
9002 Op0 = SkipExtensionForVMULL(N0, DAG);
9003 assert(Op0.getValueType().is64BitVector() &&
9004 Op1.getValueType().is64BitVector() &&
9005 "unexpected types for extended operands to VMULL");
9006 return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
9007 }
9008
9009 // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during
9010 // isel lowering to take advantage of no-stall back to back vmul + vmla.
9011 // vmull q0, d4, d6
9012 // vmlal q0, d5, d6
9013 // is faster than
9014 // vaddl q0, d4, d5
9015 // vmovl q1, d6
9016 // vmul q0, q0, q1
9017 SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG);
9018 SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG);
9019 EVT Op1VT = Op1.getValueType();
9020 return DAG.getNode(N0->getOpcode(), DL, VT,
9021 DAG.getNode(NewOpc, DL, VT,
9022 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
9023 DAG.getNode(NewOpc, DL, VT,
9024 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
9025}
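
The MLA rewrite described in the comment above relies on the fact that, once both operands are widened, the multiply distributes over the add. A small check with made-up values:

#include <cassert>
#include <cstdint>

int main() {
  int16_t A = -1234, B = 321, C = 77;
  // vaddl + vmovl + vmul shape: widen, add, then multiply.
  int32_t AddThenMul = (int32_t(A) + int32_t(B)) * int32_t(C);
  // vmull + vmlal shape: two widening multiplies, then add.
  int32_t MulThenAdd = int32_t(A) * int32_t(C) + int32_t(B) * int32_t(C);
  assert(AddThenMul == MulThenAdd);
  return 0;
}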
9026
9027static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, const SDLoc &dl,
9028 SelectionDAG &DAG) {
9029 // TODO: Should this propagate fast-math-flags?
9030
9031 // Convert to float
9032 // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
9033 // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
9034 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X);
9035 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y);
9036 X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X);
9037 Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);
9038 // Get reciprocal estimate.
9039 // float4 recip = vrecpeq_f32(yf);
9040 Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9041 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
9042 Y);
9043 // Because char has a smaller range than uchar, we can actually get away
9044 // without any newton steps. This requires that we use a weird bias
9045 // of 0xb000, however (again, this has been exhaustively tested).
9046 // float4 result = as_float4(as_int4(xf*recip) + 0xb000);
9047 X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
9048 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
9049 Y = DAG.getConstant(0xb000, dl, MVT::v4i32);
9050 X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
9051 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
9052 // Convert back to short.
9053 X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X);
9054 X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X);
9055 return X;
9056}
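
For reference, the per-lane value the float/reciprocal sequence above is tuned to produce (per the comment, the 0xb000 bias makes it exact for operands originating from i8 values) is ordinary truncating signed division. A scalar sketch of that reference semantics:

#include <cassert>
#include <cstdint>

// Reference result for one lane: inputs are i8-range values held in i16
// lanes, the output is the truncated signed quotient in an i16 lane.
int16_t ReferenceLane(int8_t X, int8_t Y) {
  assert(Y != 0);
  return static_cast<int16_t>(static_cast<int32_t>(X) / static_cast<int32_t>(Y));
}

int main() {
  assert(ReferenceLane(100, 7) == 14);
  assert(ReferenceLane(-100, 7) == -14); // division truncates toward zero
  return 0;
}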
9057
9058static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl,
9059 SelectionDAG &DAG) {
9060 // TODO: Should this propagate fast-math-flags?
9061
9062 SDValue N2;
9063 // Convert to float.
9064 // float4 yf = vcvt_f32_s32(vmovl_s16(y));
9065 // float4 xf = vcvt_f32_s32(vmovl_s16(x));
9066 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0);
9067 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
9068 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
9069 N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
9070
9071 // Use reciprocal estimate and one refinement step.
9072 // float4 recip = vrecpeq_f32(yf);
9073 // recip *= vrecpsq_f32(yf, recip);
9074 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9075 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
9076 N1);
9077 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9078 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
9079 N1, N2);
9080 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
9081 // Because short has a smaller range than ushort, we can actually get away
9082 // with only a single newton step. This requires that we use a weird bias
9083 // of 89, however (again, this has been exhaustively tested).
9084 // float4 result = as_float4(as_int4(xf*recip) + 0x89);
9085 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
9086 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
9087 N1 = DAG.getConstant(0x89, dl, MVT::v4i32);
9088 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
9089 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
9090 // Convert back to integer and return.
9091 // return vmovn_s32(vcvt_s32_f32(result));
9092 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
9093 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
9094 return N0;
9095}
9096
9097static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG,
9098 const ARMSubtarget *ST) {
9099 EVT VT = Op.getValueType();
9100 assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
9101 "unexpected type for custom-lowering ISD::SDIV");
9102
9103 SDLoc dl(Op);
9104 SDValue N0 = Op.getOperand(0);
9105 SDValue N1 = Op.getOperand(1);
9106 SDValue N2, N3;
9107
9108 if (VT == MVT::v8i8) {
9109 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0);
9110 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);
9111
9112 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9113 DAG.getIntPtrConstant(4, dl));
9114 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9115 DAG.getIntPtrConstant(4, dl));
9116 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9117 DAG.getIntPtrConstant(0, dl));
9118 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9119 DAG.getIntPtrConstant(0, dl));
9120
9121 N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16
9122 N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16
9123
9124 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
9125 N0 = LowerCONCAT_VECTORS(N0, DAG, ST);
9126
9127 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);
9128 return N0;
9129 }
9130 return LowerSDIV_v4i16(N0, N1, dl, DAG);
9131}
9132
9133static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG,
9134 const ARMSubtarget *ST) {
9135 // TODO: Should this propagate fast-math-flags?
9136 EVT VT = Op.getValueType();
9137 assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
9138 "unexpected type for custom-lowering ISD::UDIV");
9139
9140 SDLoc dl(Op);
9141 SDValue N0 = Op.getOperand(0);
9142 SDValue N1 = Op.getOperand(1);
9143 SDValue N2, N3;
9144
9145 if (VT == MVT::v8i8) {
9146 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0);
9147 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);
9148
9149 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9150 DAG.getIntPtrConstant(4, dl));
9151 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9152 DAG.getIntPtrConstant(4, dl));
9153 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9154 DAG.getIntPtrConstant(0, dl));
9155 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9156 DAG.getIntPtrConstant(0, dl));
9157
9158 N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16
9159 N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16
9160
9161 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
9162 N0 = LowerCONCAT_VECTORS(N0, DAG, ST);
9163
9164 N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
9165 DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, dl,
9166 MVT::i32),
9167 N0);
9168 return N0;
9169 }
9170
9171 // v4i16 sdiv ... Convert to float.
9172 // float4 yf = vcvt_f32_s32(vmovl_u16(y));
9173 // float4 xf = vcvt_f32_s32(vmovl_u16(x));
9174 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0);
9175 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1);
9176 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
9177 SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
9178
9179 // Use reciprocal estimate and two refinement steps.
9180 // float4 recip = vrecpeq_f32(yf);
9181 // recip *= vrecpsq_f32(yf, recip);
9182 // recip *= vrecpsq_f32(yf, recip);
9183 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9184 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
9185 BN1);
9186 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9187 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
9188 BN1, N2);
9189 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
9190 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9191 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
9192 BN1, N2);
9193 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
9194 // Simply multiplying by the reciprocal estimate can leave us a few ulps
9195 // too low, so we add 2 ulps (exhaustive testing shows that this is enough,
9196 // and that it will never cause us to return an answer too large).
9197 // float4 result = as_float4(as_int4(xf*recip) + 2);
9198 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
9199 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
9200 N1 = DAG.getConstant(2, dl, MVT::v4i32);
9201 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
9202 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
9203 // Convert back to integer and return.
9204 // return vmovn_u32(vcvt_s32_f32(result));
9205 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
9206 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
9207 return N0;
9208}
9209
9210static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) {
9211 SDNode *N = Op.getNode();
9212 EVT VT = N->getValueType(0);
9213 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
9214
9215 SDValue Carry = Op.getOperand(2);
9216
9217 SDLoc DL(Op);
9218
9219 SDValue Result;
9220 if (Op.getOpcode() == ISD::ADDCARRY) {
9221 // This converts the boolean value carry into the carry flag.
9222 Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
9223
9224 // Do the addition proper using the carry flag we wanted.
9225 Result = DAG.getNode(ARMISD::ADDE, DL, VTs, Op.getOperand(0),
9226 Op.getOperand(1), Carry);
9227
9228 // Now convert the carry flag into a boolean value.
9229 Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
9230 } else {
9231 // ARMISD::SUBE expects a carry not a borrow like ISD::SUBCARRY so we
9232 // have to invert the carry first.
9233 Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
9234 DAG.getConstant(1, DL, MVT::i32), Carry);
9235 // This converts the boolean value carry into the carry flag.
9236 Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
9237
9238 // Do the subtraction proper using the carry flag we wanted.
9239 Result = DAG.getNode(ARMISD::SUBE, DL, VTs, Op.getOperand(0),
9240 Op.getOperand(1), Carry);
9241
9242 // Now convert the carry flag into a boolean value.
9243 Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
9244 // But the carry returned by ARMISD::SUBE is not a borrow as expected
9245 // by ISD::SUBCARRY, so compute 1 - C.
9246 Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
9247 DAG.getConstant(1, DL, MVT::i32), Carry);
9248 }
9249
9250 // Return both values.
9251 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Carry);
9252}
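
The two "1 - C" conversions above exist because ISD::SUBCARRY speaks in borrows while ARM's SBC speaks in carries (carry means "no borrow"). A minimal model of that convention, assuming nothing beyond ordinary two's-complement arithmetic:

#include <cassert>
#include <cstdint>

// ARM-style subtract-with-carry on 32-bit values: A + ~B + CarryIn.
uint32_t Sbc(uint32_t A, uint32_t B, uint32_t CarryIn, uint32_t &CarryOut) {
  uint64_t Wide = uint64_t(A) + uint64_t(~B) + uint64_t(CarryIn);
  CarryOut = uint32_t(Wide >> 32); // 1 means "no borrow occurred"
  return uint32_t(Wide);
}

int main() {
  uint32_t BorrowIn = 1;           // ISD::SUBCARRY-style borrow input
  uint32_t CarryIn = 1 - BorrowIn; // convert to the ARM carry convention
  uint32_t CarryOut;
  uint32_t Res = Sbc(5, 7, CarryIn, CarryOut);
  uint32_t BorrowOut = 1 - CarryOut; // convert back to a borrow
  assert(Res == uint32_t(5 - 7 - 1)); // 5 - 7 - borrow
  assert(BorrowOut == 1);             // the subtraction did borrow
  return 0;
}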
9253
9254SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
9255 assert(Subtarget->isTargetDarwin());
9256
9257 // For iOS, we want to call an alternative entry point: __sincos_stret,
9258 // return values are passed via sret.
9259 SDLoc dl(Op);
9260 SDValue Arg = Op.getOperand(0);
9261 EVT ArgVT = Arg.getValueType();
9262 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
9263 auto PtrVT = getPointerTy(DAG.getDataLayout());
9264
9265 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
9266 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9267
9268 // Pair of floats / doubles used to pass the result.
9269 Type *RetTy = StructType::get(ArgTy, ArgTy);
9270 auto &DL = DAG.getDataLayout();
9271
9272 ArgListTy Args;
9273 bool ShouldUseSRet = Subtarget->isAPCS_ABI();
9274 SDValue SRet;
9275 if (ShouldUseSRet) {
9276 // Create stack object for sret.
9277 const uint64_t ByteSize = DL.getTypeAllocSize(RetTy);
9278 const Align StackAlign = DL.getPrefTypeAlign(RetTy);
9279 int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false);
9280 SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy(DL));
9281
9282 ArgListEntry Entry;
9283 Entry.Node = SRet;
9284 Entry.Ty = RetTy->getPointerTo();
9285 Entry.IsSExt = false;
9286 Entry.IsZExt = false;
9287 Entry.IsSRet = true;
9288 Args.push_back(Entry);
9289 RetTy = Type::getVoidTy(*DAG.getContext());
9290 }
9291
9292 ArgListEntry Entry;
9293 Entry.Node = Arg;
9294 Entry.Ty = ArgTy;
9295 Entry.IsSExt = false;
9296 Entry.IsZExt = false;
9297 Args.push_back(Entry);
9298
9299 RTLIB::Libcall LC =
9300 (ArgVT == MVT::f64) ? RTLIB::SINCOS_STRET_F64 : RTLIB::SINCOS_STRET_F32;
9301 const char *LibcallName = getLibcallName(LC);
9302 CallingConv::ID CC = getLibcallCallingConv(LC);
9303 SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL));
9304
9305 TargetLowering::CallLoweringInfo CLI(DAG);
9306 CLI.setDebugLoc(dl)
9307 .setChain(DAG.getEntryNode())
9308 .setCallee(CC, RetTy, Callee, std::move(Args))
9309 .setDiscardResult(ShouldUseSRet);
9310 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
9311
9312 if (!ShouldUseSRet)
9313 return CallResult.first;
9314
9315 SDValue LoadSin =
9316 DAG.getLoad(ArgVT, dl, CallResult.second, SRet, MachinePointerInfo());
9317
9318 // Address of cos field.
9319 SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, SRet,
9320 DAG.getIntPtrConstant(ArgVT.getStoreSize(), dl));
9321 SDValue LoadCos =
9322 DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add, MachinePointerInfo());
9323
9324 SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
9325 return DAG.getNode(ISD::MERGE_VALUES, dl, Tys,
9326 LoadSin.getValue(0), LoadCos.getValue(0));
9327}
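
The sret path above follows the usual "results written through a caller-provided slot" contract. A plain C++ sketch of that shape (the function and struct names here are illustrative, not the actual __sincos_stret libcall):

#include <cassert>
#include <cmath>

struct SinCosResult { float Sin, Cos; };

void SinCosStretLike(float X, SinCosResult *Out) { // "sret" output parameter
  Out->Sin = std::sin(X);
  Out->Cos = std::cos(X);
}

int main() {
  SinCosResult R;            // stack slot, like the CreateStackObject above
  SinCosStretLike(0.0f, &R); // one call computes both values
  assert(R.Sin == 0.0f && R.Cos == 1.0f);
  return 0;
}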
9328
9329SDValue ARMTargetLowering::LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG,
9330 bool Signed,
9331 SDValue &Chain) const {
9332 EVT VT = Op.getValueType();
9333 assert((VT == MVT::i32 || VT == MVT::i64) &&
9334 "unexpected type for custom lowering DIV");
9335 SDLoc dl(Op);
9336
9337 const auto &DL = DAG.getDataLayout();
9338 const auto &TLI = DAG.getTargetLoweringInfo();
9339
9340 const char *Name = nullptr;
9341 if (Signed)
9342 Name = (VT == MVT::i32) ? "__rt_sdiv" : "__rt_sdiv64";
9343 else
9344 Name = (VT == MVT::i32) ? "__rt_udiv" : "__rt_udiv64";
9345
9346 SDValue ES = DAG.getExternalSymbol(Name, TLI.getPointerTy(DL));
9347
9348 ARMTargetLowering::ArgListTy Args;
9349
9350 for (auto AI : {1, 0}) {
9351 ArgListEntry Arg;
9352 Arg.Node = Op.getOperand(AI);
9353 Arg.Ty = Arg.Node.getValueType().getTypeForEVT(*DAG.getContext());
9354 Args.push_back(Arg);
9355 }
9356
9357 CallLoweringInfo CLI(DAG);
9358 CLI.setDebugLoc(dl)
9359 .setChain(Chain)
9360 .setCallee(CallingConv::ARM_AAPCS_VFP, VT.getTypeForEVT(*DAG.getContext()),
9361 ES, std::move(Args));
9362
9363 return LowerCallTo(CLI).first;
9364}
9365
9366// This is a code size optimisation: return the original SDIV node to
9367// DAGCombiner when we don't want to expand SDIV into a sequence of
9368// instructions, and an empty node otherwise which will cause the
9369// SDIV to be expanded in DAGCombine.
9370SDValue
9371ARMTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
9372 SelectionDAG &DAG,
9373 SmallVectorImpl<SDNode *> &Created) const {
9374 // TODO: Support SREM
9375 if (N->getOpcode() != ISD::SDIV)
9376 return SDValue();
9377
9378 const auto &ST = static_cast<const ARMSubtarget&>(DAG.getSubtarget());
9379 const bool MinSize = ST.hasMinSize();
9380 const bool HasDivide = ST.isThumb() ? ST.hasDivideInThumbMode()
9381 : ST.hasDivideInARMMode();
9382
9383 // Don't touch vector types; rewriting this may lead to scalarizing
9384 // the int divs.
9385 if (N->getOperand(0).getValueType().isVector())
9386 return SDValue();
9387
9388 // Bail if MinSize is not set, and also for both ARM and Thumb mode we need
9389 // hwdiv support for this to be really profitable.
9390 if (!(MinSize && HasDivide))
9391 return SDValue();
9392
9393 // ARM mode is a bit simpler than Thumb: we can handle large power
9394 // of 2 immediates with 1 mov instruction; no further checks required,
9395 // just return the sdiv node.
9396 if (!ST.isThumb())
9397 return SDValue(N, 0);
9398
9399 // In Thumb mode, immediates larger than 128 need a wide 4-byte MOV,
9400 // and thus lose the code size benefits of a MOVS that requires only 2.
9401 // TargetTransformInfo and 'getIntImmCodeSizeCost' could be helpful here,
9402 // but as it's doing exactly this, it's not worth the trouble to get TTI.
9403 if (Divisor.sgt(128))
9404 return SDValue();
9405
9406 return SDValue(N, 0);
9407}
9408
9409SDValue ARMTargetLowering::LowerDIV_Windows(SDValue Op, SelectionDAG &DAG,
9410 bool Signed) const {
9411 assert(Op.getValueType() == MVT::i32 &&
9412 "unexpected type for custom lowering DIV");
9413 SDLoc dl(Op);
9414
9415 SDValue DBZCHK = DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other,
9416 DAG.getEntryNode(), Op.getOperand(1));
9417
9418 return LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
9419}
9420
9421static SDValue WinDBZCheckDenominator(SelectionDAG &DAG, SDNode *N, SDValue InChain) {
9422 SDLoc DL(N);
9423 SDValue Op = N->getOperand(1);
9424 if (N->getValueType(0) == MVT::i32)
9425 return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain, Op);
9426 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Op,
9427 DAG.getConstant(0, DL, MVT::i32));
9428 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Op,
9429 DAG.getConstant(1, DL, MVT::i32));
9430 return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain,
9431 DAG.getNode(ISD::OR, DL, MVT::i32, Lo, Hi));
9432}
9433
9434void ARMTargetLowering::ExpandDIV_Windows(
9435 SDValue Op, SelectionDAG &DAG, bool Signed,
9436 SmallVectorImpl<SDValue> &Results) const {
9437 const auto &DL = DAG.getDataLayout();
9438 const auto &TLI = DAG.getTargetLoweringInfo();
9439
9440 assert(Op.getValueType() == MVT::i64 &&
9441 "unexpected type for custom lowering DIV");
9442 SDLoc dl(Op);
9443
9444 SDValue DBZCHK = WinDBZCheckDenominator(DAG, Op.getNode(), DAG.getEntryNode());
9445
9446 SDValue Result = LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
9447
9448 SDValue Lower = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Result);
9449 SDValue Upper = DAG.getNode(ISD::SRL, dl, MVT::i64, Result,
9450 DAG.getConstant(32, dl, TLI.getPointerTy(DL)));
9451 Upper = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Upper);
9452
9453 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lower, Upper));
9454}
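
The tail of ExpandDIV_Windows simply splits the 64-bit libcall result into two 32-bit halves and re-pairs them. The same arithmetic on a host integer (value chosen arbitrarily):

#include <cassert>
#include <cstdint>

int main() {
  uint64_t Result = 0x1122334455667788ULL;
  uint32_t Lower = static_cast<uint32_t>(Result);       // ISD::TRUNCATE
  uint32_t Upper = static_cast<uint32_t>(Result >> 32); // ISD::SRL + TRUNCATE
  uint64_t Pair = (uint64_t(Upper) << 32) | Lower;      // ISD::BUILD_PAIR
  assert(Pair == Result);
  return 0;
}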
9455
9456static SDValue LowerPredicateLoad(SDValue Op, SelectionDAG &DAG) {
9457 LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
9458 EVT MemVT = LD->getMemoryVT();
9459 assert((MemVT == MVT::v4i1 || MemVT == MVT::v8i1 || MemVT == MVT::v16i1) &&
9460 "Expected a predicate type!");
9461 assert(MemVT == Op.getValueType());
9462 assert(LD->getExtensionType() == ISD::NON_EXTLOAD &&
9463 "Expected a non-extending load");
9464 assert(LD->isUnindexed() && "Expected a unindexed load");
9465
9466 // The basic MVE VLDR on a v4i1/v8i1 actually loads the entire 16bit
9467 // predicate, with the "v4i1" bits spread out over the 16 bits loaded. We
9468 // need to make sure that 8/4 bits are actually loaded into the correct
9469 // place, which means loading the value and then shuffling the values into
9470 // the bottom bits of the predicate.
9471 // Equally, VLDR for a v16i1 will actually load 32 bits (so will be incorrect
9472 // for BE).
9473 // Speaking of BE, apparently the rest of llvm will assume a reverse order to
9474 // a natural VMSR(load), so needs to be reversed.
9475
9476 SDLoc dl(Op);
9477 SDValue Load = DAG.getExtLoad(
9478 ISD::EXTLOAD, dl, MVT::i32, LD->getChain(), LD->getBasePtr(),
9479 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()),
9480 LD->getMemOperand());
9481 SDValue Val = Load;
9482 if (DAG.getDataLayout().isBigEndian())
9483 Val = DAG.getNode(ISD::SRL, dl, MVT::i32,
9484 DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, Load),
9485 DAG.getConstant(32 - MemVT.getSizeInBits(), dl, MVT::i32));
9486 SDValue Pred = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v16i1, Val);
9487 if (MemVT != MVT::v16i1)
9488 Pred = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MemVT, Pred,
9489 DAG.getConstant(0, dl, MVT::i32));
9490 return DAG.getMergeValues({Pred, Load.getValue(1)}, dl);
9491}
9492
9493void ARMTargetLowering::LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
9494 SelectionDAG &DAG) const {
9495 LoadSDNode *LD = cast<LoadSDNode>(N);
9496 EVT MemVT = LD->getMemoryVT();
9497 assert(LD->isUnindexed() && "Loads should be unindexed at this point.");
9498
9499 if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
9500 !Subtarget->isThumb1Only() && LD->isVolatile()) {
9501 SDLoc dl(N);
9502 SDValue Result = DAG.getMemIntrinsicNode(
9503 ARMISD::LDRD, dl, DAG.getVTList({MVT::i32, MVT::i32, MVT::Other}),
9504 {LD->getChain(), LD->getBasePtr()}, MemVT, LD->getMemOperand());
9505 SDValue Lo = Result.getValue(DAG.getDataLayout().isLittleEndian() ? 0 : 1);
9506 SDValue Hi = Result.getValue(DAG.getDataLayout().isLittleEndian() ? 1 : 0);
9507 SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
9508 Results.append({Pair, Result.getValue(2)});
9509 }
9510}
9511
9512static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) {
9513 StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
9514 EVT MemVT = ST->getMemoryVT();
9515 assert((MemVT == MVT::v4i1 || MemVT == MVT::v8i1 || MemVT == MVT::v16i1) &&
9516 "Expected a predicate type!");
9517 assert(MemVT == ST->getValue().getValueType());
9518 assert(!ST->isTruncatingStore() && "Expected a non-extending store");
9519 assert(ST->isUnindexed() && "Expected a unindexed store");
9520
9521 // Only store the v4i1 or v8i1 worth of bits, via a buildvector with top bits
9522 // unset and a scalar store.
9523 SDLoc dl(Op);
9524 SDValue Build = ST->getValue();
9525 if (MemVT != MVT::v16i1) {
9526 SmallVector<SDValue, 16> Ops;
9527 for (unsigned I = 0; I < MemVT.getVectorNumElements(); I++) {
9528 unsigned Elt = DAG.getDataLayout().isBigEndian()
9529 ? MemVT.getVectorNumElements() - I - 1
9530 : I;
9531 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, Build,
9532 DAG.getConstant(Elt, dl, MVT::i32)));
9533 }
9534 for (unsigned I = MemVT.getVectorNumElements(); I < 16; I++)
9535 Ops.push_back(DAG.getUNDEF(MVT::i32));
9536 Build = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i1, Ops);
9537 }
9538 SDValue GRP = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Build);
9539 if (MemVT == MVT::v16i1 && DAG.getDataLayout().isBigEndian())
9540 GRP = DAG.getNode(ISD::SRL, dl, MVT::i32,
9541 DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, GRP),
9542 DAG.getConstant(16, dl, MVT::i32));
9543 return DAG.getTruncStore(
9544 ST->getChain(), dl, GRP, ST->getBasePtr(),
9545 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()),
9546 ST->getMemOperand());
9547}
9548
9549static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG,
9550 const ARMSubtarget *Subtarget) {
9551 StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
9552 EVT MemVT = ST->getMemoryVT();
9553 assert(ST->isUnindexed() && "Stores should be unindexed at this point.");
9554
9555 if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
9556 !Subtarget->isThumb1Only() && ST->isVolatile()) {
9557 SDNode *N = Op.getNode();
9558 SDLoc dl(N);
9559
9560 SDValue Lo = DAG.getNode(
9561 ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
9562 DAG.getTargetConstant(DAG.getDataLayout().isLittleEndian() ? 0 : 1, dl,
9563 MVT::i32));
9564 SDValue Hi = DAG.getNode(
9565 ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
9566 DAG.getTargetConstant(DAG.getDataLayout().isLittleEndian() ? 1 : 0, dl,
9567 MVT::i32));
9568
9569 return DAG.getMemIntrinsicNode(ARMISD::STRD, dl, DAG.getVTList(MVT::Other),
9570 {ST->getChain(), Lo, Hi, ST->getBasePtr()},
9571 MemVT, ST->getMemOperand());
9572 } else if (Subtarget->hasMVEIntegerOps() &&
9573 ((MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
9574 MemVT == MVT::v16i1))) {
9575 return LowerPredicateStore(Op, DAG);
9576 }
9577
9578 return SDValue();
9579}
9580
9581static bool isZeroVector(SDValue N) {
9582 return (ISD::isBuildVectorAllZeros(N.getNode()) ||
9583 (N->getOpcode() == ARMISD::VMOVIMM &&
9584 isNullConstant(N->getOperand(0))));
9585}
9586
9587static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) {
9588 MaskedLoadSDNode *N = cast<MaskedLoadSDNode>(Op.getNode());
9589 MVT VT = Op.getSimpleValueType();
9590 SDValue Mask = N->getMask();
9591 SDValue PassThru = N->getPassThru();
9592 SDLoc dl(Op);
9593
9594 if (isZeroVector(PassThru))
9595 return Op;
9596
9597 // MVE Masked loads use zero as the passthru value. Here we convert undef to
9598 // zero too, and other values are lowered to a select.
9599 SDValue ZeroVec = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
9600 DAG.getTargetConstant(0, dl, MVT::i32));
9601 SDValue NewLoad = DAG.getMaskedLoad(
9602 VT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask, ZeroVec,
9603 N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(),
9604 N->getExtensionType(), N->isExpandingLoad());
9605 SDValue Combo = NewLoad;
9606 bool PassThruIsCastZero = (PassThru.getOpcode() == ISD::BITCAST ||
9607 PassThru.getOpcode() == ARMISD::VECTOR_REG_CAST) &&
9608 isZeroVector(PassThru->getOperand(0));
9609 if (!PassThru.isUndef() && !PassThruIsCastZero)
9610 Combo = DAG.getNode(ISD::VSELECT, dl, VT, Mask, NewLoad, PassThru);
9611 return DAG.getMergeValues({Combo, NewLoad.getValue(1)}, dl);
9612}
9613
9614static SDValue LowerVecReduce(SDValue Op, SelectionDAG &DAG,
9615 const ARMSubtarget *ST) {
9616 if (!ST->hasMVEIntegerOps())
9617 return SDValue();
9618
9619 SDLoc dl(Op);
9620 unsigned BaseOpcode = 0;
9621 switch (Op->getOpcode()) {
9622 default: llvm_unreachable("Expected VECREDUCE opcode");
9623 case ISD::VECREDUCE_FADD: BaseOpcode = ISD::FADD; break;
9624 case ISD::VECREDUCE_FMUL: BaseOpcode = ISD::FMUL; break;
9625 case ISD::VECREDUCE_MUL: BaseOpcode = ISD::MUL; break;
9626 case ISD::VECREDUCE_AND: BaseOpcode = ISD::AND; break;
9627 case ISD::VECREDUCE_OR: BaseOpcode = ISD::OR; break;
9628 case ISD::VECREDUCE_XOR: BaseOpcode = ISD::XOR; break;
9629 case ISD::VECREDUCE_FMAX: BaseOpcode = ISD::FMAXNUM; break;
9630 case ISD::VECREDUCE_FMIN: BaseOpcode = ISD::FMINNUM; break;
9631 }
9632
9633 SDValue Op0 = Op->getOperand(0);
9634 EVT VT = Op0.getValueType();
9635 EVT EltVT = VT.getVectorElementType();
9636 unsigned NumElts = VT.getVectorNumElements();
9637 unsigned NumActiveLanes = NumElts;
9638
9639 assert((NumActiveLanes == 16 || NumActiveLanes == 8 || NumActiveLanes == 4 ||
9640 NumActiveLanes == 2) &&
9641 "Only expected a power 2 vector size");
9642
9643 // Use Mul(X, Rev(X)) until 4 items remain. Going down to 4 vector elements
9644 // allows us to easily extract vector elements from the lanes.
9645 while (NumActiveLanes > 4) {
9646 unsigned RevOpcode = NumActiveLanes == 16 ? ARMISD::VREV16 : ARMISD::VREV32;
9647 SDValue Rev = DAG.getNode(RevOpcode, dl, VT, Op0);
9648 Op0 = DAG.getNode(BaseOpcode, dl, VT, Op0, Rev);
9649 NumActiveLanes /= 2;
9650 }
9651
9652 SDValue Res;
9653 if (NumActiveLanes == 4) {
9654 // The remaining 4 elements are combined sequentially using BaseOpcode.
9655 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
9656 DAG.getConstant(0 * NumElts / 4, dl, MVT::i32));
9657 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
9658 DAG.getConstant(1 * NumElts / 4, dl, MVT::i32));
9659 SDValue Ext2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
9660 DAG.getConstant(2 * NumElts / 4, dl, MVT::i32));
9661 SDValue Ext3 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
9662 DAG.getConstant(3 * NumElts / 4, dl, MVT::i32));
9663 SDValue Res0 = DAG.getNode(BaseOpcode, dl, EltVT, Ext0, Ext1, Op->getFlags());
9664 SDValue Res1 = DAG.getNode(BaseOpcode, dl, EltVT, Ext2, Ext3, Op->getFlags());
9665 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res0, Res1, Op->getFlags());
9666 } else {
9667 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
9668 DAG.getConstant(0, dl, MVT::i32));
9669 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
9670 DAG.getConstant(1, dl, MVT::i32));
9671 Res = DAG.getNode(BaseOpcode, dl, EltVT, Ext0, Ext1, Op->getFlags());
9672 }
9673
9674 // Result type may be wider than element type.
9675 if (EltVT != Op->getValueType(0))
9676 Res = DAG.getNode(ISD::ANY_EXTEND, dl, Op->getValueType(0), Res);
9677 return Res;
9678}
9679
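// Floating-point vector reductions are only handled when the MVE float
// extension is available; the lowering itself is shared with LowerVecReduce.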
9680static SDValue LowerVecReduceF(SDValue Op, SelectionDAG &DAG,
9681 const ARMSubtarget *ST) {
9682 if (!ST->hasMVEFloatOps())
9683 return SDValue();
9684 return LowerVecReduce(Op, DAG, ST);
9685}
9686
9687static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
9688 if (isStrongerThanMonotonic(cast<AtomicSDNode>(Op)->getOrdering()))
9689 // Acquire/Release load/store is not legal for targets without a dmb or
9690 // equivalent available.
9691 return SDValue();
9692
9693 // Monotonic load/store is legal for all targets.
9694 return Op;
9695}
9696
9697static void ReplaceREADCYCLECOUNTER(SDNode *N,
9698 SmallVectorImpl<SDValue> &Results,
9699 SelectionDAG &DAG,
9700 const ARMSubtarget *Subtarget) {
9701 SDLoc DL(N);
9702 // Under Power Management extensions, the cycle-count is:
9703 // mrc p15, #0, <Rt>, c9, c13, #0
9704 SDValue Ops[] = { N->getOperand(0), // Chain
9705 DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),
9706 DAG.getTargetConstant(15, DL, MVT::i32),
9707 DAG.getTargetConstant(0, DL, MVT::i32),
9708 DAG.getTargetConstant(9, DL, MVT::i32),
9709 DAG.getTargetConstant(13, DL, MVT::i32),
9710 DAG.getTargetConstant(0, DL, MVT::i32)
9711 };
9712
9713 SDValue Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
9714 DAG.getVTList(MVT::i32, MVT::Other), Ops);
9715 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Cycles32,
9716 DAG.getConstant(0, DL, MVT::i32)));
9717 Results.push_back(Cycles32.getValue(1));
9718}
9719
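// Wrap a 64-bit value into an untyped GPRPair REG_SEQUENCE by splitting it
// into two i32 halves (swapped on big-endian targets).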
9720static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
9721 SDLoc dl(V.getNode());
9722 SDValue VLo = DAG.getAnyExtOrTrunc(V, dl, MVT::i32);
9723 SDValue VHi = DAG.getAnyExtOrTrunc(
9724 DAG.getNode(ISD::SRL, dl, MVT::i64, V, DAG.getConstant(32, dl, MVT::i32)),
9725 dl, MVT::i32);
9726 bool isBigEndian = DAG.getDataLayout().isBigEndian();
9727 if (isBigEndian)
9728 std::swap (VLo, VHi);
9729 SDValue RegClass =
9730 DAG.getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
9731 SDValue SubReg0 = DAG.getTargetConstant(ARM::gsub_0, dl, MVT::i32);
9732 SDValue SubReg1 = DAG.getTargetConstant(ARM::gsub_1, dl, MVT::i32);
9733 const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
9734 return SDValue(
9735 DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
9736}
9737
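// Expand a 64-bit ATOMIC_CMP_SWAP to the CMP_SWAP_64 pseudo: the expected and
// new values are packed into GPRPairs, and the result is split back into two
// i32 halves that are recombined with BUILD_PAIR.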
9738static void ReplaceCMP_SWAP_64Results(SDNode *N,
9739 SmallVectorImpl<SDValue> & Results,
9740 SelectionDAG &DAG) {
9741 assert(N->getValueType(0) == MVT::i64 &&
9742 "AtomicCmpSwap on types less than 64 should be legal");
9743 SDValue Ops[] = {N->getOperand(1),
9744 createGPRPairNode(DAG, N->getOperand(2)),
9745 createGPRPairNode(DAG, N->getOperand(3)),
9746 N->getOperand(0)};
9747 SDNode *CmpSwap = DAG.getMachineNode(
9748 ARM::CMP_SWAP_64, SDLoc(N),
9749 DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other), Ops);
9750
9751 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
9752 DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
9753
9754 bool isBigEndian = DAG.getDataLayout().isBigEndian();
9755
9756 SDValue Lo =
9757 DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_1 : ARM::gsub_0,
9758 SDLoc(N), MVT::i32, SDValue(CmpSwap, 0));
9759 SDValue Hi =
9760 DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_0 : ARM::gsub_1,
9761 SDLoc(N), MVT::i32, SDValue(CmpSwap, 0));
9762 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i64, Lo, Hi));
9763 Results.push_back(SDValue(CmpSwap, 2));
9764}
9765
9766SDValue ARMTargetLowering::LowerFSETCC(SDValue Op, SelectionDAG &DAG) const {
9767 SDLoc dl(Op);
9768 EVT VT = Op.getValueType();
9769 SDValue Chain = Op.getOperand(0);
9770 SDValue LHS = Op.getOperand(1);
9771 SDValue RHS = Op.getOperand(2);
9772 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
9773 bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS;
9774
9775 // If we don't have instructions of this float type then soften to a libcall
9776 // and use SETCC instead.
9777 if (isUnsupportedFloatingType(LHS.getValueType())) {
9778 DAG.getTargetLoweringInfo().softenSetCCOperands(
9779 DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS, Chain, IsSignaling);
9780 if (!RHS.getNode()) {
9781 RHS = DAG.getConstant(0, dl, LHS.getValueType());
9782 CC = ISD::SETNE;
9783 }
9784 SDValue Result = DAG.getNode(ISD::SETCC, dl, VT, LHS, RHS,
9785 DAG.getCondCode(CC));
9786 return DAG.getMergeValues({Result, Chain}, dl);
9787 }
9788
9789 ARMCC::CondCodes CondCode, CondCode2;
9790 FPCCToARMCC(CC, CondCode, CondCode2);
9791
9792 // FIXME: Chain is not handled correctly here. Currently the FPSCR is implicit
9793 // in CMPFP and CMPFPE, but instead it should be made explicit by these
9794 // instructions using a chain instead of glue. This would also fix the problem
9795 // here (and also in LowerSELECT_CC) where we generate two comparisons when
9796 // CondCode2 != AL.
9797 SDValue True = DAG.getConstant(1, dl, VT);
9798 SDValue False = DAG.getConstant(0, dl, VT);
9799 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
9800 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
9801 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, IsSignaling);
9802 SDValue Result = getCMOV(dl, VT, False, True, ARMcc, CCR, Cmp, DAG);
9803 if (CondCode2 != ARMCC::AL) {
9804 ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
9805 Cmp = getVFPCmp(LHS, RHS, DAG, dl, IsSignaling);
9806 Result = getCMOV(dl, VT, Result, True, ARMcc, CCR, Cmp, DAG);
9807 }
9808 return DAG.getMergeValues({Result, Chain}, dl);
9809}
9810
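// Central dispatch for custom lowering: route each custom-lowered opcode to
// the matching Lower* helper.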
9811SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
9812 LLVM_DEBUG(dbgs() << "Lowering node: "; Op.dump());
9813 switch (Op.getOpcode()) {
9814 default: llvm_unreachable("Don't know how to custom lower this!");
9815 case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG);
9816 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
9817 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
9818 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
9819 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
9820 case ISD::SELECT: return LowerSELECT(Op, DAG);
9821 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
9822 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
9823 case ISD::BR_CC: return LowerBR_CC(Op, DAG);
9824 case ISD::BR_JT: return LowerBR_JT(Op, DAG);
9825 case ISD::VASTART: return LowerVASTART(Op, DAG);
9826 case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget);
9827 case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget);
9828 case ISD::SINT_TO_FP:
9829 case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
9830 case ISD::STRICT_FP_TO_SINT:
9831 case ISD::STRICT_FP_TO_UINT:
9832 case ISD::FP_TO_SINT:
9833 case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
9834 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
9835 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
9836 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
9837 case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
9838 case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
9839 case ISD::EH_SJLJ_SETUP_DISPATCH: return LowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
9840 case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG, Subtarget);
9841 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
9842 Subtarget);
9843 case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG, Subtarget);
9844 case ISD::SHL:
9845 case ISD::SRL:
9846 case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget);
9847 case ISD::SREM: return LowerREM(Op.getNode(), DAG);
9848 case ISD::UREM: return LowerREM(Op.getNode(), DAG);
9849 case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);
9850 case ISD::SRL_PARTS:
9851 case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
9852 case ISD::CTTZ:
9853 case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
9854 case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget);
9855 case ISD::SETCC: return LowerVSETCC(Op, DAG, Subtarget);
9856 case ISD::SETCCCARRY: return LowerSETCCCARRY(Op, DAG);
9857 case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget);
9858 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
9859 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
9860 case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG, Subtarget);
9861 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
9862 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG, Subtarget);
9863 case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG, Subtarget);
9864 case ISD::TRUNCATE: return LowerTruncatei1(Op, DAG, Subtarget);
9865 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
9866 case ISD::SET_ROUNDING: return LowerSET_ROUNDING(Op, DAG);
9867 case ISD::MUL: return LowerMUL(Op, DAG);
9868 case ISD::SDIV:
9869 if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
9870 return LowerDIV_Windows(Op, DAG, /* Signed */ true);
9871 return LowerSDIV(Op, DAG, Subtarget);
9872 case ISD::UDIV:
9873 if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
9874 return LowerDIV_Windows(Op, DAG, /* Signed */ false);
9875 return LowerUDIV(Op, DAG, Subtarget);
9876 case ISD::ADDCARRY:
9877 case ISD::SUBCARRY: return LowerADDSUBCARRY(Op, DAG);
9878 case ISD::SADDO:
9879 case ISD::SSUBO:
9880 return LowerSignedALUO(Op, DAG);
9881 case ISD::UADDO:
9882 case ISD::USUBO:
9883 return LowerUnsignedALUO(Op, DAG);
9884 case ISD::SADDSAT:
9885 case ISD::SSUBSAT:
9886 return LowerSADDSUBSAT(Op, DAG, Subtarget);
9887 case ISD::LOAD:
9888 return LowerPredicateLoad(Op, DAG);
9889 case ISD::STORE:
9890 return LowerSTORE(Op, DAG, Subtarget);
9891 case ISD::MLOAD:
9892 return LowerMLOAD(Op, DAG);
9893 case ISD::VECREDUCE_MUL:
9894 case ISD::VECREDUCE_AND:
9895 case ISD::VECREDUCE_OR:
9896 case ISD::VECREDUCE_XOR:
9897 return LowerVecReduce(Op, DAG, Subtarget);
9898 case ISD::VECREDUCE_FADD:
9899 case ISD::VECREDUCE_FMUL:
9900 case ISD::VECREDUCE_FMIN:
9901 case ISD::VECREDUCE_FMAX:
9902 return LowerVecReduceF(Op, DAG, Subtarget);
9903 case ISD::ATOMIC_LOAD:
9904 case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
9905 case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
9906 case ISD::SDIVREM:
9907 case ISD::UDIVREM: return LowerDivRem(Op, DAG);
9908 case ISD::DYNAMIC_STACKALLOC:
9909 if (Subtarget->isTargetWindows())
9910 return LowerDYNAMIC_STACKALLOC(Op, DAG);
9911 llvm_unreachable("Don't know how to custom lower this!");
9912 case ISD::STRICT_FP_ROUND:
9913 case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
9914 case ISD::STRICT_FP_EXTEND:
9915 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
9916 case ISD::STRICT_FSETCC:
9917 case ISD::STRICT_FSETCCS: return LowerFSETCC(Op, DAG);
9918 case ARMISD::WIN__DBZCHK: return SDValue();
9919 }
9920}
9921
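// Expand the arm_smlald/smlaldx/smlsld/smlsldx intrinsics: split the 64-bit
// accumulator into two i32 halves, emit the corresponding ARMISD node, and
// rebuild the i64 result with BUILD_PAIR.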
9922static void ReplaceLongIntrinsic(SDNode *N, SmallVectorImpl<SDValue> &Results,
9923 SelectionDAG &DAG) {
9924 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
9925 unsigned Opc = 0;
9926 if (IntNo == Intrinsic::arm_smlald)
9927 Opc = ARMISD::SMLALD;
9928 else if (IntNo == Intrinsic::arm_smlaldx)
9929 Opc = ARMISD::SMLALDX;
9930 else if (IntNo == Intrinsic::arm_smlsld)
9931 Opc = ARMISD::SMLSLD;
9932 else if (IntNo == Intrinsic::arm_smlsldx)
9933 Opc = ARMISD::SMLSLDX;
9934 else
9935 return;
9936
9937 SDLoc dl(N);
9938 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
9939 N->getOperand(3),
9940 DAG.getConstant(0, dl, MVT::i32));
9941 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
9942 N->getOperand(3),
9943 DAG.getConstant(1, dl, MVT::i32));
9944
9945 SDValue LongMul = DAG.getNode(Opc, dl,
9946 DAG.getVTList(MVT::i32, MVT::i32),
9947 N->getOperand(1), N->getOperand(2),
9948 Lo, Hi);
9949 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64,
9950 LongMul.getValue(0), LongMul.getValue(1)));
9951}
9952
9953/// ReplaceNodeResults - Replace the results of a node with an illegal result
9954/// type with new values built out of custom code.
9955void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
9956 SmallVectorImpl<SDValue> &Results,
9957 SelectionDAG &DAG) const {
9958 SDValue Res;
9959 switch (N->getOpcode()) {
9960 default:
9961 llvm_unreachable("Don't know how to custom expand this!");
9962 case ISD::READ_REGISTER:
9963 ExpandREAD_REGISTER(N, Results, DAG);
9964 break;
9965 case ISD::BITCAST:
9966 Res = ExpandBITCAST(N, DAG, Subtarget);
9967 break;
9968 case ISD::SRL:
9969 case ISD::SRA:
9970 case ISD::SHL:
9971 Res = Expand64BitShift(N, DAG, Subtarget);
9972 break;
9973 case ISD::SREM:
9974 case ISD::UREM:
9975 Res = LowerREM(N, DAG);
9976 break;
9977 case ISD::SDIVREM:
9978 case ISD::UDIVREM:
9979 Res = LowerDivRem(SDValue(N, 0), DAG);
9980 assert(Res.getNumOperands() == 2 && "DivRem needs two values");
9981 Results.push_back(Res.getValue(0));
9982 Results.push_back(Res.getValue(1));
9983 return;
9984 case ISD::SADDSAT:
9985 case ISD::SSUBSAT:
9986 Res = LowerSADDSUBSAT(SDValue(N, 0), DAG, Subtarget);
9987 break;
9988 case ISD::READCYCLECOUNTER:
9989 ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
9990 return;
9991 case ISD::UDIV:
9992 case ISD::SDIV:
9993 assert(Subtarget->isTargetWindows() && "can only expand DIV on Windows");
9994 return ExpandDIV_Windows(SDValue(N, 0), DAG, N->getOpcode() == ISD::SDIV,
9995 Results);
9996 case ISD::ATOMIC_CMP_SWAP:
9997 ReplaceCMP_SWAP_64Results(N, Results, DAG);
9998 return;
9999 case ISD::INTRINSIC_WO_CHAIN:
10000 return ReplaceLongIntrinsic(N, Results, DAG);
10001 case ISD::ABS:
10002 lowerABS(N, Results, DAG);
10003 return;
10004 case ISD::LOAD:
10005 LowerLOAD(N, Results, DAG);
10006 break;
10007 }
10008 if (Res.getNode())
10009 Results.push_back(Res);
10010}
10011
10012//===----------------------------------------------------------------------===//
10013// ARM Scheduler Hooks
10014//===----------------------------------------------------------------------===//
10015
10016/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
10017/// registers the function context.
10018void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
10019 MachineBasicBlock *MBB,
10020 MachineBasicBlock *DispatchBB,
10021 int FI) const {
10022 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
10023 "ROPI/RWPI not currently supported with SjLj");
10024 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
10025 DebugLoc dl = MI.getDebugLoc();
10026 MachineFunction *MF = MBB->getParent();
10027 MachineRegisterInfo *MRI = &MF->getRegInfo();
10028 MachineConstantPool *MCP = MF->getConstantPool();
10029 ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>();
10030 const Function &F = MF->getFunction();
10031
10032 bool isThumb = Subtarget->isThumb();
10033 bool isThumb2 = Subtarget->isThumb2();
10034
10035 unsigned PCLabelId = AFI->createPICLabelUId();
10036 unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8;
10037 ARMConstantPoolValue *CPV =
10038 ARMConstantPoolMBB::Create(F.getContext(), DispatchBB, PCLabelId, PCAdj);
10039 unsigned CPI = MCP->getConstantPoolIndex(CPV, Align(4));
10040
10041 const TargetRegisterClass *TRC = isThumb ? &ARM::tGPRRegClass
10042 : &ARM::GPRRegClass;
10043
10044 // Grab constant pool and fixed stack memory operands.
10045 MachineMemOperand *CPMMO =
10046 MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
10047 MachineMemOperand::MOLoad, 4, Align(4));
10048
10049 MachineMemOperand *FIMMOSt =
10050 MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(*MF, FI),
10051 MachineMemOperand::MOStore, 4, Align(4));
10052
10053 // Load the address of the dispatch MBB into the jump buffer.
10054 if (isThumb2) {
10055 // Incoming value: jbuf
10056 // ldr.n r5, LCPI1_1
10057 // orr r5, r5, #1
10058 // add r5, pc
10059 // str r5, [$jbuf, #+4] ; &jbuf[1]
10060 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10061 BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1)
10062 .addConstantPoolIndex(CPI)
10063 .addMemOperand(CPMMO)
10064 .add(predOps(ARMCC::AL));
10065 // Set the low bit because of thumb mode.
10066 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10067 BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2)
10068 .addReg(NewVReg1, RegState::Kill)
10069 .addImm(0x01)
10070 .add(predOps(ARMCC::AL))
10071 .add(condCodeOp());
10072 Register NewVReg3 = MRI->createVirtualRegister(TRC);
10073 BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3)
10074 .addReg(NewVReg2, RegState::Kill)
10075 .addImm(PCLabelId);
10076 BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12))
10077 .addReg(NewVReg3, RegState::Kill)
10078 .addFrameIndex(FI)
10079 .addImm(36) // &jbuf[1] :: pc
10080 .addMemOperand(FIMMOSt)
10081 .add(predOps(ARMCC::AL));
10082 } else if (isThumb) {
10083 // Incoming value: jbuf
10084 // ldr.n r1, LCPI1_4
10085 // add r1, pc
10086 // mov r2, #1
10087 // orrs r1, r2
10088 // add r2, $jbuf, #+4 ; &jbuf[1]
10089 // str r1, [r2]
10090 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10091 BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1)
10092 .addConstantPoolIndex(CPI)
10093 .addMemOperand(CPMMO)
10094 .add(predOps(ARMCC::AL));
10095 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10096 BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2)
10097 .addReg(NewVReg1, RegState::Kill)
10098 .addImm(PCLabelId);
10099 // Set the low bit because of thumb mode.
10100 Register NewVReg3 = MRI->createVirtualRegister(TRC);
10101 BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3)
10102 .addReg(ARM::CPSR, RegState::Define)
10103 .addImm(1)
10104 .add(predOps(ARMCC::AL));
10105 Register NewVReg4 = MRI->createVirtualRegister(TRC);
10106 BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4)
10107 .addReg(ARM::CPSR, RegState::Define)
10108 .addReg(NewVReg2, RegState::Kill)
10109 .addReg(NewVReg3, RegState::Kill)
10110 .add(predOps(ARMCC::AL));
10111 Register NewVReg5 = MRI->createVirtualRegister(TRC);
10112 BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5)
10113 .addFrameIndex(FI)
10114 .addImm(36); // &jbuf[1] :: pc
10115 BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))
10116 .addReg(NewVReg4, RegState::Kill)
10117 .addReg(NewVReg5, RegState::Kill)
10118 .addImm(0)
10119 .addMemOperand(FIMMOSt)
10120 .add(predOps(ARMCC::AL));
10121 } else {
10122 // Incoming value: jbuf
10123 // ldr r1, LCPI1_1
10124 // add r1, pc, r1
10125 // str r1, [$jbuf, #+4] ; &jbuf[1]
10126 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10127 BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1)
10128 .addConstantPoolIndex(CPI)
10129 .addImm(0)
10130 .addMemOperand(CPMMO)
10131 .add(predOps(ARMCC::AL));
10132 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10133 BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2)
10134 .addReg(NewVReg1, RegState::Kill)
10135 .addImm(PCLabelId)
10136 .add(predOps(ARMCC::AL));
10137 BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12))
10138 .addReg(NewVReg2, RegState::Kill)
10139 .addFrameIndex(FI)
10140 .addImm(36) // &jbuf[1] :: pc
10141 .addMemOperand(FIMMOSt)
10142 .add(predOps(ARMCC::AL));
10143 }
10144}
10145
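// Emit the SjLj exception dispatch block: gather the landing pads into a jump
// table indexed by the call-site value loaded from the function context, and
// rewire every invoke block to target the new dispatch block instead of its
// original landing pad.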
10146void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
10147 MachineBasicBlock *MBB) const {
10148 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
10149 DebugLoc dl = MI.getDebugLoc();
10150 MachineFunction *MF = MBB->getParent();
10151 MachineRegisterInfo *MRI = &MF->getRegInfo();
10152 MachineFrameInfo &MFI = MF->getFrameInfo();
10153 int FI = MFI.getFunctionContextIndex();
10154
10155 const TargetRegisterClass *TRC = Subtarget->isThumb() ? &ARM::tGPRRegClass
10156 : &ARM::GPRnopcRegClass;
10157
10158 // Get a mapping of the call site numbers to all of the landing pads they're
10159 // associated with.
10160 DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2>> CallSiteNumToLPad;
10161 unsigned MaxCSNum = 0;
10162 for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E;
10163 ++BB) {
10164 if (!BB->isEHPad()) continue;
10165
10166 // FIXME: We should assert that the EH_LABEL is the first MI in the landing
10167 // pad.
10168 for (MachineBasicBlock::iterator
10169 II = BB->begin(), IE = BB->end(); II != IE; ++II) {
10170 if (!II->isEHLabel()) continue;
10171
10172 MCSymbol *Sym = II->getOperand(0).getMCSymbol();
10173 if (!MF->hasCallSiteLandingPad(Sym)) continue;
10174
10175 SmallVectorImpl<unsigned> &CallSiteIdxs = MF->getCallSiteLandingPad(Sym);
10176 for (SmallVectorImpl<unsigned>::iterator
10177 CSI = CallSiteIdxs.begin(), CSE = CallSiteIdxs.end();
10178 CSI != CSE; ++CSI) {
10179 CallSiteNumToLPad[*CSI].push_back(&*BB);
10180 MaxCSNum = std::max(MaxCSNum, *CSI);
10181 }
10182 break;
10183 }
10184 }
10185
10186 // Get an ordered list of the machine basic blocks for the jump table.
10187 std::vector<MachineBasicBlock*> LPadList;
10188 SmallPtrSet<MachineBasicBlock*, 32> InvokeBBs;
10189 LPadList.reserve(CallSiteNumToLPad.size());
10190 for (unsigned I = 1; I <= MaxCSNum; ++I) {
10191 SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I];
10192 for (SmallVectorImpl<MachineBasicBlock*>::iterator
10193 II = MBBList.begin(), IE = MBBList.end(); II != IE; ++II) {
10194 LPadList.push_back(*II);
10195 InvokeBBs.insert((*II)->pred_begin(), (*II)->pred_end());
10196 }
10197 }
10198
10199 assert(!LPadList.empty() &&
10200 "No landing pad destinations for the dispatch jump table!");
10201
10202 // Create the jump table and associated information.
10203 MachineJumpTableInfo *JTI =
10204 MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);
10205 unsigned MJTI = JTI->createJumpTableIndex(LPadList);
10206
10207 // Create the MBBs for the dispatch code.
10208
10209 // Shove the dispatch's address into the return slot in the function context.
10210 MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
10211 DispatchBB->setIsEHPad();
10212
10213 MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
10214 unsigned trap_opcode;
10215 if (Subtarget->isThumb())
10216 trap_opcode = ARM::tTRAP;
10217 else
10218 trap_opcode = Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP;
10219
10220 BuildMI(TrapBB, dl, TII->get(trap_opcode));
10221 DispatchBB->addSuccessor(TrapBB);
10222
10223 MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
10224 DispatchBB->addSuccessor(DispContBB);
10225
10226 // Insert the MBBs.
10227 MF->insert(MF->end(), DispatchBB);
10228 MF->insert(MF->end(), DispContBB);
10229 MF->insert(MF->end(), TrapBB);
10230
10231 // Insert code into the entry block that creates and registers the function
10232 // context.
10233 SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI);
10234
10235 MachineMemOperand *FIMMOLd = MF->getMachineMemOperand(
10236 MachinePointerInfo::getFixedStack(*MF, FI),
10237 MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile, 4, Align(4));
10238
10239 MachineInstrBuilder MIB;
10240 MIB = BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup));
10241
10242 const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
10243 const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
10244
10245 // Add a register mask with no preserved registers. This results in all
10246 // registers being marked as clobbered. This can't work if the dispatch block
10247 // is in a Thumb1 function and is linked with ARM code which uses the FP
10248 // registers, as there is no way to preserve the FP registers in Thumb1 mode.
10249 MIB.addRegMask(RI.getSjLjDispatchPreservedMask(*MF));
10250
10251 bool IsPositionIndependent = isPositionIndependent();
10252 unsigned NumLPads = LPadList.size();
10253 if (Subtarget->isThumb2()) {
10254 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10255 BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)
10256 .addFrameIndex(FI)
10257 .addImm(4)
10258 .addMemOperand(FIMMOLd)
10259 .add(predOps(ARMCC::AL));
10260
10261 if (NumLPads < 256) {
10262 BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
10263 .addReg(NewVReg1)
10264 .addImm(LPadList.size())
10265 .add(predOps(ARMCC::AL));
10266 } else {
10267 Register VReg1 = MRI->createVirtualRegister(TRC);
10268 BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1)
10269 .addImm(NumLPads & 0xFFFF)
10270 .add(predOps(ARMCC::AL));
10271
10272 unsigned VReg2 = VReg1;
10273 if ((NumLPads & 0xFFFF0000) != 0) {
10274 VReg2 = MRI->createVirtualRegister(TRC);
10275 BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2)
10276 .addReg(VReg1)
10277 .addImm(NumLPads >> 16)
10278 .add(predOps(ARMCC::AL));
10279 }
10280
10281 BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr))
10282 .addReg(NewVReg1)
10283 .addReg(VReg2)
10284 .add(predOps(ARMCC::AL));
10285 }
10286
10287 BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc))
10288 .addMBB(TrapBB)
10289 .addImm(ARMCC::HI)
10290 .addReg(ARM::CPSR);
10291
10292 Register NewVReg3 = MRI->createVirtualRegister(TRC);
10293 BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT), NewVReg3)
10294 .addJumpTableIndex(MJTI)
10295 .add(predOps(ARMCC::AL));
10296
10297 Register NewVReg4 = MRI->createVirtualRegister(TRC);
10298 BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4)
10299 .addReg(NewVReg3, RegState::Kill)
10300 .addReg(NewVReg1)
10301 .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))
10302 .add(predOps(ARMCC::AL))
10303 .add(condCodeOp());
10304
10305 BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT))
10306 .addReg(NewVReg4, RegState::Kill)
10307 .addReg(NewVReg1)
10308 .addJumpTableIndex(MJTI);
10309 } else if (Subtarget->isThumb()) {
10310 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10311 BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1)
10312 .addFrameIndex(FI)
10313 .addImm(1)
10314 .addMemOperand(FIMMOLd)
10315 .add(predOps(ARMCC::AL));
10316
10317 if (NumLPads < 256) {
10318 BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
10319 .addReg(NewVReg1)
10320 .addImm(NumLPads)
10321 .add(predOps(ARMCC::AL));
10322 } else {
10323 MachineConstantPool *ConstantPool = MF->getConstantPool();
10324 Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
10325 const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
10326
10327 // MachineConstantPool wants an explicit alignment.
10328 Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
10329 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
10330
10331 Register VReg1 = MRI->createVirtualRegister(TRC);
10332 BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci))
10333 .addReg(VReg1, RegState::Define)
10334 .addConstantPoolIndex(Idx)
10335 .add(predOps(ARMCC::AL));
10336 BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr))
10337 .addReg(NewVReg1)
10338 .addReg(VReg1)
10339 .add(predOps(ARMCC::AL));
10340 }
10341
10342 BuildMI(DispatchBB, dl, TII->get(ARM::tBcc))
10343 .addMBB(TrapBB)
10344 .addImm(ARMCC::HI)
10345 .addReg(ARM::CPSR);
10346
10347 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10348 BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2)
10349 .addReg(ARM::CPSR, RegState::Define)
10350 .addReg(NewVReg1)
10351 .addImm(2)
10352 .add(predOps(ARMCC::AL));
10353
10354 Register NewVReg3 = MRI->createVirtualRegister(TRC);
10355 BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3)
10356 .addJumpTableIndex(MJTI)
10357 .add(predOps(ARMCC::AL));
10358
10359 Register NewVReg4 = MRI->createVirtualRegister(TRC);
10360 BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4)
10361 .addReg(ARM::CPSR, RegState::Define)
10362 .addReg(NewVReg2, RegState::Kill)
10363 .addReg(NewVReg3)
10364 .add(predOps(ARMCC::AL));
10365
10366 MachineMemOperand *JTMMOLd =
10367 MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(*MF),
10368 MachineMemOperand::MOLoad, 4, Align(4));
10369
10370 Register NewVReg5 = MRI->createVirtualRegister(TRC);
10371 BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)
10372 .addReg(NewVReg4, RegState::Kill)
10373 .addImm(0)
10374 .addMemOperand(JTMMOLd)
10375 .add(predOps(ARMCC::AL));
10376
10377 unsigned NewVReg6 = NewVReg5;
10378 if (IsPositionIndependent) {
10379 NewVReg6 = MRI->createVirtualRegister(TRC);
10380 BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
10381 .addReg(ARM::CPSR, RegState::Define)
10382 .addReg(NewVReg5, RegState::Kill)
10383 .addReg(NewVReg3)
10384 .add(predOps(ARMCC::AL));
10385 }
10386
10387 BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr))
10388 .addReg(NewVReg6, RegState::Kill)
10389 .addJumpTableIndex(MJTI);
10390 } else {
10391 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10392 BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1)
10393 .addFrameIndex(FI)
10394 .addImm(4)
10395 .addMemOperand(FIMMOLd)
10396 .add(predOps(ARMCC::AL));
10397
10398 if (NumLPads < 256) {
10399 BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))
10400 .addReg(NewVReg1)
10401 .addImm(NumLPads)
10402 .add(predOps(ARMCC::AL));
10403 } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) {
10404 Register VReg1 = MRI->createVirtualRegister(TRC);
10405 BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1)
10406 .addImm(NumLPads & 0xFFFF)
10407 .add(predOps(ARMCC::AL));
10408
10409 unsigned VReg2 = VReg1;
10410 if ((NumLPads & 0xFFFF0000) != 0) {
10411 VReg2 = MRI->createVirtualRegister(TRC);
10412 BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2)
10413 .addReg(VReg1)
10414 .addImm(NumLPads >> 16)
10415 .add(predOps(ARMCC::AL));
10416 }
10417
10418 BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
10419 .addReg(NewVReg1)
10420 .addReg(VReg2)
10421 .add(predOps(ARMCC::AL));
10422 } else {
10423 MachineConstantPool *ConstantPool = MF->getConstantPool();
10424 Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
10425 const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
10426
10427 // MachineConstantPool wants an explicit alignment.
10428 Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
10429 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
10430
10431 Register VReg1 = MRI->createVirtualRegister(TRC);
10432 BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp))
10433 .addReg(VReg1, RegState::Define)
10434 .addConstantPoolIndex(Idx)
10435 .addImm(0)
10436 .add(predOps(ARMCC::AL));
10437 BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
10438 .addReg(NewVReg1)
10439 .addReg(VReg1, RegState::Kill)
10440 .add(predOps(ARMCC::AL));
10441 }
10442
10443 BuildMI(DispatchBB, dl, TII->get(ARM::Bcc))
10444 .addMBB(TrapBB)
10445 .addImm(ARMCC::HI)
10446 .addReg(ARM::CPSR);
10447
10448 Register NewVReg3 = MRI->createVirtualRegister(TRC);
10449 BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3)
10450 .addReg(NewVReg1)
10451 .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))
10452 .add(predOps(ARMCC::AL))
10453 .add(condCodeOp());
10454 Register NewVReg4 = MRI->createVirtualRegister(TRC);
10455 BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)
10456 .addJumpTableIndex(MJTI)
10457 .add(predOps(ARMCC::AL));
10458
10459 MachineMemOperand *JTMMOLd =
10460 MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(*MF),
10461 MachineMemOperand::MOLoad, 4, Align(4));
10462 Register NewVReg5 = MRI->createVirtualRegister(TRC);
10463 BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)
10464 .addReg(NewVReg3, RegState::Kill)
10465 .addReg(NewVReg4)
10466 .addImm(0)
10467 .addMemOperand(JTMMOLd)
10468 .add(predOps(ARMCC::AL));
10469
10470 if (IsPositionIndependent) {
10471 BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
10472 .addReg(NewVReg5, RegState::Kill)
10473 .addReg(NewVReg4)
10474 .addJumpTableIndex(MJTI);
10475 } else {
10476 BuildMI(DispContBB, dl, TII->get(ARM::BR_JTr))
10477 .addReg(NewVReg5, RegState::Kill)
10478 .addJumpTableIndex(MJTI);
10479 }
10480 }
10481
10482 // Add the jump table entries as successors to the MBB.
10483 SmallPtrSet<MachineBasicBlock*, 8> SeenMBBs;
10484 for (std::vector<MachineBasicBlock*>::iterator
10485 I = LPadList.begin(), E = LPadList.end(); I != E; ++I) {
10486 MachineBasicBlock *CurMBB = *I;
10487 if (SeenMBBs.insert(CurMBB).second)
10488 DispContBB->addSuccessor(CurMBB);
10489 }
10490
10491 // N.B. the order the invoke BBs are processed in doesn't matter here.
10492 const MCPhysReg *SavedRegs = RI.getCalleeSavedRegs(MF);
10493 SmallVector<MachineBasicBlock*, 64> MBBLPads;
10494 for (MachineBasicBlock *BB : InvokeBBs) {
10495
10496 // Remove the landing pad successor from the invoke block and replace it
10497 // with the new dispatch block.
10498 SmallVector<MachineBasicBlock*, 4> Successors(BB->successors());
10499 while (!Successors.empty()) {
10500 MachineBasicBlock *SMBB = Successors.pop_back_val();
10501 if (SMBB->isEHPad()) {
10502 BB->removeSuccessor(SMBB);
10503 MBBLPads.push_back(SMBB);
10504 }
10505 }
10506
10507 BB->addSuccessor(DispatchBB, BranchProbability::getZero());
10508 BB->normalizeSuccProbs();
10509
10510 // Find the invoke call and mark all of the callee-saved registers as
10511 // 'implicit defined' so that they're spilled. This prevents code from
10512 // moving instructions to before the EH block, where they will never be
10513 // executed.
10514 for (MachineBasicBlock::reverse_iterator
10515 II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
10516 if (!II->isCall()) continue;
10517
10518 DenseMap<unsigned, bool> DefRegs;
10519 for (MachineInstr::mop_iterator
10520 OI = II->operands_begin(), OE = II->operands_end();
10521 OI != OE; ++OI) {
10522 if (!OI->isReg()) continue;
10523 DefRegs[OI->getReg()] = true;
10524 }
10525
10526 MachineInstrBuilder MIB(*MF, &*II);
10527
10528 for (unsigned i = 0; SavedRegs[i] != 0; ++i) {
10529 unsigned Reg = SavedRegs[i];
10530 if (Subtarget->isThumb2() &&
10531 !ARM::tGPRRegClass.contains(Reg) &&
10532 !ARM::hGPRRegClass.contains(Reg))
10533 continue;
10534 if (Subtarget->isThumb1Only() && !ARM::tGPRRegClass.contains(Reg))
10535 continue;
10536 if (!Subtarget->isThumb() && !ARM::GPRRegClass.contains(Reg))
10537 continue;
10538 if (!DefRegs[Reg])
10539 MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
10540 }
10541
10542 break;
10543 }
10544 }
10545
10546 // Mark all former landing pads as non-landing pads. The dispatch is the only
10547 // landing pad now.
10548 for (SmallVectorImpl<MachineBasicBlock*>::iterator
10549 I = MBBLPads.begin(), E = MBBLPads.end(); I != E; ++I)
10550 (*I)->setIsEHPad(false);
10551
10552 // The instruction is gone now.
10553 MI.eraseFromParent();
10554}
10555
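// Return the successor of MBB that is not Succ; expects MBB to have exactly
// two successors.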
10556static
10557MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
10558 for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
10559 E = MBB->succ_end(); I != E; ++I)
10560 if (*I != Succ)
10561 return *I;
10562 llvm_unreachable("Expecting a BB with two successors!")::llvm::llvm_unreachable_internal("Expecting a BB with two successors!"
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/Target/ARM/ARMISelLowering.cpp"
, 10562)
;
10563}
10564
10565/// Return the load opcode for a given load size. If the load size is >= 8, a
10566/// NEON opcode will be returned.
10567static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2) {
10568 if (LdSize >= 8)
10569 return LdSize == 16 ? ARM::VLD1q32wb_fixed
10570 : LdSize == 8 ? ARM::VLD1d32wb_fixed : 0;
10571 if (IsThumb1)
10572 return LdSize == 4 ? ARM::tLDRi
10573 : LdSize == 2 ? ARM::tLDRHi
10574 : LdSize == 1 ? ARM::tLDRBi : 0;
10575 if (IsThumb2)
10576 return LdSize == 4 ? ARM::t2LDR_POST
10577 : LdSize == 2 ? ARM::t2LDRH_POST
10578 : LdSize == 1 ? ARM::t2LDRB_POST : 0;
10579 return LdSize == 4 ? ARM::LDR_POST_IMM
10580 : LdSize == 2 ? ARM::LDRH_POST
10581 : LdSize == 1 ? ARM::LDRB_POST_IMM : 0;
10582}
10583
10584/// Return the store opcode for a given store size. If the store size is >= 8,
10585/// a NEON opcode will be returned.
10586static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2) {
10587 if (StSize >= 8)
10588 return StSize == 16 ? ARM::VST1q32wb_fixed
10589 : StSize == 8 ? ARM::VST1d32wb_fixed : 0;
10590 if (IsThumb1)
10591 return StSize == 4 ? ARM::tSTRi
10592 : StSize == 2 ? ARM::tSTRHi
10593 : StSize == 1 ? ARM::tSTRBi : 0;
10594 if (IsThumb2)
10595 return StSize == 4 ? ARM::t2STR_POST
10596 : StSize == 2 ? ARM::t2STRH_POST
10597 : StSize == 1 ? ARM::t2STRB_POST : 0;
10598 return StSize == 4 ? ARM::STR_POST_IMM
10599 : StSize == 2 ? ARM::STRH_POST
10600 : StSize == 1 ? ARM::STRB_POST_IMM : 0;
10601}
10602
10603/// Emit a post-increment load operation with given size. The instructions
10604/// will be added to BB at Pos.
10605static void emitPostLd(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
10606 const TargetInstrInfo *TII, const DebugLoc &dl,
10607 unsigned LdSize, unsigned Data, unsigned AddrIn,
10608 unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
10609 unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2);
10610 assert(LdOpc != 0 && "Should have a load opcode");
10611 if (LdSize >= 8) {
10612 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
10613 .addReg(AddrOut, RegState::Define)
10614 .addReg(AddrIn)
10615 .addImm(0)
10616 .add(predOps(ARMCC::AL));
10617 } else if (IsThumb1) {
10618 // load + update AddrIn
10619 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
10620 .addReg(AddrIn)
10621 .addImm(0)
10622 .add(predOps(ARMCC::AL));
10623 BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
10624 .add(t1CondCodeOp())
10625 .addReg(AddrIn)
10626 .addImm(LdSize)
10627 .add(predOps(ARMCC::AL));
10628 } else if (IsThumb2) {
10629 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
10630 .addReg(AddrOut, RegState::Define)
10631 .addReg(AddrIn)
10632 .addImm(LdSize)
10633 .add(predOps(ARMCC::AL));
10634 } else { // arm
10635 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
10636 .addReg(AddrOut, RegState::Define)
10637 .addReg(AddrIn)
10638 .addReg(0)
10639 .addImm(LdSize)
10640 .add(predOps(ARMCC::AL));
10641 }
10642}
10643
10644/// Emit a post-increment store operation with given size. The instructions
10645/// will be added to BB at Pos.
10646static void emitPostSt(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
10647 const TargetInstrInfo *TII, const DebugLoc &dl,
10648 unsigned StSize, unsigned Data, unsigned AddrIn,
10649 unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
10650 unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2);
10651 assert(StOpc != 0 && "Should have a store opcode");
10652 if (StSize >= 8) {
10653 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
10654 .addReg(AddrIn)
10655 .addImm(0)
10656 .addReg(Data)
10657 .add(predOps(ARMCC::AL));
10658 } else if (IsThumb1) {
10659 // store + update AddrIn
10660 BuildMI(*BB, Pos, dl, TII->get(StOpc))
10661 .addReg(Data)
10662 .addReg(AddrIn)
10663 .addImm(0)
10664 .add(predOps(ARMCC::AL));
10665 BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
10666 .add(t1CondCodeOp())
10667 .addReg(AddrIn)
10668 .addImm(StSize)
10669 .add(predOps(ARMCC::AL));
10670 } else if (IsThumb2) {
10671 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
10672 .addReg(Data)
10673 .addReg(AddrIn)
10674 .addImm(StSize)
10675 .add(predOps(ARMCC::AL));
10676 } else { // arm
10677 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
10678 .addReg(Data)
10679 .addReg(AddrIn)
10680 .addReg(0)
10681 .addImm(StSize)
10682 .add(predOps(ARMCC::AL));
10683 }
10684}
10685
10686MachineBasicBlock *
10687ARMTargetLowering::EmitStructByval(MachineInstr &MI,
10688 MachineBasicBlock *BB) const {
10689 // This pseudo instruction has 4 operands: dst, src, size, alignment.
10690 // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
10691 // Otherwise, we will generate unrolled scalar copies.
10692 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
10693 const BasicBlock *LLVM_BB = BB->getBasicBlock();
10694 MachineFunction::iterator It = ++BB->getIterator();
10695
10696 Register dest = MI.getOperand(0).getReg();
10697 Register src = MI.getOperand(1).getReg();
10698 unsigned SizeVal = MI.getOperand(2).getImm();
10699 unsigned Alignment = MI.getOperand(3).getImm();
10700 DebugLoc dl = MI.getDebugLoc();
10701
10702 MachineFunction *MF = BB->getParent();
10703 MachineRegisterInfo &MRI = MF->getRegInfo();
10704 unsigned UnitSize = 0;
10705 const TargetRegisterClass *TRC = nullptr;
10706 const TargetRegisterClass *VecTRC = nullptr;
10707
10708 bool IsThumb1 = Subtarget->isThumb1Only();
10709 bool IsThumb2 = Subtarget->isThumb2();
10710 bool IsThumb = Subtarget->isThumb();
10711
10712 if (Alignment & 1) {
10713 UnitSize = 1;
10714 } else if (Alignment & 2) {
10715 UnitSize = 2;
10716 } else {
10717 // Check whether we can use NEON instructions.
10718 if (!MF->getFunction().hasFnAttribute(Attribute::NoImplicitFloat) &&
10719 Subtarget->hasNEON()) {
10720 if ((Alignment % 16 == 0) && SizeVal >= 16)
10721 UnitSize = 16;
10722 else if ((Alignment % 8 == 0) && SizeVal >= 8)
10723 UnitSize = 8;
10724 }
10725 // Can't use NEON instructions.
10726 if (UnitSize == 0)
10727 UnitSize = 4;
10728 }
10729
10730 // Select the correct opcode and register class for unit size load/store
10731 bool IsNeon = UnitSize >= 8;
10732 TRC = IsThumb ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
10733 if (IsNeon)
10734 VecTRC = UnitSize == 16 ? &ARM::DPairRegClass
10735 : UnitSize == 8 ? &ARM::DPRRegClass
10736 : nullptr;
10737
10738 unsigned BytesLeft = SizeVal % UnitSize;
10739 unsigned LoopSize = SizeVal - BytesLeft;
10740
10741 if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) {
10742 // Use LDR and STR to copy.
10743 // [scratch, srcOut] = LDR_POST(srcIn, UnitSize)
10744 // [destOut] = STR_POST(scratch, destIn, UnitSize)
10745 unsigned srcIn = src;
10746 unsigned destIn = dest;
10747 for (unsigned i = 0; i < LoopSize; i+=UnitSize) {
10748 Register srcOut = MRI.createVirtualRegister(TRC);
10749 Register destOut = MRI.createVirtualRegister(TRC);
10750 Register scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
10751 emitPostLd(BB, MI, TII, dl, UnitSize, scratch, srcIn, srcOut,
10752 IsThumb1, IsThumb2);
10753 emitPostSt(BB, MI, TII, dl, UnitSize, scratch, destIn, destOut,
10754 IsThumb1, IsThumb2);
10755 srcIn = srcOut;
10756 destIn = destOut;
10757 }
10758
10759 // Handle the leftover bytes with LDRB and STRB.
10760 // [scratch, srcOut] = LDRB_POST(srcIn, 1)
10761 // [destOut] = STRB_POST(scratch, destIn, 1)
10762 for (unsigned i = 0; i < BytesLeft; i++) {
10763 Register srcOut = MRI.createVirtualRegister(TRC);
10764 Register destOut = MRI.createVirtualRegister(TRC);
10765 Register scratch = MRI.createVirtualRegister(TRC);
10766 emitPostLd(BB, MI, TII, dl, 1, scratch, srcIn, srcOut,
10767 IsThumb1, IsThumb2);
10768 emitPostSt(BB, MI, TII, dl, 1, scratch, destIn, destOut,
10769 IsThumb1, IsThumb2);
10770 srcIn = srcOut;
10771 destIn = destOut;
10772 }
10773 MI.eraseFromParent(); // The instruction is gone now.
10774 return BB;
10775 }
10776
10777 // Expand the pseudo op to a loop.
10778 // thisMBB:
10779 // ...
10780 // movw varEnd, # --> with thumb2
10781 // movt varEnd, #
10782 // ldrcp varEnd, idx --> without thumb2
10783 // fallthrough --> loopMBB
10784 // loopMBB:
10785 // PHI varPhi, varEnd, varLoop
10786 // PHI srcPhi, src, srcLoop
10787 // PHI destPhi, dst, destLoop
10788 // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
10789 // [destLoop] = STR_POST(scratch, destPhi, UnitSize)
10790 // subs varLoop, varPhi, #UnitSize
10791 // bne loopMBB
10792 // fallthrough --> exitMBB
10793 // exitMBB:
10794 // epilogue to handle left-over bytes
10795 // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
10796 // [destOut] = STRB_POST(scratch, destLoop, 1)
10797 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
10798 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
10799 MF->insert(It, loopMBB);
10800 MF->insert(It, exitMBB);
10801
10802 // Transfer the remainder of BB and its successor edges to exitMBB.
10803 exitMBB->splice(exitMBB->begin(), BB,
10804 std::next(MachineBasicBlock::iterator(MI)), BB->end());
10805 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
10806
10807 // Load an immediate to varEnd.
10808 Register varEnd = MRI.createVirtualRegister(TRC);
10809 if (Subtarget->useMovt()) {
10810 unsigned Vtmp = varEnd;
10811 if ((LoopSize & 0xFFFF0000) != 0)
10812 Vtmp = MRI.createVirtualRegister(TRC);
10813 BuildMI(BB, dl, TII->get(IsThumb ? ARM::t2MOVi16 : ARM::MOVi16), Vtmp)
10814 .addImm(LoopSize & 0xFFFF)
10815 .add(predOps(ARMCC::AL));
10816
10817 if ((LoopSize & 0xFFFF0000) != 0)
10818 BuildMI(BB, dl, TII->get(IsThumb ? ARM::t2MOVTi16 : ARM::MOVTi16), varEnd)
10819 .addReg(Vtmp)
10820 .addImm(LoopSize >> 16)
10821 .add(predOps(ARMCC::AL));
10822 } else {
10823 MachineConstantPool *ConstantPool = MF->getConstantPool();
10824 Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
10825 const Constant *C = ConstantInt::get(Int32Ty, LoopSize);
10826
10827 // MachineConstantPool wants an explicit alignment.
10828 Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
10829 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
10830 MachineMemOperand *CPMMO =
10831 MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
10832 MachineMemOperand::MOLoad, 4, Align(4));
10833
10834 if (IsThumb)
10835 BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci))
10836 .addReg(varEnd, RegState::Define)
10837 .addConstantPoolIndex(Idx)
10838 .add(predOps(ARMCC::AL))
10839 .addMemOperand(CPMMO);
10840 else
10841 BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp))
10842 .addReg(varEnd, RegState::Define)
10843 .addConstantPoolIndex(Idx)
10844 .addImm(0)
10845 .add(predOps(ARMCC::AL))
10846 .addMemOperand(CPMMO);
10847 }
10848 BB->addSuccessor(loopMBB);
10849
10850 // Generate the loop body:
10851 // varPhi = PHI(varLoop, varEnd)
10852 // srcPhi = PHI(srcLoop, src)
10853 // destPhi = PHI(destLoop, dst)
10854 MachineBasicBlock *entryBB = BB;
10855 BB = loopMBB;
10856 Register varLoop = MRI.createVirtualRegister(TRC);
10857 Register varPhi = MRI.createVirtualRegister(TRC);
10858 Register srcLoop = MRI.createVirtualRegister(TRC);
10859 Register srcPhi = MRI.createVirtualRegister(TRC);
10860 Register destLoop = MRI.createVirtualRegister(TRC);
10861 Register destPhi = MRI.createVirtualRegister(TRC);
10862
10863 BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi)
10864 .addReg(varLoop).addMBB(loopMBB)
10865 .addReg(varEnd).addMBB(entryBB);
10866 BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi)
10867 .addReg(srcLoop).addMBB(loopMBB)
10868 .addReg(src).addMBB(entryBB);
10869 BuildMI(BB, dl, TII->get(ARM::PHI), destPhi)
10870 .addReg(destLoop).addMBB(loopMBB)
10871 .addReg(dest).addMBB(entryBB);
10872
10873 // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
10874 // [destLoop] = STR_POST(scratch, destPhi, UnitSize)
10875 Register scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
10876 emitPostLd(BB, BB->end(), TII, dl, UnitSize, scratch, srcPhi, srcLoop,
10877 IsThumb1, IsThumb2);
10878 emitPostSt(BB, BB->end(), TII, dl, UnitSize, scratch, destPhi, destLoop,
10879 IsThumb1, IsThumb2);
10880
10881 // Decrement loop variable by UnitSize.
10882 if (IsThumb1) {
10883 BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop)
10884 .add(t1CondCodeOp())
10885 .addReg(varPhi)
10886 .addImm(UnitSize)
10887 .add(predOps(ARMCC::AL));
10888 } else {
10889 MachineInstrBuilder MIB =
10890 BuildMI(*BB, BB->end(), dl,
10891 TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
10892 MIB.addReg(varPhi)
10893 .addImm(UnitSize)
10894 .add(predOps(ARMCC::AL))
10895 .add(condCodeOp());
10896 MIB->getOperand(5).setReg(ARM::CPSR);
10897 MIB->getOperand(5).setIsDef(true);
10898 }
10899 BuildMI(*BB, BB->end(), dl,
10900 TII->get(IsThumb1 ? ARM::tBcc : IsThumb2 ? ARM::t2Bcc : ARM::Bcc))
10901 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
10902
10903 // loopMBB can loop back to loopMBB or fall through to exitMBB.
10904 BB->addSuccessor(loopMBB);
10905 BB->addSuccessor(exitMBB);
10906
10907 // Add epilogue to handle BytesLeft.
10908 BB = exitMBB;
10909 auto StartOfExit = exitMBB->begin();
10910
10911 // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
10912 // [destOut] = STRB_POST(scratch, destLoop, 1)
10913 unsigned srcIn = srcLoop;
10914 unsigned destIn = destLoop;
10915 for (unsigned i = 0; i < BytesLeft; i++) {
10916 Register srcOut = MRI.createVirtualRegister(TRC);
10917 Register destOut = MRI.createVirtualRegister(TRC);
10918 Register scratch = MRI.createVirtualRegister(TRC);
10919 emitPostLd(BB, StartOfExit, TII, dl, 1, scratch, srcIn, srcOut,
10920 IsThumb1, IsThumb2);
10921 emitPostSt(BB, StartOfExit, TII, dl, 1, scratch, destIn, destOut,
10922 IsThumb1, IsThumb2);
10923 srcIn = srcOut;
10924 destIn = destOut;
10925 }
10926
10927 MI.eraseFromParent(); // The instruction is gone now.
10928 return BB;
10929}
10930
10931MachineBasicBlock *
10932ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI,
10933 MachineBasicBlock *MBB) const {
10934 const TargetMachine &TM = getTargetMachine();
10935 const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
10936 DebugLoc DL = MI.getDebugLoc();
10937
10938 assert(Subtarget->isTargetWindows() &&
10939 "__chkstk is only supported on Windows");
10940 assert(Subtarget->isThumb2() && "Windows on ARM requires Thumb-2 mode");
10941
10942 // __chkstk takes the number of words to allocate on the stack in R4, and
10943 // returns the stack adjustment in number of bytes in R4. This will not
10944 // clobber any other registers (other than the obvious lr).
10945 //
10946 // Although, technically, IP should be considered a register which may be
10947 // clobbered, the call itself will not touch it. Windows on ARM is a pure
10948 // thumb-2 environment, so there is no interworking required. As a result, we
10949 // do not expect a veneer to be emitted by the linker, clobbering IP.
10950 //
10951 // Each module receives its own copy of __chkstk, so no import thunk is
10952 // required, again, ensuring that IP is not clobbered.
10953 //
10954 // Finally, although some linkers may theoretically provide a trampoline for
10955 // out of range calls (which is quite common due to a 32M range limitation of
10956 // branches for Thumb), we can generate the long-call version via
10957 // -mcmodel=large, alleviating the need for the trampoline which may clobber
10958 // IP.
10959
10960 switch (TM.getCodeModel()) {
10961 case CodeModel::Tiny:
10962 llvm_unreachable("Tiny code model not available on ARM.");
10963 case CodeModel::Small:
10964 case CodeModel::Medium:
10965 case CodeModel::Kernel:
10966 BuildMI(*MBB, MI, DL, TII.get(ARM::tBL))
10967 .add(predOps(ARMCC::AL))
10968 .addExternalSymbol("__chkstk")
10969 .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
10970 .addReg(ARM::R4, RegState::Implicit | RegState::Define)
10971 .addReg(ARM::R12,
10972 RegState::Implicit | RegState::Define | RegState::Dead)
10973 .addReg(ARM::CPSR,
10974 RegState::Implicit | RegState::Define | RegState::Dead);
10975 break;
10976 case CodeModel::Large: {
10977 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
10978 Register Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
10979
10980 BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg)
10981 .addExternalSymbol("__chkstk");
10982 BuildMI(*MBB, MI, DL, TII.get(gettBLXrOpcode(*MBB->getParent())))
10983 .add(predOps(ARMCC::AL))
10984 .addReg(Reg, RegState::Kill)
10985 .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
10986 .addReg(ARM::R4, RegState::Implicit | RegState::Define)
10987 .addReg(ARM::R12,
10988 RegState::Implicit | RegState::Define | RegState::Dead)
10989 .addReg(ARM::CPSR,
10990 RegState::Implicit | RegState::Define | RegState::Dead);
10991 break;
10992 }
10993 }
10994
10995 BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr), ARM::SP)
10996 .addReg(ARM::SP, RegState::Kill)
10997 .addReg(ARM::R4, RegState::Kill)
10998 .setMIFlags(MachineInstr::FrameSetup)
10999 .add(predOps(ARMCC::AL))
11000 .add(condCodeOp());
11001
11002 MI.eraseFromParent();
11003 return MBB;
11004}
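For readers unfamiliar with the stack-probe convention described in the comment above, the following standalone sketch models the net effect of the emitted sequence. It assumes, as the comment states, that __chkstk takes a word count in R4 and hands back a byte adjustment in R4; the probing itself is omitted and the helper name is made up.

#include <cstdint>
// Illustrative only; not LLVM API. Models R4-in-words -> R4-in-bytes -> SP -= R4.
static uint32_t simulateChkstkLowering(uint32_t SP, uint32_t NumWords) {
  uint32_t R4 = NumWords;      // caller places the word count in R4
  R4 *= 4;                     // __chkstk probes the pages and returns bytes in R4
  SP -= R4;                    // the final t2SUBrr: SP = SP - R4
  return SP;
}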
11005
11006MachineBasicBlock *
11007ARMTargetLowering::EmitLowered__dbzchk(MachineInstr &MI,
11008 MachineBasicBlock *MBB) const {
11009 DebugLoc DL = MI.getDebugLoc();
11010 MachineFunction *MF = MBB->getParent();
11011 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
11012
11013 MachineBasicBlock *ContBB = MF->CreateMachineBasicBlock();
11014 MF->insert(++MBB->getIterator(), ContBB);
11015 ContBB->splice(ContBB->begin(), MBB,
11016 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
11017 ContBB->transferSuccessorsAndUpdatePHIs(MBB);
11018 MBB->addSuccessor(ContBB);
11019
11020 MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
11021 BuildMI(TrapBB, DL, TII->get(ARM::t__brkdiv0));
11022 MF->push_back(TrapBB);
11023 MBB->addSuccessor(TrapBB);
11024
11025 BuildMI(*MBB, MI, DL, TII->get(ARM::tCMPi8))
11026 .addReg(MI.getOperand(0).getReg())
11027 .addImm(0)
11028 .add(predOps(ARMCC::AL));
11029 BuildMI(*MBB, MI, DL, TII->get(ARM::t2Bcc))
11030 .addMBB(TrapBB)
11031 .addImm(ARMCC::EQ)
11032 .addReg(ARM::CPSR);
11033
11034 MI.eraseFromParent();
11035 return ContBB;
11036}
11037
11038// The CPSR operand of SelectItr might be missing a kill marker
11039// because there were multiple uses of CPSR, and ISel didn't know
11040// which to mark. Figure out whether SelectItr should have had a
11041// kill marker, and set it if it should. Returns the correct kill
11042// marker value.
11043static bool checkAndUpdateCPSRKill(MachineBasicBlock::iterator SelectItr,
11044 MachineBasicBlock* BB,
11045 const TargetRegisterInfo* TRI) {
11046 // Scan forward through BB for a use/def of CPSR.
11047 MachineBasicBlock::iterator miI(std::next(SelectItr));
11048 for (MachineBasicBlock::iterator miE = BB->end(); miI != miE; ++miI) {
11049 const MachineInstr& mi = *miI;
11050 if (mi.readsRegister(ARM::CPSR))
11051 return false;
11052 if (mi.definesRegister(ARM::CPSR))
11053 break; // Should have kill-flag - update below.
11054 }
11055
11056 // If we hit the end of the block, check whether CPSR is live into a
11057 // successor.
11058 if (miI == BB->end()) {
11059 for (MachineBasicBlock::succ_iterator sItr = BB->succ_begin(),
11060 sEnd = BB->succ_end();
11061 sItr != sEnd; ++sItr) {
11062 MachineBasicBlock* succ = *sItr;
11063 if (succ->isLiveIn(ARM::CPSR))
11064 return false;
11065 }
11066 }
11067
11068 // We found a def, or hit the end of the basic block and CPSR wasn't live
11069 // out. SelectMI should have a kill flag on CPSR.
11070 SelectItr->addRegisterKilled(ARM::CPSR, TRI);
11071 return true;
11072}
11073
11074MachineBasicBlock *
11075ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
11076 MachineBasicBlock *BB) const {
11077 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
11078 DebugLoc dl = MI.getDebugLoc();
11079 bool isThumb2 = Subtarget->isThumb2();
11080 switch (MI.getOpcode()) {
11081 default: {
11082 MI.print(errs());
11083 llvm_unreachable("Unexpected instr type to insert");
11084 }
11085
11086 // Thumb1 post-indexed loads are really just single-register LDMs.
11087 case ARM::tLDR_postidx: {
11088 MachineOperand Def(MI.getOperand(1));
11089 BuildMI(*BB, MI, dl, TII->get(ARM::tLDMIA_UPD))
11090 .add(Def) // Rn_wb
11091 .add(MI.getOperand(2)) // Rn
11092 .add(MI.getOperand(3)) // PredImm
11093 .add(MI.getOperand(4)) // PredReg
11094 .add(MI.getOperand(0)) // Rt
11095 .cloneMemRefs(MI);
11096 MI.eraseFromParent();
11097 return BB;
11098 }
11099
11100 // The Thumb2 pre-indexed stores have the same MI operands; they just
11101 // define them differently in the .td files from the isel patterns, so
11102 // they need pseudos.
11103 case ARM::t2STR_preidx:
11104 MI.setDesc(TII->get(ARM::t2STR_PRE));
11105 return BB;
11106 case ARM::t2STRB_preidx:
11107 MI.setDesc(TII->get(ARM::t2STRB_PRE));
11108 return BB;
11109 case ARM::t2STRH_preidx:
11110 MI.setDesc(TII->get(ARM::t2STRH_PRE));
11111 return BB;
11112
11113 case ARM::STRi_preidx:
11114 case ARM::STRBi_preidx: {
11115 unsigned NewOpc = MI.getOpcode() == ARM::STRi_preidx ? ARM::STR_PRE_IMM
11116 : ARM::STRB_PRE_IMM;
11117 // Decode the offset.
11118 unsigned Offset = MI.getOperand(4).getImm();
11119 bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub;
11120 Offset = ARM_AM::getAM2Offset(Offset);
11121 if (isSub)
11122 Offset = -Offset;
11123
11124 MachineMemOperand *MMO = *MI.memoperands_begin();
11125 BuildMI(*BB, MI, dl, TII->get(NewOpc))
11126 .add(MI.getOperand(0)) // Rn_wb
11127 .add(MI.getOperand(1)) // Rt
11128 .add(MI.getOperand(2)) // Rn
11129 .addImm(Offset) // offset (skip GPR==zero_reg)
11130 .add(MI.getOperand(5)) // pred
11131 .add(MI.getOperand(6))
11132 .addMemOperand(MMO);
11133 MI.eraseFromParent();
11134 return BB;
11135 }
11136 case ARM::STRr_preidx:
11137 case ARM::STRBr_preidx:
11138 case ARM::STRH_preidx: {
11139 unsigned NewOpc;
11140 switch (MI.getOpcode()) {
11141 default: llvm_unreachable("unexpected opcode!");
11142 case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break;
11143 case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break;
11144 case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break;
11145 }
11146 MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
11147 for (unsigned i = 0; i < MI.getNumOperands(); ++i)
11148 MIB.add(MI.getOperand(i));
11149 MI.eraseFromParent();
11150 return BB;
11151 }
11152
11153 case ARM::tMOVCCr_pseudo: {
11154 // To "insert" a SELECT_CC instruction, we actually have to insert the
11155 // diamond control-flow pattern. The incoming instruction knows the
11156 // destination vreg to set, the condition code register to branch on, the
11157 // true/false values to select between, and a branch opcode to use.
11158 const BasicBlock *LLVM_BB = BB->getBasicBlock();
11159 MachineFunction::iterator It = ++BB->getIterator();
11160
11161 // thisMBB:
11162 // ...
11163 // TrueVal = ...
11164 // cmpTY ccX, r1, r2
11165 // bCC copy1MBB
11166 // fallthrough --> copy0MBB
11167 MachineBasicBlock *thisMBB = BB;
11168 MachineFunction *F = BB->getParent();
11169 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
11170 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
11171 F->insert(It, copy0MBB);
11172 F->insert(It, sinkMBB);
11173
11174 // Check whether CPSR is live past the tMOVCCr_pseudo.
11175 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
11176 if (!MI.killsRegister(ARM::CPSR) &&
11177 !checkAndUpdateCPSRKill(MI, thisMBB, TRI)) {
11178 copy0MBB->addLiveIn(ARM::CPSR);
11179 sinkMBB->addLiveIn(ARM::CPSR);
11180 }
11181
11182 // Transfer the remainder of BB and its successor edges to sinkMBB.
11183 sinkMBB->splice(sinkMBB->begin(), BB,
11184 std::next(MachineBasicBlock::iterator(MI)), BB->end());
11185 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
11186
11187 BB->addSuccessor(copy0MBB);
11188 BB->addSuccessor(sinkMBB);
11189
11190 BuildMI(BB, dl, TII->get(ARM::tBcc))
11191 .addMBB(sinkMBB)
11192 .addImm(MI.getOperand(3).getImm())
11193 .addReg(MI.getOperand(4).getReg());
11194
11195 // copy0MBB:
11196 // %FalseValue = ...
11197 // # fallthrough to sinkMBB
11198 BB = copy0MBB;
11199
11200 // Update machine-CFG edges
11201 BB->addSuccessor(sinkMBB);
11202
11203 // sinkMBB:
11204 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
11205 // ...
11206 BB = sinkMBB;
11207 BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), MI.getOperand(0).getReg())
11208 .addReg(MI.getOperand(1).getReg())
11209 .addMBB(copy0MBB)
11210 .addReg(MI.getOperand(2).getReg())
11211 .addMBB(thisMBB);
11212
11213 MI.eraseFromParent(); // The pseudo instruction is gone now.
11214 return BB;
11215 }
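As a rough scalar analogue of the diamond built above (illustrative only, helper name hypothetical): the conditional branch skips the false-value block, and the PHI plays the role of the final merge.

#include <cstdint>
static int32_t selectDiamond(bool CC, int32_t TrueVal, int32_t FalseVal) {
  int32_t Result = TrueVal;  // thisMBB: the true value is already available
  if (!CC)                   // tBcc jumps straight to sinkMBB when CC holds
    Result = FalseVal;       // copy0MBB: materialize the false value
  return Result;             // sinkMBB: the PHI merges the two incoming values
}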
11216
11217 case ARM::BCCi64:
11218 case ARM::BCCZi64: {
11219 // If there is an unconditional branch to the other successor, remove it.
11220 BB->erase(std::next(MachineBasicBlock::iterator(MI)), BB->end());
11221
11222 // Compare both parts that make up the double comparison separately for
11223 // equality.
11224 bool RHSisZero = MI.getOpcode() == ARM::BCCZi64;
11225
11226 Register LHS1 = MI.getOperand(1).getReg();
11227 Register LHS2 = MI.getOperand(2).getReg();
11228 if (RHSisZero) {
11229 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
11230 .addReg(LHS1)
11231 .addImm(0)
11232 .add(predOps(ARMCC::AL));
11233 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
11234 .addReg(LHS2).addImm(0)
11235 .addImm(ARMCC::EQ).addReg(ARM::CPSR);
11236 } else {
11237 Register RHS1 = MI.getOperand(3).getReg();
11238 Register RHS2 = MI.getOperand(4).getReg();
11239 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
11240 .addReg(LHS1)
11241 .addReg(RHS1)
11242 .add(predOps(ARMCC::AL));
11243 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
11244 .addReg(LHS2).addReg(RHS2)
11245 .addImm(ARMCC::EQ).addReg(ARM::CPSR);
11246 }
11247
11248 MachineBasicBlock *destMBB = MI.getOperand(RHSisZero ? 3 : 5).getMBB();
11249 MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);
11250 if (MI.getOperand(0).getImm() == ARMCC::NE)
11251 std::swap(destMBB, exitMBB);
11252
11253 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
11254 .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
11255 if (isThumb2)
11256 BuildMI(BB, dl, TII->get(ARM::t2B))
11257 .addMBB(exitMBB)
11258 .add(predOps(ARMCC::AL));
11259 else
11260 BuildMI(BB, dl, TII->get(ARM::B)).addMBB(exitMBB);
11261
11262 MI.eraseFromParent(); // The pseudo instruction is gone now.
11263 return BB;
11264 }
11265
11266 case ARM::Int_eh_sjlj_setjmp:
11267 case ARM::Int_eh_sjlj_setjmp_nofp:
11268 case ARM::tInt_eh_sjlj_setjmp:
11269 case ARM::t2Int_eh_sjlj_setjmp:
11270 case ARM::t2Int_eh_sjlj_setjmp_nofp:
11271 return BB;
11272
11273 case ARM::Int_eh_sjlj_setup_dispatch:
11274 EmitSjLjDispatchBlock(MI, BB);
11275 return BB;
11276
11277 case ARM::ABS:
11278 case ARM::t2ABS: {
11279 // To insert an ABS instruction, we have to insert the
11280 // diamond control-flow pattern. The incoming instruction knows the
11281 // source vreg to test against 0, the destination vreg to set,
11282 // the condition code register to branch on, the
11283 // true/false values to select between, and a branch opcode to use.
11284 // It transforms
11285 // V1 = ABS V0
11286 // into
11287 // V2 = MOVS V0
11288 // BCC (branch to SinkBB if V0 >= 0)
11289 // RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0)
11290 // SinkBB: V1 = PHI(V2, V3)
11291 const BasicBlock *LLVM_BB = BB->getBasicBlock();
11292 MachineFunction::iterator BBI = ++BB->getIterator();
11293 MachineFunction *Fn = BB->getParent();
11294 MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);
11295 MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB);
11296 Fn->insert(BBI, RSBBB);
11297 Fn->insert(BBI, SinkBB);
11298
11299 Register ABSSrcReg = MI.getOperand(1).getReg();
11300 Register ABSDstReg = MI.getOperand(0).getReg();
11301 bool ABSSrcKill = MI.getOperand(1).isKill();
11302 bool isThumb2 = Subtarget->isThumb2();
11303 MachineRegisterInfo &MRI = Fn->getRegInfo();
11304 // In Thumb mode, S must not be specified if the source register is the SP or
11305 // PC, or if the destination register is the SP, so restrict the register class.
11306 Register NewRsbDstReg = MRI.createVirtualRegister(
11307 isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass);
11308
11309 // Transfer the remainder of BB and its successor edges to sinkMBB.
11310 SinkBB->splice(SinkBB->begin(), BB,
11311 std::next(MachineBasicBlock::iterator(MI)), BB->end());
11312 SinkBB->transferSuccessorsAndUpdatePHIs(BB);
11313
11314 BB->addSuccessor(RSBBB);
11315 BB->addSuccessor(SinkBB);
11316
11317 // fall through to SinkMBB
11318 RSBBB->addSuccessor(SinkBB);
11319
11320 // insert a cmp at the end of BB
11321 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
11322 .addReg(ABSSrcReg)
11323 .addImm(0)
11324 .add(predOps(ARMCC::AL));
11325
11326 // insert a bcc with opposite CC to ARMCC::MI at the end of BB
11327 BuildMI(BB, dl,
11328 TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB)
11329 .addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR);
11330
11331 // insert rsbri in RSBBB
11332 // Note: BCC and rsbri will be converted into predicated rsbmi
11333 // by if-conversion pass
11334 BuildMI(*RSBBB, RSBBB->begin(), dl,
11335 TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
11336 .addReg(ABSSrcReg, ABSSrcKill ? RegState::Kill : 0)
11337 .addImm(0)
11338 .add(predOps(ARMCC::AL))
11339 .add(condCodeOp());
11340
11341 // insert PHI in SinkBB,
11342 // reuse ABSDstReg to not change uses of ABS instruction
11343 BuildMI(*SinkBB, SinkBB->begin(), dl,
11344 TII->get(ARM::PHI), ABSDstReg)
11345 .addReg(NewRsbDstReg).addMBB(RSBBB)
11346 .addReg(ABSSrcReg).addMBB(BB);
11347
11348 // remove ABS instruction
11349 MI.eraseFromParent();
11350
11351 // return last added BB
11352 return SinkBB;
11353 }
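A scalar sketch of the same expansion (hypothetical helper, not part of this file): compare against zero, conditionally negate in the side block, and merge in the sink block.

static int lowerAbsSketch(int V0) {
  int V2 = V0;                 // V2 = MOVS V0 (also sets the flags)
  int V3 = 0;
  if (V0 < 0)                  // the Bcc skips RSBBB when V0 >= 0
    V3 = 0 - V2;               // RSBri in RSBBB computes the negation
  return (V0 < 0) ? V3 : V2;   // the PHI in SinkBB picks V3 or V2
}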
11354 case ARM::COPY_STRUCT_BYVAL_I32:
11355 ++NumLoopByVals;
11356 return EmitStructByval(MI, BB);
11357 case ARM::WIN__CHKSTK:
11358 return EmitLowered__chkstk(MI, BB);
11359 case ARM::WIN__DBZCHK:
11360 return EmitLowered__dbzchk(MI, BB);
11361 }
11362}
11363
11364/// Attaches vregs to MEMCPY that it will use as scratch registers
11365/// when it is expanded into LDM/STM. This is done as a post-isel lowering
11366/// instead of as a custom inserter because we need the use list from the SDNode.
11367static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget,
11368 MachineInstr &MI, const SDNode *Node) {
11369 bool isThumb1 = Subtarget->isThumb1Only();
11370
11371 DebugLoc DL = MI.getDebugLoc();
11372 MachineFunction *MF = MI.getParent()->getParent();
11373 MachineRegisterInfo &MRI = MF->getRegInfo();
11374 MachineInstrBuilder MIB(*MF, MI);
11375
11376 // If the new dst/src is unused mark it as dead.
11377 if (!Node->hasAnyUseOfValue(0)) {
11378 MI.getOperand(0).setIsDead(true);
11379 }
11380 if (!Node->hasAnyUseOfValue(1)) {
11381 MI.getOperand(1).setIsDead(true);
11382 }
11383
11384 // The MEMCPY both defines and kills the scratch registers.
11385 for (unsigned I = 0; I != MI.getOperand(4).getImm(); ++I) {
11386 Register TmpReg = MRI.createVirtualRegister(isThumb1 ? &ARM::tGPRRegClass
11387 : &ARM::GPRRegClass);
11388 MIB.addReg(TmpReg, RegState::Define|RegState::Dead);
11389 }
11390}
11391
11392void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
11393 SDNode *Node) const {
11394 if (MI.getOpcode() == ARM::MEMCPY) {
11395 attachMEMCPYScratchRegs(Subtarget, MI, Node);
11396 return;
11397 }
11398
11399 const MCInstrDesc *MCID = &MI.getDesc();
11400 // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,
11401 // RSC. Coming out of isel, they have an implicit CPSR def, but the optional
11402 // operand is still set to noreg. If needed, set the optional operand's
11403 // register to CPSR, and remove the redundant implicit def.
11404 //
11405 // e.g. ADCS (..., implicit-def CPSR) -> ADC (... opt:def CPSR).
11406
11407 // Rename pseudo opcodes.
11408 unsigned NewOpc = convertAddSubFlagsOpcode(MI.getOpcode());
11409 unsigned ccOutIdx;
11410 if (NewOpc) {
11411 const ARMBaseInstrInfo *TII = Subtarget->getInstrInfo();
11412 MCID = &TII->get(NewOpc);
11413
11414 assert(MCID->getNumOperands() ==
11415 MI.getDesc().getNumOperands() + 5 - MI.getDesc().getSize()
11416 && "converted opcode should be the same except for cc_out"
11417 " (and, on Thumb1, pred)");
11418
11419 MI.setDesc(*MCID);
11420
11421 // Add the optional cc_out operand
11422 MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/true));
11423
11424 // On Thumb1, move all input operands to the end, then add the predicate
11425 if (Subtarget->isThumb1Only()) {
11426 for (unsigned c = MCID->getNumOperands() - 4; c--;) {
11427 MI.addOperand(MI.getOperand(1));
11428 MI.RemoveOperand(1);
11429 }
11430
11431 // Restore the ties
11432 for (unsigned i = MI.getNumOperands(); i--;) {
11433 const MachineOperand& op = MI.getOperand(i);
11434 if (op.isReg() && op.isUse()) {
11435 int DefIdx = MCID->getOperandConstraint(i, MCOI::TIED_TO);
11436 if (DefIdx != -1)
11437 MI.tieOperands(DefIdx, i);
11438 }
11439 }
11440
11441 MI.addOperand(MachineOperand::CreateImm(ARMCC::AL));
11442 MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/false));
11443 ccOutIdx = 1;
11444 } else
11445 ccOutIdx = MCID->getNumOperands() - 1;
11446 } else
11447 ccOutIdx = MCID->getNumOperands() - 1;
11448
11449 // Any ARM instruction that sets the 's' bit should specify an optional
11450 // "cc_out" operand in the last operand position.
11451 if (!MI.hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) {
11452 assert(!NewOpc && "Optional cc_out operand required");
11453 return;
11454 }
11455 // Look for an implicit def of CPSR added by MachineInstr ctor. Remove it
11456 // since we already have an optional CPSR def.
11457 bool definesCPSR = false;
11458 bool deadCPSR = false;
11459 for (unsigned i = MCID->getNumOperands(), e = MI.getNumOperands(); i != e;
11460 ++i) {
11461 const MachineOperand &MO = MI.getOperand(i);
11462 if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) {
11463 definesCPSR = true;
11464 if (MO.isDead())
11465 deadCPSR = true;
11466 MI.RemoveOperand(i);
11467 break;
11468 }
11469 }
11470 if (!definesCPSR) {
11471 assert(!NewOpc && "Optional cc_out operand required");
11472 return;
11473 }
11474 assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag");
11475 if (deadCPSR) {
11476 assert(!MI.getOperand(ccOutIdx).getReg() &&
11477 "expect uninitialized optional cc_out operand");
11478 // Thumb1 instructions must have the S bit even if the CPSR is dead.
11479 if (!Subtarget->isThumb1Only())
11480 return;
11481 }
11482
11483 // If this instruction was defined with an optional CPSR def and its dag node
11484 // had a live implicit CPSR def, then activate the optional CPSR def.
11485 MachineOperand &MO = MI.getOperand(ccOutIdx);
11486 MO.setReg(ARM::CPSR);
11487 MO.setIsDef(true);
11488}
11489
11490//===----------------------------------------------------------------------===//
11491// ARM Optimization Hooks
11492//===----------------------------------------------------------------------===//
11493
11494// Helper function that checks if N is a null or all ones constant.
11495static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) {
11496 return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
11497}
11498
11499// Return true if N is conditionally 0 or all ones.
11500// Detects these expressions where cc is an i1 value:
11501//
11502// (select cc 0, y) [AllOnes=0]
11503// (select cc y, 0) [AllOnes=0]
11504// (zext cc) [AllOnes=0]
11505// (sext cc) [AllOnes=0/1]
11506// (select cc -1, y) [AllOnes=1]
11507// (select cc y, -1) [AllOnes=1]
11508//
11509// Invert is set when N is the null/all ones constant when CC is false.
11510// OtherOp is set to the alternative value of N.
11511static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes,
11512 SDValue &CC, bool &Invert,
11513 SDValue &OtherOp,
11514 SelectionDAG &DAG) {
11515 switch (N->getOpcode()) {
11516 default: return false;
11517 case ISD::SELECT: {
11518 CC = N->getOperand(0);
11519 SDValue N1 = N->getOperand(1);
11520 SDValue N2 = N->getOperand(2);
11521 if (isZeroOrAllOnes(N1, AllOnes)) {
11522 Invert = false;
11523 OtherOp = N2;
11524 return true;
11525 }
11526 if (isZeroOrAllOnes(N2, AllOnes)) {
11527 Invert = true;
11528 OtherOp = N1;
11529 return true;
11530 }
11531 return false;
11532 }
11533 case ISD::ZERO_EXTEND:
11534 // (zext cc) can never be the all ones value.
11535 if (AllOnes)
11536 return false;
11537 LLVM_FALLTHROUGH;
11538 case ISD::SIGN_EXTEND: {
11539 SDLoc dl(N);
11540 EVT VT = N->getValueType(0);
11541 CC = N->getOperand(0);
11542 if (CC.getValueType() != MVT::i1 || CC.getOpcode() != ISD::SETCC)
11543 return false;
11544 Invert = !AllOnes;
11545 if (AllOnes)
11546 // When looking for an AllOnes constant, N is an sext, and the 'other'
11547 // value is 0.
11548 OtherOp = DAG.getConstant(0, dl, VT);
11549 else if (N->getOpcode() == ISD::ZERO_EXTEND)
11550 // When looking for a 0 constant, N can be zext or sext.
11551 OtherOp = DAG.getConstant(1, dl, VT);
11552 else
11553 OtherOp = DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), dl,
11554 VT);
11555 return true;
11556 }
11557 }
11558}
11559
11560// Combine a constant select operand into its use:
11561//
11562// (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
11563// (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
11564// (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) [AllOnes=1]
11565// (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
11566// (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
11567//
11568// The transform is rejected if the select doesn't have a constant operand that
11569// is null, or all ones when AllOnes is set.
11570//
11571// Also recognize sext/zext from i1:
11572//
11573// (add (zext cc), x) -> (select cc (add x, 1), x)
11574// (add (sext cc), x) -> (select cc (add x, -1), x)
11575//
11576// These transformations eventually create predicated instructions.
11577//
11578// @param N The node to transform.
11579// @param Slct The N operand that is a select.
11580// @param OtherOp The other N operand (x above).
11581// @param DCI Context.
11582// @param AllOnes Require the select constant to be all ones instead of null.
11583// @returns The new node, or SDValue() on failure.
11584static
11585SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
11586 TargetLowering::DAGCombinerInfo &DCI,
11587 bool AllOnes = false) {
11588 SelectionDAG &DAG = DCI.DAG;
11589 EVT VT = N->getValueType(0);
11590 SDValue NonConstantVal;
11591 SDValue CCOp;
11592 bool SwapSelectOps;
11593 if (!isConditionalZeroOrAllOnes(Slct.getNode(), AllOnes, CCOp, SwapSelectOps,
11594 NonConstantVal, DAG))
11595 return SDValue();
11596
11597 // Slct is now known to be the desired identity constant when CC is true.
11598 SDValue TrueVal = OtherOp;
11599 SDValue FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
11600 OtherOp, NonConstantVal);
11601 // Unless SwapSelectOps says CC should be false.
11602 if (SwapSelectOps)
11603 std::swap(TrueVal, FalseVal);
11604
11605 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
11606 CCOp, TrueVal, FalseVal);
11607}
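The identity the combine relies on can be checked on plain scalars; this minimal sketch spells out the AllOnes=0 add case from the comment above (helper names are made up).

// (add (select cc, 0, c), x) and (select cc, x, (add x, c)) agree for all inputs.
static int addSelectLHS(bool cc, int c, int x) { return (cc ? 0 : c) + x; }
static int addSelectRHS(bool cc, int c, int x) { return cc ? x : (x + c); }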
11608
11609// Attempt combineSelectAndUse on each operand of a commutative operator N.
11610static
11611SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes,
11612 TargetLowering::DAGCombinerInfo &DCI) {
11613 SDValue N0 = N->getOperand(0);
11614 SDValue N1 = N->getOperand(1);
11615 if (N0.getNode()->hasOneUse())
11616 if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes))
11617 return Result;
11618 if (N1.getNode()->hasOneUse())
11619 if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes))
11620 return Result;
11621 return SDValue();
11622}
11623
11624static bool IsVUZPShuffleNode(SDNode *N) {
11625 // VUZP shuffle node.
11626 if (N->getOpcode() == ARMISD::VUZP)
11627 return true;
11628
11629 // "VUZP" on i32 is an alias for VTRN.
11630 if (N->getOpcode() == ARMISD::VTRN && N->getValueType(0) == MVT::v2i32)
11631 return true;
11632
11633 return false;
11634}
11635
11636static SDValue AddCombineToVPADD(SDNode *N, SDValue N0, SDValue N1,
11637 TargetLowering::DAGCombinerInfo &DCI,
11638 const ARMSubtarget *Subtarget) {
11639 // Look for ADD(VUZP.0, VUZP.1).
11640 if (!IsVUZPShuffleNode(N0.getNode()) || N0.getNode() != N1.getNode() ||
11641 N0 == N1)
11642 return SDValue();
11643
11644 // Make sure the ADD is a 64-bit add; there is no 128-bit VPADD.
11645 if (!N->getValueType(0).is64BitVector())
11646 return SDValue();
11647
11648 // Generate vpadd.
11649 SelectionDAG &DAG = DCI.DAG;
11650 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11651 SDLoc dl(N);
11652 SDNode *Unzip = N0.getNode();
11653 EVT VT = N->getValueType(0);
11654
11655 SmallVector<SDValue, 8> Ops;
11656 Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpadd, dl,
11657 TLI.getPointerTy(DAG.getDataLayout())));
11658 Ops.push_back(Unzip->getOperand(0));
11659 Ops.push_back(Unzip->getOperand(1));
11660
11661 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);
11662}
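A minimal scalar model of the vpadd the combine emits, assuming the usual NEON pairwise-add semantics (illustrative helper, not the intrinsic's real signature):

#include <array>
#include <cstdint>
static std::array<int32_t, 2> vpaddModel(std::array<int32_t, 2> A,
                                         std::array<int32_t, 2> B) {
  // Each result lane is the sum of one adjacent pair from the concatenated inputs,
  // which is what ADD(VUZP.0, VUZP.1) computes lane by lane.
  return {A[0] + A[1], B[0] + B[1]};
}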
11663
11664static SDValue AddCombineVUZPToVPADDL(SDNode *N, SDValue N0, SDValue N1,
11665 TargetLowering::DAGCombinerInfo &DCI,
11666 const ARMSubtarget *Subtarget) {
11667 // Check for two extended operands.
11668 if (!(N0.getOpcode() == ISD::SIGN_EXTEND &&
11669 N1.getOpcode() == ISD::SIGN_EXTEND) &&
11670 !(N0.getOpcode() == ISD::ZERO_EXTEND &&
11671 N1.getOpcode() == ISD::ZERO_EXTEND))
11672 return SDValue();
11673
11674 SDValue N00 = N0.getOperand(0);
11675 SDValue N10 = N1.getOperand(0);
11676
11677 // Look for ADD(SEXT(VUZP.0), SEXT(VUZP.1))
11678 if (!IsVUZPShuffleNode(N00.getNode()) || N00.getNode() != N10.getNode() ||
11679 N00 == N10)
11680 return SDValue();
11681
11682 // We only recognize Q register paddl here; this can't be reached until
11683 // after type legalization.
11684 if (!N00.getValueType().is64BitVector() ||
11685 !N0.getValueType().is128BitVector())
11686 return SDValue();
11687
11688 // Generate vpaddl.
11689 SelectionDAG &DAG = DCI.DAG;
11690 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11691 SDLoc dl(N);
11692 EVT VT = N->getValueType(0);
11693
11694 SmallVector<SDValue, 8> Ops;
11695 // Form vpaddl.sN or vpaddl.uN depending on the kind of extension.
11696 unsigned Opcode;
11697 if (N0.getOpcode() == ISD::SIGN_EXTEND)
11698 Opcode = Intrinsic::arm_neon_vpaddls;
11699 else
11700 Opcode = Intrinsic::arm_neon_vpaddlu;
11701 Ops.push_back(DAG.getConstant(Opcode, dl,
11702 TLI.getPointerTy(DAG.getDataLayout())));
11703 EVT ElemTy = N00.getValueType().getVectorElementType();
11704 unsigned NumElts = VT.getVectorNumElements();
11705 EVT ConcatVT = EVT::getVectorVT(*DAG.getContext(), ElemTy, NumElts * 2);
11706 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), ConcatVT,
11707 N00.getOperand(0), N00.getOperand(1));
11708 Ops.push_back(Concat);
11709
11710 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);
11711}
11712
11713// FIXME: This function shouldn't be necessary; if we lower BUILD_VECTOR in
11714// an appropriate manner, we end up with ADD(VUZP(ZEXT(N))), which is
11715// much easier to match.
11716static SDValue
11717AddCombineBUILD_VECTORToVPADDL(SDNode *N, SDValue N0, SDValue N1,
11718 TargetLowering::DAGCombinerInfo &DCI,
11719 const ARMSubtarget *Subtarget) {
11720 // Only perform the optimization after legalization, and only if NEON is
11721 // available. We also expect both operands to be BUILD_VECTORs.
11722 if (DCI.isBeforeLegalize() || !Subtarget->hasNEON()
11723 || N0.getOpcode() != ISD::BUILD_VECTOR
11724 || N1.getOpcode() != ISD::BUILD_VECTOR)
11725 return SDValue();
11726
11727 // Check output type since VPADDL operand elements can only be 8, 16, or 32.
11728 EVT VT = N->getValueType(0);
11729 if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64)
11730 return SDValue();
11731
11732 // Check that the vector operands are of the right form.
11733 // N0 and N1 are BUILD_VECTOR nodes with N number of EXTRACT_VECTOR
11734 // operands, where N is the size of the formed vector.
11735 // Each EXTRACT_VECTOR should have the same input vector and odd or even
11736 // index such that we have a pairwise add pattern.
11737
11738 // Grab the vector that all EXTRACT_VECTOR nodes should be referencing.
11739 if (N0->getOperand(0)->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
11740 return SDValue();
11741 SDValue Vec = N0->getOperand(0)->getOperand(0);
11742 SDNode *V = Vec.getNode();
11743 unsigned nextIndex = 0;
11744
11745 // For each operands to the ADD which are BUILD_VECTORs,
11746 // check to see if each of their operands are an EXTRACT_VECTOR with
11747 // the same vector and appropriate index.
11748 for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) {
11749 if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT
11750 && N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
11751
11752 SDValue ExtVec0 = N0->getOperand(i);
11753 SDValue ExtVec1 = N1->getOperand(i);
11754
11755 // First operand is the vector; verify it's the same.
11756 if (V != ExtVec0->getOperand(0).getNode() ||
11757 V != ExtVec1->getOperand(0).getNode())
11758 return SDValue();
11759
11760 // Second is the constant; verify it's correct.
11761 ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1));
11762 ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1));
11763
11764 // For the constant, we want to see all the even or all the odd.
11765 if (!C0 || !C1 || C0->getZExtValue() != nextIndex
11766 || C1->getZExtValue() != nextIndex+1)
11767 return SDValue();
11768
11769 // Increment index.
11770 nextIndex+=2;
11771 } else
11772 return SDValue();
11773 }
11774
11775 // Don't generate vpaddl+vmovn; we'll match it to vpadd later. Also make sure
11776 // we're using the entire input vector, otherwise there's a size/legality
11777 // mismatch somewhere.
11778 if (nextIndex != Vec.getValueType().getVectorNumElements() ||
11779 Vec.getValueType().getVectorElementType() == VT.getVectorElementType())
11780 return SDValue();
11781
11782 // Create VPADDL node.
11783 SelectionDAG &DAG = DCI.DAG;
11784 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11785
11786 SDLoc dl(N);
11787
11788 // Build operand list.
11789 SmallVector<SDValue, 8> Ops;
11790 Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls, dl,
11791 TLI.getPointerTy(DAG.getDataLayout())));
11792
11793 // Input is the vector.
11794 Ops.push_back(Vec);
11795
11796 // Get widened type and narrowed type.
11797 MVT widenType;
11798 unsigned numElem = VT.getVectorNumElements();
11799
11800 EVT inputLaneType = Vec.getValueType().getVectorElementType();
11801 switch (inputLaneType.getSimpleVT().SimpleTy) {
11802 case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;
11803 case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;
11804 case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;
11805 default:
11806 llvm_unreachable("Invalid vector element type for padd optimization.");
11807 }
11808
11809 SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, widenType, Ops);
11810 unsigned ExtOp = VT.bitsGT(tmp.getValueType()) ? ISD::ANY_EXTEND : ISD::TRUNCATE;
11811 return DAG.getNode(ExtOp, dl, VT, tmp);
11812}
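For reference, a sketch of the widening pairwise add (vpaddl) that this pattern is rewritten into, shown for the i8 -> i16 case under the usual NEON semantics (hypothetical helper):

#include <cstdint>
#include <vector>
static std::vector<int16_t> vpaddlModelS8(const std::vector<int8_t> &In) {
  std::vector<int16_t> Out(In.size() / 2);
  for (size_t I = 0; I < Out.size(); ++I)
    Out[I] = int16_t(In[2 * I]) + int16_t(In[2 * I + 1]); // even lane + odd lane
  return Out;
}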
11813
11814static SDValue findMUL_LOHI(SDValue V) {
11815 if (V->getOpcode() == ISD::UMUL_LOHI ||
11816 V->getOpcode() == ISD::SMUL_LOHI)
11817 return V;
11818 return SDValue();
11819}
11820
11821static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode,
11822 TargetLowering::DAGCombinerInfo &DCI,
11823 const ARMSubtarget *Subtarget) {
11824 if (!Subtarget->hasBaseDSP())
11825 return SDValue();
11826
11827 // SMLALBB, SMLALBT, SMLALTB, and SMLALTT multiply two 16-bit values and
11828 // accumulate the product into a 64-bit value. The 16-bit values will
11829 // be sign-extended somehow or SRA'd into 32-bit values
11830 // (addc (adde (mul 16bit, 16bit), lo), hi)
11831 SDValue Mul = AddcNode->getOperand(0);
11832 SDValue Lo = AddcNode->getOperand(1);
11833 if (Mul.getOpcode() != ISD::MUL) {
11834 Lo = AddcNode->getOperand(0);
11835 Mul = AddcNode->getOperand(1);
11836 if (Mul.getOpcode() != ISD::MUL)
11837 return SDValue();
11838 }
11839
11840 SDValue SRA = AddeNode->getOperand(0);
11841 SDValue Hi = AddeNode->getOperand(1);
11842 if (SRA.getOpcode() != ISD::SRA) {
11843 SRA = AddeNode->getOperand(1);
11844 Hi = AddeNode->getOperand(0);
11845 if (SRA.getOpcode() != ISD::SRA)
11846 return SDValue();
11847 }
11848 if (auto Const = dyn_cast<ConstantSDNode>(SRA.getOperand(1))) {
11849 if (Const->getZExtValue() != 31)
11850 return SDValue();
11851 } else
11852 return SDValue();
11853
11854 if (SRA.getOperand(0) != Mul)
11855 return SDValue();
11856
11857 SelectionDAG &DAG = DCI.DAG;
11858 SDLoc dl(AddcNode);
11859 unsigned Opcode = 0;
11860 SDValue Op0;
11861 SDValue Op1;
11862
11863 if (isS16(Mul.getOperand(0), DAG) && isS16(Mul.getOperand(1), DAG)) {
11864 Opcode = ARMISD::SMLALBB;
11865 Op0 = Mul.getOperand(0);
11866 Op1 = Mul.getOperand(1);
11867 } else if (isS16(Mul.getOperand(0), DAG) && isSRA16(Mul.getOperand(1))) {
11868 Opcode = ARMISD::SMLALBT;
11869 Op0 = Mul.getOperand(0);
11870 Op1 = Mul.getOperand(1).getOperand(0);
11871 } else if (isSRA16(Mul.getOperand(0)) && isS16(Mul.getOperand(1), DAG)) {
11872 Opcode = ARMISD::SMLALTB;
11873 Op0 = Mul.getOperand(0).getOperand(0);
11874 Op1 = Mul.getOperand(1);
11875 } else if (isSRA16(Mul.getOperand(0)) && isSRA16(Mul.getOperand(1))) {
11876 Opcode = ARMISD::SMLALTT;
11877 Op0 = Mul->getOperand(0).getOperand(0);
11878 Op1 = Mul->getOperand(1).getOperand(0);
11879 }
11880
11881 if (!Op0 || !Op1)
11882 return SDValue();
11883
11884 SDValue SMLAL = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
11885 Op0, Op1, Lo, Hi);
11886 // Replace the ADD nodes' uses with the MLA node's values.
11887 SDValue HiMLALResult(SMLAL.getNode(), 1);
11888 SDValue LoMLALResult(SMLAL.getNode(), 0);
11889
11890 DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult);
11891 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult);
11892
11893 // Return original node to notify the driver to stop replacing.
11894 SDValue resNode(AddcNode, 0);
11895 return resNode;
11896}
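A scalar reference for the SMLALxy family matched above, assuming the documented ARM DSP semantics: multiply two selected 16-bit halves and accumulate into a 64-bit value (illustrative helper).

#include <cstdint>
static int64_t smlalbbModel(int64_t Acc, int32_t Rn, int32_t Rm) {
  // Bottom halves of both operands, sign-extended, multiplied, then accumulated.
  return Acc + int64_t(int16_t(Rn)) * int64_t(int16_t(Rm));
}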
11897
11898static SDValue AddCombineTo64bitMLAL(SDNode *AddeSubeNode,
11899 TargetLowering::DAGCombinerInfo &DCI,
11900 const ARMSubtarget *Subtarget) {
11901 // Look for multiply add opportunities.
11902 // The pattern is an ISD::UMUL_LOHI followed by two add nodes, where
11903 // each add node consumes a value from ISD::UMUL_LOHI and there is
11904 // a glue link from the first add to the second add.
11905 // If we find this pattern, we can replace the U/SMUL_LOHI, ADDC, and ADDE by
11906 // a S/UMLAL instruction.
11907 //                  UMUL_LOHI
11908 //                 / :lo    \ :hi
11909 //                V          \          [no multiline comment]
11910 //  loAdd ->  ADDC           |
11911 //                 \ :carry /
11912 //                   V      V
11913 //                    ADDE <- hiAdd
11914 //
11915 // In the special case where only the higher part of a signed result is used
11916 // and the add to the low part of the result of ISD::UMUL_LOHI adds or subtracts
11917 // a constant with the exact value of 0x80000000, we recognize we are dealing
11918 // with a "rounded multiply and add" (or subtract) and transform it into
11919 // either an ARMISD::SMMLAR or an ARMISD::SMMLSR, respectively.
11920
11921 assert((AddeSubeNode->getOpcode() == ARMISD::ADDE ||
11922 AddeSubeNode->getOpcode() == ARMISD::SUBE) &&
11923 "Expect an ADDE or SUBE");
11924
11925 assert(AddeSubeNode->getNumOperands() == 3 &&
11926 AddeSubeNode->getOperand(2).getValueType() == MVT::i32 &&
11927 "ADDE node has the wrong inputs");
11928
11929 // Check that we are chained to the right ADDC or SUBC node.
11930 SDNode *AddcSubcNode = AddeSubeNode->getOperand(2).getNode();
11931 if ((AddeSubeNode->getOpcode() == ARMISD::ADDE &&
11932 AddcSubcNode->getOpcode() != ARMISD::ADDC) ||
11933 (AddeSubeNode->getOpcode() == ARMISD::SUBE &&
11934 AddcSubcNode->getOpcode() != ARMISD::SUBC))
11935 return SDValue();
11936
11937 SDValue AddcSubcOp0 = AddcSubcNode->getOperand(0);
11938 SDValue AddcSubcOp1 = AddcSubcNode->getOperand(1);
11939
11940 // Check if the two operands are from the same mul_lohi node.
11941 if (AddcSubcOp0.getNode() == AddcSubcOp1.getNode())
11942 return SDValue();
11943
11944 assert(AddcSubcNode->getNumValues() == 2 &&
11945 AddcSubcNode->getValueType(0) == MVT::i32 &&
11946 "Expect ADDC with two result values. First: i32");
11947
11948 // Check that the ADDC adds the low result of the S/UMUL_LOHI. If not, it
11949 // may be an SMLAL, which multiplies two 16-bit values.
11950 if (AddeSubeNode->getOpcode() == ARMISD::ADDE &&
11951 AddcSubcOp0->getOpcode() != ISD::UMUL_LOHI &&
11952 AddcSubcOp0->getOpcode() != ISD::SMUL_LOHI &&
11953 AddcSubcOp1->getOpcode() != ISD::UMUL_LOHI &&
11954 AddcSubcOp1->getOpcode() != ISD::SMUL_LOHI)
11955 return AddCombineTo64BitSMLAL16(AddcSubcNode, AddeSubeNode, DCI, Subtarget);
11956
11957 // Check for the triangle shape.
11958 SDValue AddeSubeOp0 = AddeSubeNode->getOperand(0);
11959 SDValue AddeSubeOp1 = AddeSubeNode->getOperand(1);
11960
11961 // Make sure that the ADDE/SUBE operands are not coming from the same node.
11962 if (AddeSubeOp0.getNode() == AddeSubeOp1.getNode())
11963 return SDValue();
11964
11965 // Find the MUL_LOHI node walking up ADDE/SUBE's operands.
11966 bool IsLeftOperandMUL = false;
11967 SDValue MULOp = findMUL_LOHI(AddeSubeOp0);
11968 if (MULOp == SDValue())
11969 MULOp = findMUL_LOHI(AddeSubeOp1);
11970 else
11971 IsLeftOperandMUL = true;
11972 if (MULOp == SDValue())
11973 return SDValue();
11974
11975 // Figure out the right opcode.
11976 unsigned Opc = MULOp->getOpcode();
11977 unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL;
11978
11979 // Figure out the high and low input values to the MLAL node.
11980 SDValue *HiAddSub = nullptr;
11981 SDValue *LoMul = nullptr;
11982 SDValue *LowAddSub = nullptr;
11983
11984 // Ensure that ADDE/SUBE is from high result of ISD::xMUL_LOHI.
11985 if ((AddeSubeOp0 != MULOp.getValue(1)) && (AddeSubeOp1 != MULOp.getValue(1)))
11986 return SDValue();
11987
11988 if (IsLeftOperandMUL)
11989 HiAddSub = &AddeSubeOp1;
11990 else
11991 HiAddSub = &AddeSubeOp0;
11992
11993 // Ensure that LoMul and LowAddSub are taken from the correct ISD::SMUL_LOHI node
11994 // whose low result is fed to the ADDC/SUBC we are checking.
11995
11996 if (AddcSubcOp0 == MULOp.getValue(0)) {
11997 LoMul = &AddcSubcOp0;
11998 LowAddSub = &AddcSubcOp1;
11999 }
12000 if (AddcSubcOp1 == MULOp.getValue(0)) {
12001 LoMul = &AddcSubcOp1;
12002 LowAddSub = &AddcSubcOp0;
12003 }
12004
12005 if (!LoMul)
12006 return SDValue();
12007
12008 // If HiAddSub is the same node as ADDC/SUBC or is a predecessor of ADDC/SUBC
12009 // the replacement below will create a cycle.
12010 if (AddcSubcNode == HiAddSub->getNode() ||
12011 AddcSubcNode->isPredecessorOf(HiAddSub->getNode()))
12012 return SDValue();
12013
12014 // Create the merged node.
12015 SelectionDAG &DAG = DCI.DAG;
12016
12017 // Start building operand list.
12018 SmallVector<SDValue, 8> Ops;
12019 Ops.push_back(LoMul->getOperand(0));
12020 Ops.push_back(LoMul->getOperand(1));
12021
12022 // Check whether we can use SMMLAR, SMMLSR or SMMULR instead. For this to be
12023 // the case, we must be doing signed multiplication and only use the higher
12024 // part of the result of the MLAL; furthermore, the LowAddSub must be a constant
12025 // addition or subtraction with the exact value 0x80000000.
12026 if (Subtarget->hasV6Ops() && Subtarget->hasDSP() && Subtarget->useMulOps() &&
12027 FinalOpc == ARMISD::SMLAL && !AddeSubeNode->hasAnyUseOfValue(1) &&
12028 LowAddSub->getNode()->getOpcode() == ISD::Constant &&
12029 static_cast<ConstantSDNode *>(LowAddSub->getNode())->getZExtValue() ==
12030 0x80000000) {
12031 Ops.push_back(*HiAddSub);
12032 if (AddcSubcNode->getOpcode() == ARMISD::SUBC) {
12033 FinalOpc = ARMISD::SMMLSR;
12034 } else {
12035 FinalOpc = ARMISD::SMMLAR;
12036 }
12037 SDValue NewNode = DAG.getNode(FinalOpc, SDLoc(AddcSubcNode), MVT::i32, Ops);
12038 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeSubeNode, 0), NewNode);
12039
12040 return SDValue(AddeSubeNode, 0);
12041 } else if (AddcSubcNode->getOpcode() == ARMISD::SUBC)
12042 // SMMLS is generated during instruction selection and the rest of this
12043 // function cannot handle the case where AddcSubcNode is a SUBC.
12044 return SDValue();
12045
12046 // Finish building the operand list for {U/S}MLAL
12047 Ops.push_back(*LowAddSub);
12048 Ops.push_back(*HiAddSub);
12049
12050 SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcSubcNode),
12051 DAG.getVTList(MVT::i32, MVT::i32), Ops);
12052
12053 // Replace the ADD nodes' uses with the MLA node's values.
12054 SDValue HiMLALResult(MLALNode.getNode(), 1);
12055 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeSubeNode, 0), HiMLALResult);
12056
12057 SDValue LoMLALResult(MLALNode.getNode(), 0);
12058 DAG.ReplaceAllUsesOfValueWith(SDValue(AddcSubcNode, 0), LoMLALResult);
12059
12060 // Return original node to notify the driver to stop replacing.
12061 return SDValue(AddeSubeNode, 0);
12062}
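Two scalar reference models for the nodes this combine produces, under my reading of the ARM semantics (illustrative only): UMLAL accumulates a full 64-bit product, and SMMLAR keeps only the rounded high half.

#include <cstdint>
static uint64_t umlalModel(uint32_t Lo, uint32_t Hi, uint32_t Rn, uint32_t Rm) {
  return (((uint64_t)Hi << 32) | Lo) + (uint64_t)Rn * Rm; // 64-bit accumulate
}
static int32_t smmlarModel(int32_t Acc, int32_t Rn, int32_t Rm) {
  uint64_t AccHi = (uint64_t)(uint32_t)Acc << 32;          // Acc occupies the high word
  uint64_t Sum = AccHi + (uint64_t)((int64_t)Rn * Rm) + 0x80000000u;
  return (int32_t)(Sum >> 32);                             // rounded high half only
}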
12063
12064static SDValue AddCombineTo64bitUMAAL(SDNode *AddeNode,
12065 TargetLowering::DAGCombinerInfo &DCI,
12066 const ARMSubtarget *Subtarget) {
12067 // UMAAL is similar to UMLAL except that it adds two unsigned values.
12068 // While trying to combine for the other MLAL nodes, first search for the
12069 // chance to use UMAAL. Check if Addc uses a node which has already
12070 // been combined into a UMLAL. The other pattern is UMLAL using Addc/Adde
12071 // as the addend, and it's handled in PerformUMLALCombine.
12072
12073 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
12074 return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);
12075
12076 // Check that we have a glued ADDC node.
12077 SDNode* AddcNode = AddeNode->getOperand(2).getNode();
12078 if (AddcNode->getOpcode() != ARMISD::ADDC)
12079 return SDValue();
12080
12081 // Find the converted UMAAL or quit if it doesn't exist.
12082 SDNode *UmlalNode = nullptr;
12083 SDValue AddHi;
12084 if (AddcNode->getOperand(0).getOpcode() == ARMISD::UMLAL) {
12085 UmlalNode = AddcNode->getOperand(0).getNode();
12086 AddHi = AddcNode->getOperand(1);
12087 } else if (AddcNode->getOperand(1).getOpcode() == ARMISD::UMLAL) {
12088 UmlalNode = AddcNode->getOperand(1).getNode();
12089 AddHi = AddcNode->getOperand(0);
12090 } else {
12091 return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);
12092 }
12093
12094 // The ADDC should be glued to an ADDE node, which uses the same UMLAL as
12095 // the ADDC as well as Zero.
12096 if (!isNullConstant(UmlalNode->getOperand(3)))
12097 return SDValue();
12098
12099 if ((isNullConstant(AddeNode->getOperand(0)) &&
12100 AddeNode->getOperand(1).getNode() == UmlalNode) ||
12101 (AddeNode->getOperand(0).getNode() == UmlalNode &&
12102 isNullConstant(AddeNode->getOperand(1)))) {
12103 SelectionDAG &DAG = DCI.DAG;
12104 SDValue Ops[] = { UmlalNode->getOperand(0), UmlalNode->getOperand(1),
12105 UmlalNode->getOperand(2), AddHi };
12106 SDValue UMAAL = DAG.getNode(ARMISD::UMAAL, SDLoc(AddcNode),
12107 DAG.getVTList(MVT::i32, MVT::i32), Ops);
12108
12109 // Replace the ADD nodes' uses with the UMAAL node's values.
12110 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), SDValue(UMAAL.getNode(), 1));
12111 DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), SDValue(UMAAL.getNode(), 0));
12112
12113 // Return original node to notify the driver to stop replacing.
12114 return SDValue(AddeNode, 0);
12115 }
12116 return SDValue();
12117}
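UMAAL itself has a compact scalar description, which may make the pattern above easier to follow (reference sketch, assuming the standard ARM definition): one unsigned 32x32 multiply plus two independent 32-bit addends, with no possibility of overflow in 64 bits.

#include <cstdint>
static uint64_t umaalModel(uint32_t Rn, uint32_t Rm, uint32_t AddLo, uint32_t AddHi) {
  // Max value: (2^32-1)^2 + 2*(2^32-1) == 2^64 - 1, so the result always fits.
  return (uint64_t)Rn * Rm + AddLo + AddHi;
}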
12118
12119static SDValue PerformUMLALCombine(SDNode *N, SelectionDAG &DAG,
12120 const ARMSubtarget *Subtarget) {
12121 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
12122 return SDValue();
12123
12124 // Check that we have a pair of ADDC and ADDE as operands.
12125 // Both addends of the ADDE must be zero.
12126 SDNode* AddcNode = N->getOperand(2).getNode();
12127 SDNode* AddeNode = N->getOperand(3).getNode();
12128 if ((AddcNode->getOpcode() == ARMISD::ADDC) &&
12129 (AddeNode->getOpcode() == ARMISD::ADDE) &&
12130 isNullConstant(AddeNode->getOperand(0)) &&
12131 isNullConstant(AddeNode->getOperand(1)) &&
12132 (AddeNode->getOperand(2).getNode() == AddcNode))
12133 return DAG.getNode(ARMISD::UMAAL, SDLoc(N),
12134 DAG.getVTList(MVT::i32, MVT::i32),
12135 {N->getOperand(0), N->getOperand(1),
12136 AddcNode->getOperand(0), AddcNode->getOperand(1)});
12137 else
12138 return SDValue();
12139}
12140
12141static SDValue PerformAddcSubcCombine(SDNode *N,
12142 TargetLowering::DAGCombinerInfo &DCI,
12143 const ARMSubtarget *Subtarget) {
12144 SelectionDAG &DAG(DCI.DAG);
12145
12146 if (N->getOpcode() == ARMISD::SUBC) {
12147 // (SUBC (ADDE 0, 0, C), 1) -> C
12148 SDValue LHS = N->getOperand(0);
12149 SDValue RHS = N->getOperand(1);
12150 if (LHS->getOpcode() == ARMISD::ADDE &&
12151 isNullConstant(LHS->getOperand(0)) &&
12152 isNullConstant(LHS->getOperand(1)) && isOneConstant(RHS)) {
12153 return DCI.CombineTo(N, SDValue(N, 0), LHS->getOperand(2));
12154 }
12155 }
12156
12157 if (Subtarget->isThumb1Only()) {
12158 SDValue RHS = N->getOperand(1);
12159 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
12160 int32_t imm = C->getSExtValue();
12161 if (imm < 0 && imm > std::numeric_limits<int>::min()) {
12162 SDLoc DL(N);
12163 RHS = DAG.getConstant(-imm, DL, MVT::i32);
12164 unsigned Opcode = (N->getOpcode() == ARMISD::ADDC) ? ARMISD::SUBC
12165 : ARMISD::ADDC;
12166 return DAG.getNode(Opcode, DL, N->getVTList(), N->getOperand(0), RHS);
12167 }
12168 }
12169 }
12170
12171 return SDValue();
12172}
12173
12174static SDValue PerformAddeSubeCombine(SDNode *N,
12175 TargetLowering::DAGCombinerInfo &DCI,
12176 const ARMSubtarget *Subtarget) {
12177 if (Subtarget->isThumb1Only()) {
12178 SelectionDAG &DAG = DCI.DAG;
12179 SDValue RHS = N->getOperand(1);
12180 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
12181 int64_t imm = C->getSExtValue();
12182 if (imm < 0) {
12183 SDLoc DL(N);
12184
12185 // The with-carry-in form matches bitwise not instead of the negation.
12186 // Effectively, the inverse interpretation of the carry flag already
12187 // accounts for part of the negation.
12188 RHS = DAG.getConstant(~imm, DL, MVT::i32);
12189
12190 unsigned Opcode = (N->getOpcode() == ARMISD::ADDE) ? ARMISD::SUBE
12191 : ARMISD::ADDE;
12192 return DAG.getNode(Opcode, DL, N->getVTList(),
12193 N->getOperand(0), RHS, N->getOperand(2));
12194 }
12195 }
12196 } else if (N->getOperand(1)->getOpcode() == ISD::SMUL_LOHI) {
12197 return AddCombineTo64bitMLAL(N, DCI, Subtarget);
12198 }
12199 return SDValue();
12200}
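The bitwise-not trick used above can be verified on scalars. Assuming the usual ARM carry convention (SBC computes a - b - !carry, i.e. a + ~b + carry), an ADDE of a negative immediate and a SUBE of its bitwise complement are the same operation (illustrative helpers):

#include <cstdint>
static uint32_t addeModel(uint32_t A, uint32_t B, bool Carry) { return A + B + Carry; }
static uint32_t subeModel(uint32_t A, uint32_t B, bool Carry) { return A + ~B + Carry; }
// For any Imm: addeModel(A, Imm, C) == subeModel(A, ~Imm, C), since ~(~Imm) == Imm.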
12201
12202static SDValue PerformSELECTCombine(SDNode *N,
12203 TargetLowering::DAGCombinerInfo &DCI,
12204 const ARMSubtarget *Subtarget) {
12205 if (!Subtarget->hasMVEIntegerOps())
12206 return SDValue();
12207
12208 SDLoc dl(N);
12209 SDValue SetCC;
12210 SDValue LHS;
12211 SDValue RHS;
12212 ISD::CondCode CC;
12213 SDValue TrueVal;
12214 SDValue FalseVal;
12215
12216 if (N->getOpcode() == ISD::SELECT &&
12217 N->getOperand(0)->getOpcode() == ISD::SETCC) {
12218 SetCC = N->getOperand(0);
12219 LHS = SetCC->getOperand(0);
12220 RHS = SetCC->getOperand(1);
12221 CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
12222 TrueVal = N->getOperand(1);
12223 FalseVal = N->getOperand(2);
12224 } else if (N->getOpcode() == ISD::SELECT_CC) {
12225 LHS = N->getOperand(0);
12226 RHS = N->getOperand(1);
12227 CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
12228 TrueVal = N->getOperand(2);
12229 FalseVal = N->getOperand(3);
12230 } else {
12231 return SDValue();
12232 }
12233
12234 unsigned int Opcode = 0;
12235 if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMIN ||
12236 FalseVal->getOpcode() == ISD::VECREDUCE_UMIN) &&
12237 (CC == ISD::SETULT || CC == ISD::SETUGT)) {
12238 Opcode = ARMISD::VMINVu;
12239 if (CC == ISD::SETUGT)
12240 std::swap(TrueVal, FalseVal);
12241 } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMIN ||
12242 FalseVal->getOpcode() == ISD::VECREDUCE_SMIN) &&
12243 (CC == ISD::SETLT || CC == ISD::SETGT)) {
12244 Opcode = ARMISD::VMINVs;
12245 if (CC == ISD::SETGT)
12246 std::swap(TrueVal, FalseVal);
12247 } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMAX ||
12248 FalseVal->getOpcode() == ISD::VECREDUCE_UMAX) &&
12249 (CC == ISD::SETUGT || CC == ISD::SETULT)) {
12250 Opcode = ARMISD::VMAXVu;
12251 if (CC == ISD::SETULT)
12252 std::swap(TrueVal, FalseVal);
12253 } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMAX ||
12254 FalseVal->getOpcode() == ISD::VECREDUCE_SMAX) &&
12255 (CC == ISD::SETGT || CC == ISD::SETLT)) {
12256 Opcode = ARMISD::VMAXVs;
12257 if (CC == ISD::SETLT)
12258 std::swap(TrueVal, FalseVal);
12259 } else
12260 return SDValue();
12261
12262 // Normalise to the right hand side being the vector reduction
12263 switch (TrueVal->getOpcode()) {
12264 case ISD::VECREDUCE_UMIN:
12265 case ISD::VECREDUCE_SMIN:
12266 case ISD::VECREDUCE_UMAX:
12267 case ISD::VECREDUCE_SMAX:
12268 std::swap(LHS, RHS);
12269 std::swap(TrueVal, FalseVal);
12270 break;
12271 }
12272
12273 EVT VectorType = FalseVal->getOperand(0).getValueType();
12274
12275 if (VectorType != MVT::v16i8 && VectorType != MVT::v8i16 &&
12276 VectorType != MVT::v4i32)
12277 return SDValue();
12278
12279 EVT VectorScalarType = VectorType.getVectorElementType();
12280
12281 // The values being selected must also be the ones being compared
12282 if (TrueVal != LHS || FalseVal != RHS)
12283 return SDValue();
12284
12285 EVT LeftType = LHS->getValueType(0);
12286 EVT RightType = RHS->getValueType(0);
12287
12288 // The types must match the reduced type too
12289 if (LeftType != VectorScalarType || RightType != VectorScalarType)
12290 return SDValue();
12291
12292 // Legalise the scalar to an i32
12293 if (VectorScalarType != MVT::i32)
12294 LHS = DCI.DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS);
12295
12296 // Generate the reduction as an i32 for legalisation purposes
12297 auto Reduction =
12298 DCI.DAG.getNode(Opcode, dl, MVT::i32, LHS, RHS->getOperand(0));
12299
12300 // The result isn't actually an i32 so truncate it back to its original type
12301 if (VectorScalarType != MVT::i32)
12302 Reduction = DCI.DAG.getNode(ISD::TRUNCATE, dl, VectorScalarType, Reduction);
12303
12304 return Reduction;
12305}
12306
12307// A special combine for the vqdmulh family of instructions. This is one of
12308// the potential set of patterns that could match this instruction. The base
12309// pattern you would expect is min(max(ashr(mul(mul(sext(x), 2), sext(y)), 16))).
12310// This matches the variant min(max(ashr(mul(mul(sext(x), sext(y)), 2), 16))),
12311// which LLVM will have optimized to min(ashr(mul(sext(x), sext(y)), 15)) as
12312// the max is unnecessary.
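// For example, with v8i16 inputs sign-extended to v8i32 this matches,
// roughly,
//   smin (sra (mul (sext x), (sext y)), 15), 32767
// and rewrites it to sext(VQDMULH x, y); the i8 and i32 clamp and shift
// amounts are handled the same way.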
12313static SDValue PerformVQDMULHCombine(SDNode *N, SelectionDAG &DAG) {
12314 EVT VT = N->getValueType(0);
12315 SDValue Shft;
12316 ConstantSDNode *Clamp;
12317
12318 if (N->getOpcode() == ISD::SMIN) {
12319 Shft = N->getOperand(0);
12320 Clamp = isConstOrConstSplat(N->getOperand(1));
12321 } else if (N->getOpcode() == ISD::VSELECT) {
12322 // Detect a SMIN, which for an i64 node will be a vselect/setcc, not a smin.
12323 SDValue Cmp = N->getOperand(0);
12324 if (Cmp.getOpcode() != ISD::SETCC ||
12325 cast<CondCodeSDNode>(Cmp.getOperand(2))->get() != ISD::SETLT ||
12326 Cmp.getOperand(0) != N->getOperand(1) ||
12327 Cmp.getOperand(1) != N->getOperand(2))
12328 return SDValue();
12329 Shft = N->getOperand(1);
12330 Clamp = isConstOrConstSplat(N->getOperand(2));
12331 } else
12332 return SDValue();
12333
12334 if (!Clamp)
12335 return SDValue();
12336
12337 MVT ScalarType;
12338 int ShftAmt = 0;
12339 switch (Clamp->getSExtValue()) {
12340 case (1 << 7) - 1:
12341 ScalarType = MVT::i8;
12342 ShftAmt = 7;
12343 break;
12344 case (1 << 15) - 1:
12345 ScalarType = MVT::i16;
12346 ShftAmt = 15;
12347 break;
12348 case (1ULL << 31) - 1:
12349 ScalarType = MVT::i32;
12350 ShftAmt = 31;
12351 break;
12352 default:
12353 return SDValue();
12354 }
12355
12356 if (Shft.getOpcode() != ISD::SRA)
12357 return SDValue();
12358 ConstantSDNode *N1 = isConstOrConstSplat(Shft.getOperand(1));
12359 if (!N1 || N1->getSExtValue() != ShftAmt)
12360 return SDValue();
12361
12362 SDValue Mul = Shft.getOperand(0);
12363 if (Mul.getOpcode() != ISD::MUL)
12364 return SDValue();
12365
12366 SDValue Ext0 = Mul.getOperand(0);
12367 SDValue Ext1 = Mul.getOperand(1);
12368 if (Ext0.getOpcode() != ISD::SIGN_EXTEND ||
12369 Ext1.getOpcode() != ISD::SIGN_EXTEND)
12370 return SDValue();
12371 EVT VecVT = Ext0.getOperand(0).getValueType();
12372 if (VecVT != MVT::v4i32 && VecVT != MVT::v8i16 && VecVT != MVT::v16i8)
12373 return SDValue();
12374 if (Ext1.getOperand(0).getValueType() != VecVT ||
12375 VecVT.getScalarType() != ScalarType ||
12376 VT.getScalarSizeInBits() < ScalarType.getScalarSizeInBits() * 2)
12377 return SDValue();
12378
12379 SDLoc DL(Mul);
12380 SDValue VQDMULH = DAG.getNode(ARMISD::VQDMULH, DL, VecVT, Ext0.getOperand(0),
12381 Ext1.getOperand(0));
12382 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, VQDMULH);
12383}
12384
12385static SDValue PerformVSELECTCombine(SDNode *N,
12386 TargetLowering::DAGCombinerInfo &DCI,
12387 const ARMSubtarget *Subtarget) {
12388 if (!Subtarget->hasMVEIntegerOps())
12389 return SDValue();
12390
12391 if (SDValue V = PerformVQDMULHCombine(N, DCI.DAG))
12392 return V;
12393
12394 // Transforms vselect(not(cond), lhs, rhs) into vselect(cond, rhs, lhs).
12395 //
12396 // We need to re-implement this optimization here as the implementation in the
12397 // Target-Independent DAGCombiner does not handle the kind of constant we make
12398 // (it calls isConstOrConstSplat with AllowTruncation set to false - and for
12399 // good reason, allowing truncation there would break other targets).
12400 //
12401 // Currently, this is only done for MVE, as it's the only target that benefits
12402 // from this transformation (e.g. VPNOT+VPSEL becomes a single VPSEL).
12403 if (N->getOperand(0).getOpcode() != ISD::XOR)
12404 return SDValue();
12405 SDValue XOR = N->getOperand(0);
12406
12407 // Check if the XOR's RHS is either a 1, or a BUILD_VECTOR of 1s.
12408 // It is important to check with truncation allowed as the BUILD_VECTORs we
12409 // generate in those situations will truncate their operands.
12410 ConstantSDNode *Const =
12411 isConstOrConstSplat(XOR->getOperand(1), /*AllowUndefs*/ false,
12412 /*AllowTruncation*/ true);
12413 if (!Const || !Const->isOne())
12414 return SDValue();
12415
12416 // Rewrite into vselect(cond, rhs, lhs).
12417 SDValue Cond = XOR->getOperand(0);
12418 SDValue LHS = N->getOperand(1);
12419 SDValue RHS = N->getOperand(2);
12420 EVT Type = N->getValueType(0);
12421 return DCI.DAG.getNode(ISD::VSELECT, SDLoc(N), Type, Cond, RHS, LHS);
12422}
12423
12424static SDValue PerformABSCombine(SDNode *N,
12425 TargetLowering::DAGCombinerInfo &DCI,
12426 const ARMSubtarget *Subtarget) {
12427 SDValue res;
12428 SelectionDAG &DAG = DCI.DAG;
12429 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12430
12431 if (TLI.isOperationLegal(N->getOpcode(), N->getValueType(0)))
12432 return SDValue();
12433
12434 if (!TLI.expandABS(N, res, DAG))
12435 return SDValue();
12436
12437 return res;
12438}
12439
12440/// PerformADDECombine - Target-specific dag combine transform from
12441/// ARMISD::ADDC, ARMISD::ADDE, and ISD::MUL_LOHI to MLAL or
12442/// ARMISD::ADDC, ARMISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL
12443static SDValue PerformADDECombine(SDNode *N,
12444 TargetLowering::DAGCombinerInfo &DCI,
12445 const ARMSubtarget *Subtarget) {
12446 // Only ARM and Thumb2 support UMLAL/SMLAL.
12447 if (Subtarget->isThumb1Only())
12448 return PerformAddeSubeCombine(N, DCI, Subtarget);
12449
12450 // Only perform the checks after legalize when the pattern is available.
12451 if (DCI.isBeforeLegalize()) return SDValue();
12452
12453 return AddCombineTo64bitUMAAL(N, DCI, Subtarget);
12454}
12455
12456/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
12457/// operands N0 and N1. This is a helper for PerformADDCombine that is
12458/// called with the default operands, and if that fails, with commuted
12459/// operands.
12460static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
12461 TargetLowering::DAGCombinerInfo &DCI,
12462 const ARMSubtarget *Subtarget){
12463 // Attempt to create vpadd for this add.
12464 if (SDValue Result = AddCombineToVPADD(N, N0, N1, DCI, Subtarget))
12465 return Result;
12466
12467 // Attempt to create vpaddl for this add.
12468 if (SDValue Result = AddCombineVUZPToVPADDL(N, N0, N1, DCI, Subtarget))
12469 return Result;
12470 if (SDValue Result = AddCombineBUILD_VECTORToVPADDL(N, N0, N1, DCI,
12471 Subtarget))
12472 return Result;
12473
12474 // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
12475 if (N0.getNode()->hasOneUse())
12476 if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI))
12477 return Result;
12478 return SDValue();
12479}
12480
12481static SDValue PerformADDVecReduce(SDNode *N,
12482 TargetLowering::DAGCombinerInfo &DCI,
12483 const ARMSubtarget *Subtarget) {
12484 if (!Subtarget->hasMVEIntegerOps() || N->getValueType(0) != MVT::i64)
12485 return SDValue();
12486
12487 SDValue N0 = N->getOperand(0);
12488 SDValue N1 = N->getOperand(1);
12489
12490 // We are looking for an i64 add of a VADDLVx. Due to these being i64's, this
12491 // will look like:
12492 // t1: i32,i32 = ARMISD::VADDLVs x
12493 // t2: i64 = build_pair t1, t1:1
12494 // t3: i64 = add t2, y
12495 // We also need to check for sext / zext and commutative adds.
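// When the pattern matches, the add is folded into the accumulating form of
// the reduction, roughly
//   (add (build_pair (VADDLVs x)), y)
//     --> (build_pair (VADDLVAs y_lo, y_hi, x))
// where y_lo/y_hi are the two extracted halves of the i64 operand y.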
12496 auto MakeVecReduce = [&](unsigned Opcode, unsigned OpcodeA, SDValue NA,
12497 SDValue NB) {
12498 if (NB->getOpcode() != ISD::BUILD_PAIR)
12499 return SDValue();
12500 SDValue VecRed = NB->getOperand(0);
12501 if (VecRed->getOpcode() != Opcode || VecRed.getResNo() != 0 ||
12502 NB->getOperand(1) != SDValue(VecRed.getNode(), 1))
12503 return SDValue();
12504
12505 SDLoc dl(N);
12506 SmallVector<SDValue, 4> Ops;
12507 Ops.push_back(DCI.DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, NA,
12508 DCI.DAG.getConstant(0, dl, MVT::i32)));
12509 Ops.push_back(DCI.DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, NA,
12510 DCI.DAG.getConstant(1, dl, MVT::i32)));
12511 for (unsigned i = 0, e = VecRed.getNumOperands(); i < e; i++)
12512 Ops.push_back(VecRed->getOperand(i));
12513 SDValue Red = DCI.DAG.getNode(OpcodeA, dl,
12514 DCI.DAG.getVTList({MVT::i32, MVT::i32}), Ops);
12515 return DCI.DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Red,
12516 SDValue(Red.getNode(), 1));
12517 };
12518
12519 if (SDValue M = MakeVecReduce(ARMISD::VADDLVs, ARMISD::VADDLVAs, N0, N1))
12520 return M;
12521 if (SDValue M = MakeVecReduce(ARMISD::VADDLVu, ARMISD::VADDLVAu, N0, N1))
12522 return M;
12523 if (SDValue M = MakeVecReduce(ARMISD::VADDLVs, ARMISD::VADDLVAs, N1, N0))
12524 return M;
12525 if (SDValue M = MakeVecReduce(ARMISD::VADDLVu, ARMISD::VADDLVAu, N1, N0))
12526 return M;
12527 if (SDValue M = MakeVecReduce(ARMISD::VADDLVps, ARMISD::VADDLVAps, N0, N1))
12528 return M;
12529 if (SDValue M = MakeVecReduce(ARMISD::VADDLVpu, ARMISD::VADDLVApu, N0, N1))
12530 return M;
12531 if (SDValue M = MakeVecReduce(ARMISD::VADDLVps, ARMISD::VADDLVAps, N1, N0))
12532 return M;
12533 if (SDValue M = MakeVecReduce(ARMISD::VADDLVpu, ARMISD::VADDLVApu, N1, N0))
12534 return M;
12535 if (SDValue M = MakeVecReduce(ARMISD::VMLALVs, ARMISD::VMLALVAs, N0, N1))
12536 return M;
12537 if (SDValue M = MakeVecReduce(ARMISD::VMLALVu, ARMISD::VMLALVAu, N0, N1))
12538 return M;
12539 if (SDValue M = MakeVecReduce(ARMISD::VMLALVs, ARMISD::VMLALVAs, N1, N0))
12540 return M;
12541 if (SDValue M = MakeVecReduce(ARMISD::VMLALVu, ARMISD::VMLALVAu, N1, N0))
12542 return M;
12543 if (SDValue M = MakeVecReduce(ARMISD::VMLALVps, ARMISD::VMLALVAps, N0, N1))
12544 return M;
12545 if (SDValue M = MakeVecReduce(ARMISD::VMLALVpu, ARMISD::VMLALVApu, N0, N1))
12546 return M;
12547 if (SDValue M = MakeVecReduce(ARMISD::VMLALVps, ARMISD::VMLALVAps, N1, N0))
12548 return M;
12549 if (SDValue M = MakeVecReduce(ARMISD::VMLALVpu, ARMISD::VMLALVApu, N1, N0))
12550 return M;
12551 return SDValue();
12552}
12553
12554bool
12555ARMTargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
12556 CombineLevel Level) const {
12557 if (Level == BeforeLegalizeTypes)
12558 return true;
12559
12560 if (N->getOpcode() != ISD::SHL)
12561 return true;
12562
12563 if (Subtarget->isThumb1Only()) {
12564 // Avoid making expensive immediates by commuting shifts. (This logic
12565 // only applies to Thumb1 because ARM and Thumb2 immediates can be shifted
12566 // for free.)
12567 if (N->getOpcode() != ISD::SHL)
12568 return true;
12569 SDValue N1 = N->getOperand(0);
12570 if (N1->getOpcode() != ISD::ADD && N1->getOpcode() != ISD::AND &&
12571 N1->getOpcode() != ISD::OR && N1->getOpcode() != ISD::XOR)
12572 return true;
12573 if (auto *Const = dyn_cast<ConstantSDNode>(N1->getOperand(1))) {
12574 if (Const->getAPIntValue().ult(256))
12575 return false;
12576 if (N1->getOpcode() == ISD::ADD && Const->getAPIntValue().slt(0) &&
12577 Const->getAPIntValue().sgt(-256))
12578 return false;
12579 }
12580 return true;
12581 }
12582
12583 // Turn off commute-with-shift transform after legalization, so it doesn't
12584 // conflict with PerformSHLSimplify. (We could try to detect when
12585 // PerformSHLSimplify would trigger more precisely, but it isn't
12586 // really necessary.)
12587 return false;
12588}
12589
12590bool ARMTargetLowering::shouldFoldConstantShiftPairToMask(
12591 const SDNode *N, CombineLevel Level) const {
12592 if (!Subtarget->isThumb1Only())
12593 return true;
12594
12595 if (Level == BeforeLegalizeTypes)
12596 return true;
12597
12598 return false;
12599}
12600
12601bool ARMTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
12602 if (!Subtarget->hasNEON()) {
12603 if (Subtarget->isThumb1Only())
12604 return VT.getScalarSizeInBits() <= 32;
12605 return true;
12606 }
12607 return VT.isScalarInteger();
12608}
12609
12610static SDValue PerformSHLSimplify(SDNode *N,
12611 TargetLowering::DAGCombinerInfo &DCI,
12612 const ARMSubtarget *ST) {
12613 // Allow the generic combiner to identify potential bswaps.
12614 if (DCI.isBeforeLegalize())
12615 return SDValue();
12616
12617 // DAG combiner will fold:
12618 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
12619 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2
12620 // Other code patterns that can also be modified have the following form:
12621 // b + ((a << 1) | 510)
12622 // b + ((a << 1) & 510)
12623 // b + ((a << 1) ^ 510)
12624 // b + ((a << 1) + 510)
12625
12626 // Many instructions can perform the shift for free, but it requires both
12627 // the operands to be registers. If c1 << c2 is too large, a mov immediate
12628 // instruction will be needed. So, unfold back to the original pattern if:
12629 // - c1 and c2 are small enough that they don't require mov imms.
12630 // - the user(s) of the node can perform an shl
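// For example, (add (shl x, 2), 0x880) is unfolded back into
// (shl (add x, 0x220), 2): both 0x880 and 0x220 fit the rotated 8-bit
// immediate encoding, and the user of the add can fold the trailing shl
// into its shifted-operand form.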
12631
12632 // No shifted operands for 16-bit instructions.
12633 if (ST->isThumb() && ST->isThumb1Only())
12634 return SDValue();
12635
12636 // Check that all the users could perform the shl themselves.
12637 for (auto U : N->uses()) {
12638 switch(U->getOpcode()) {
12639 default:
12640 return SDValue();
12641 case ISD::SUB:
12642 case ISD::ADD:
12643 case ISD::AND:
12644 case ISD::OR:
12645 case ISD::XOR:
12646 case ISD::SETCC:
12647 case ARMISD::CMP:
12648 // Check that the user isn't already using a constant because there
12649 // aren't any instructions that support an immediate operand and a
12650 // shifted operand.
12651 if (isa<ConstantSDNode>(U->getOperand(0)) ||
12652 isa<ConstantSDNode>(U->getOperand(1)))
12653 return SDValue();
12654
12655 // Check that it's not already using a shift.
12656 if (U->getOperand(0).getOpcode() == ISD::SHL ||
12657 U->getOperand(1).getOpcode() == ISD::SHL)
12658 return SDValue();
12659 break;
12660 }
12661 }
12662
12663 if (N->getOpcode() != ISD::ADD && N->getOpcode() != ISD::OR &&
12664 N->getOpcode() != ISD::XOR && N->getOpcode() != ISD::AND)
12665 return SDValue();
12666
12667 if (N->getOperand(0).getOpcode() != ISD::SHL)
12668 return SDValue();
12669
12670 SDValue SHL = N->getOperand(0);
12671
12672 auto *C1ShlC2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
12673 auto *C2 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
12674 if (!C1ShlC2 || !C2)
12675 return SDValue();
12676
12677 APInt C2Int = C2->getAPIntValue();
12678 APInt C1Int = C1ShlC2->getAPIntValue();
12679
12680 // Check that performing a lshr will not lose any information.
12681 APInt Mask = APInt::getHighBitsSet(C2Int.getBitWidth(),
12682 C2Int.getBitWidth() - C2->getZExtValue());
12683 if ((C1Int & Mask) != C1Int)
12684 return SDValue();
12685
12686 // Shift the first constant.
12687 C1Int.lshrInPlace(C2Int);
12688
12689 // The immediates are encoded as an 8-bit value that can be rotated.
12690 auto LargeImm = [](const APInt &Imm) {
12691 unsigned Zeros = Imm.countLeadingZeros() + Imm.countTrailingZeros();
12692 return Imm.getBitWidth() - Zeros > 8;
12693 };
12694
12695 if (LargeImm(C1Int) || LargeImm(C2Int))
12696 return SDValue();
12697
12698 SelectionDAG &DAG = DCI.DAG;
12699 SDLoc dl(N);
12700 SDValue X = SHL.getOperand(0);
12701 SDValue BinOp = DAG.getNode(N->getOpcode(), dl, MVT::i32, X,
12702 DAG.getConstant(C1Int, dl, MVT::i32));
12703 // Shift left to compensate for the lshr of C1Int.
12704 SDValue Res = DAG.getNode(ISD::SHL, dl, MVT::i32, BinOp, SHL.getOperand(1));
12705
12706 LLVM_DEBUG(dbgs() << "Simplify shl use:\n"; SHL.getOperand(0).dump();
12707 SHL.dump(); N->dump());
12708 LLVM_DEBUG(dbgs() << "Into:\n"; X.dump(); BinOp.dump(); Res.dump());
12709 return Res;
12710}
12711
12712
12713/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
12714///
12715static SDValue PerformADDCombine(SDNode *N,
12716 TargetLowering::DAGCombinerInfo &DCI,
12717 const ARMSubtarget *Subtarget) {
12718 SDValue N0 = N->getOperand(0);
12719 SDValue N1 = N->getOperand(1);
12720
12721 // Only works one way, because it needs an immediate operand.
12722 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
12723 return Result;
12724
12725 if (SDValue Result = PerformADDVecReduce(N, DCI, Subtarget))
12726 return Result;
12727
12728 // First try with the default operand order.
12729 if (SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget))
12730 return Result;
12731
12732 // If that didn't work, try again with the operands commuted.
12733 return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget);
12734}
12735
12736/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
12737///
12738static SDValue PerformSUBCombine(SDNode *N,
12739 TargetLowering::DAGCombinerInfo &DCI,
12740 const ARMSubtarget *Subtarget) {
12741 SDValue N0 = N->getOperand(0);
12742 SDValue N1 = N->getOperand(1);
12743
12744 // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
12745 if (N1.getNode()->hasOneUse())
12746 if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI))
12747 return Result;
12748
12749 if (!Subtarget->hasMVEIntegerOps() || !N->getValueType(0).isVector())
12750 return SDValue();
12751
12752 // Fold (sub (ARMvmovImm 0), (ARMvdup x)) -> (ARMvdup (sub 0, x))
12753 // so that we can readily pattern match more mve instructions which can use
12754 // a scalar operand.
12755 SDValue VDup = N->getOperand(1);
12756 if (VDup->getOpcode() != ARMISD::VDUP)
12757 return SDValue();
12758
12759 SDValue VMov = N->getOperand(0);
12760 if (VMov->getOpcode() == ISD::BITCAST)
12761 VMov = VMov->getOperand(0);
12762
12763 if (VMov->getOpcode() != ARMISD::VMOVIMM || !isZeroVector(VMov))
12764 return SDValue();
12765
12766 SDLoc dl(N);
12767 SDValue Negate = DCI.DAG.getNode(ISD::SUB, dl, MVT::i32,
12768 DCI.DAG.getConstant(0, dl, MVT::i32),
12769 VDup->getOperand(0));
12770 return DCI.DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0), Negate);
12771}
12772
12773/// PerformVMULCombine
12774/// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the
12775/// special multiplier accumulator forwarding.
12776/// vmul d3, d0, d2
12777/// vmla d3, d1, d2
12778/// is faster than
12779/// vadd d3, d0, d1
12780/// vmul d3, d3, d2
12781// However, for (A + B) * (A + B),
12782// vadd d2, d0, d1
12783// vmul d3, d0, d2
12784// vmla d3, d1, d2
12785// is slower than
12786// vadd d2, d0, d1
12787// vmul d3, d2, d2
12788static SDValue PerformVMULCombine(SDNode *N,
12789 TargetLowering::DAGCombinerInfo &DCI,
12790 const ARMSubtarget *Subtarget) {
12791 if (!Subtarget->hasVMLxForwarding())
12792 return SDValue();
12793
12794 SelectionDAG &DAG = DCI.DAG;
12795 SDValue N0 = N->getOperand(0);
12796 SDValue N1 = N->getOperand(1);
12797 unsigned Opcode = N0.getOpcode();
12798 if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
12799 Opcode != ISD::FADD && Opcode != ISD::FSUB) {
12800 Opcode = N1.getOpcode();
12801 if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
12802 Opcode != ISD::FADD && Opcode != ISD::FSUB)
12803 return SDValue();
12804 std::swap(N0, N1);
12805 }
12806
12807 if (N0 == N1)
12808 return SDValue();
12809
12810 EVT VT = N->getValueType(0);
12811 SDLoc DL(N);
12812 SDValue N00 = N0->getOperand(0);
12813 SDValue N01 = N0->getOperand(1);
12814 return DAG.getNode(Opcode, DL, VT,
12815 DAG.getNode(ISD::MUL, DL, VT, N00, N1),
12816 DAG.getNode(ISD::MUL, DL, VT, N01, N1));
12817}
12818
12819static SDValue PerformMVEVMULLCombine(SDNode *N, SelectionDAG &DAG,
12820 const ARMSubtarget *Subtarget) {
12821 EVT VT = N->getValueType(0);
12822 if (VT != MVT::v2i64)
12823 return SDValue();
12824
12825 SDValue N0 = N->getOperand(0);
12826 SDValue N1 = N->getOperand(1);
12827
12828 auto IsSignExt = [&](SDValue Op) {
12829 if (Op->getOpcode() != ISD::SIGN_EXTEND_INREG)
12830 return SDValue();
12831 EVT VT = cast<VTSDNode>(Op->getOperand(1))->getVT();
12832 if (VT.getScalarSizeInBits() == 32)
12833 return Op->getOperand(0);
12834 return SDValue();
12835 };
12836 auto IsZeroExt = [&](SDValue Op) {
12837 // Zero extends are a little more awkward. At the point we are matching
12838 // this, we are looking for an AND with a (-1, 0, -1, 0) buildvector mask.
12839 // That might be before or after a bitcast depending on how the and is
12840 // placed. Because this has to look through bitcasts, it is currently only
12841 // supported on LE.
12842 if (!Subtarget->isLittle())
12843 return SDValue();
12844
12845 SDValue And = Op;
12846 if (And->getOpcode() == ISD::BITCAST)
12847 And = And->getOperand(0);
12848 if (And->getOpcode() != ISD::AND)
12849 return SDValue();
12850 SDValue Mask = And->getOperand(1);
12851 if (Mask->getOpcode() == ISD::BITCAST)
12852 Mask = Mask->getOperand(0);
12853
12854 if (Mask->getOpcode() != ISD::BUILD_VECTOR ||
12855 Mask.getValueType() != MVT::v4i32)
12856 return SDValue();
12857 if (isAllOnesConstant(Mask->getOperand(0)) &&
12858 isNullConstant(Mask->getOperand(1)) &&
12859 isAllOnesConstant(Mask->getOperand(2)) &&
12860 isNullConstant(Mask->getOperand(3)))
12861 return And->getOperand(0);
12862 return SDValue();
12863 };
12864
12865 SDLoc dl(N);
12866 if (SDValue Op0 = IsSignExt(N0)) {
12867 if (SDValue Op1 = IsSignExt(N1)) {
12868 SDValue New0a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op0);
12869 SDValue New1a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op1);
12870 return DAG.getNode(ARMISD::VMULLs, dl, VT, New0a, New1a);
12871 }
12872 }
12873 if (SDValue Op0 = IsZeroExt(N0)) {
12874 if (SDValue Op1 = IsZeroExt(N1)) {
12875 SDValue New0a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op0);
12876 SDValue New1a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op1);
12877 return DAG.getNode(ARMISD::VMULLu, dl, VT, New0a, New1a);
12878 }
12879 }
12880
12881 return SDValue();
12882}
12883
12884static SDValue PerformMULCombine(SDNode *N,
12885 TargetLowering::DAGCombinerInfo &DCI,
12886 const ARMSubtarget *Subtarget) {
12887 SelectionDAG &DAG = DCI.DAG;
12888
12889 EVT VT = N->getValueType(0);
12890 if (Subtarget->hasMVEIntegerOps() && VT == MVT::v2i64)
12891 return PerformMVEVMULLCombine(N, DAG, Subtarget);
12892
12893 if (Subtarget->isThumb1Only())
12894 return SDValue();
12895
12896 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
12897 return SDValue();
12898
12899 if (VT.is64BitVector() || VT.is128BitVector())
12900 return PerformVMULCombine(N, DCI, Subtarget);
12901 if (VT != MVT::i32)
12902 return SDValue();
12903
12904 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
12905 if (!C)
12906 return SDValue();
12907
12908 int64_t MulAmt = C->getSExtValue();
12909 unsigned ShiftAmt = countTrailingZeros<uint64_t>(MulAmt);
12910
12911 ShiftAmt = ShiftAmt & (32 - 1);
12912 SDValue V = N->getOperand(0);
12913 SDLoc DL(N);
12914
12915 SDValue Res;
12916 MulAmt >>= ShiftAmt;
12917
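// E.g. (mul x, 36): ShiftAmt = 2 strips the trailing zeros, MulAmt becomes
// 9 = 2^3 + 1, and the final result is (shl (add x, (shl x, 3)), 2).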
12918 if (MulAmt >= 0) {
12919 if (isPowerOf2_32(MulAmt - 1)) {
12920 // (mul x, 2^N + 1) => (add (shl x, N), x)
12921 Res = DAG.getNode(ISD::ADD, DL, VT,
12922 V,
12923 DAG.getNode(ISD::SHL, DL, VT,
12924 V,
12925 DAG.getConstant(Log2_32(MulAmt - 1), DL,
12926 MVT::i32)));
12927 } else if (isPowerOf2_32(MulAmt + 1)) {
12928 // (mul x, 2^N - 1) => (sub (shl x, N), x)
12929 Res = DAG.getNode(ISD::SUB, DL, VT,
12930 DAG.getNode(ISD::SHL, DL, VT,
12931 V,
12932 DAG.getConstant(Log2_32(MulAmt + 1), DL,
12933 MVT::i32)),
12934 V);
12935 } else
12936 return SDValue();
12937 } else {
12938 uint64_t MulAmtAbs = -MulAmt;
12939 if (isPowerOf2_32(MulAmtAbs + 1)) {
12940 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
12941 Res = DAG.getNode(ISD::SUB, DL, VT,
12942 V,
12943 DAG.getNode(ISD::SHL, DL, VT,
12944 V,
12945 DAG.getConstant(Log2_32(MulAmtAbs + 1), DL,
12946 MVT::i32)));
12947 } else if (isPowerOf2_32(MulAmtAbs - 1)) {
12948 // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
12949 Res = DAG.getNode(ISD::ADD, DL, VT,
12950 V,
12951 DAG.getNode(ISD::SHL, DL, VT,
12952 V,
12953 DAG.getConstant(Log2_32(MulAmtAbs - 1), DL,
12954 MVT::i32)));
12955 Res = DAG.getNode(ISD::SUB, DL, VT,
12956 DAG.getConstant(0, DL, MVT::i32), Res);
12957 } else
12958 return SDValue();
12959 }
12960
12961 if (ShiftAmt != 0)
12962 Res = DAG.getNode(ISD::SHL, DL, VT,
12963 Res, DAG.getConstant(ShiftAmt, DL, MVT::i32));
12964
12965 // Do not add new nodes to DAG combiner worklist.
12966 DCI.CombineTo(N, Res, false);
12967 return SDValue();
12968}
12969
12970static SDValue CombineANDShift(SDNode *N,
12971 TargetLowering::DAGCombinerInfo &DCI,
12972 const ARMSubtarget *Subtarget) {
12973 // Allow DAGCombine to pattern-match before we touch the canonical form.
12974 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
12975 return SDValue();
12976
12977 if (N->getValueType(0) != MVT::i32)
12978 return SDValue();
12979
12980 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
12981 if (!N1C)
12982 return SDValue();
12983
12984 uint32_t C1 = (uint32_t)N1C->getZExtValue();
12985 // Don't transform uxtb/uxth.
12986 if (C1 == 255 || C1 == 65535)
12987 return SDValue();
12988
12989 SDNode *N0 = N->getOperand(0).getNode();
12990 if (!N0->hasOneUse())
12991 return SDValue();
12992
12993 if (N0->getOpcode() != ISD::SHL && N0->getOpcode() != ISD::SRL)
12994 return SDValue();
12995
12996 bool LeftShift = N0->getOpcode() == ISD::SHL;
12997
12998 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
12999 if (!N01C)
13000 return SDValue();
13001
13002 uint32_t C2 = (uint32_t)N01C->getZExtValue();
13003 if (!C2 || C2 >= 32)
13004 return SDValue();
13005
13006 // Clear irrelevant bits in the mask.
13007 if (LeftShift)
13008 C1 &= (-1U << C2);
13009 else
13010 C1 &= (-1U >> C2);
13011
13012 SelectionDAG &DAG = DCI.DAG;
13013 SDLoc DL(N);
13014
13015 // We have a pattern of the form "(and (shl x, c2) c1)" or
13016 // "(and (srl x, c2) c1)", where c1 is a shifted mask. Try to
13017 // transform to a pair of shifts, to save materializing c1.
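// For instance, (and (srl x, 2), 0x3f) becomes (srl (shl x, 24), 26): the
// shl discards the bits above the field and the srl then places the field
// at bit 0, so no mask constant needs to be materialized.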
13018
13019 // First pattern: right shift, then mask off leading bits.
13020 // FIXME: Use demanded bits?
13021 if (!LeftShift && isMask_32(C1)) {
13022 uint32_t C3 = countLeadingZeros(C1);
13023 if (C2 < C3) {
13024 SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
13025 DAG.getConstant(C3 - C2, DL, MVT::i32));
13026 return DAG.getNode(ISD::SRL, DL, MVT::i32, SHL,
13027 DAG.getConstant(C3, DL, MVT::i32));
13028 }
13029 }
13030
13031 // First pattern, reversed: left shift, then mask off trailing bits.
13032 if (LeftShift && isMask_32(~C1)) {
13033 uint32_t C3 = countTrailingZeros(C1);
13034 if (C2 < C3) {
13035 SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
13036 DAG.getConstant(C3 - C2, DL, MVT::i32));
13037 return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,
13038 DAG.getConstant(C3, DL, MVT::i32));
13039 }
13040 }
13041
13042 // Second pattern: left shift, then mask off leading bits.
13043 // FIXME: Use demanded bits?
13044 if (LeftShift && isShiftedMask_32(C1)) {
13045 uint32_t Trailing = countTrailingZeros(C1);
13046 uint32_t C3 = countLeadingZeros(C1);
13047 if (Trailing == C2 && C2 + C3 < 32) {
13048 SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
13049 DAG.getConstant(C2 + C3, DL, MVT::i32));
13050 return DAG.getNode(ISD::SRL, DL, MVT::i32, SHL,
13051 DAG.getConstant(C3, DL, MVT::i32));
13052 }
13053 }
13054
13055 // Second pattern, reversed: right shift, then mask off trailing bits.
13056 // FIXME: Handle other patterns of known/demanded bits.
13057 if (!LeftShift && isShiftedMask_32(C1)) {
13058 uint32_t Leading = countLeadingZeros(C1);
13059 uint32_t C3 = countTrailingZeros(C1);
13060 if (Leading == C2 && C2 + C3 < 32) {
13061 SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
13062 DAG.getConstant(C2 + C3, DL, MVT::i32));
13063 return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,
13064 DAG.getConstant(C3, DL, MVT::i32));
13065 }
13066 }
13067
13068 // FIXME: Transform "(and (shl x, c2) c1)" ->
13069 // "(shl (and x, c1>>c2), c2)" if "c1 >> c2" is a cheaper immediate than
13070 // c1.
13071 return SDValue();
13072}
13073
13074static SDValue PerformANDCombine(SDNode *N,
13075 TargetLowering::DAGCombinerInfo &DCI,
13076 const ARMSubtarget *Subtarget) {
13077 // Attempt to use immediate-form VBIC
13078 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
13079 SDLoc dl(N);
13080 EVT VT = N->getValueType(0);
13081 SelectionDAG &DAG = DCI.DAG;
13082
13083 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT) || VT == MVT::v4i1 ||
13084 VT == MVT::v8i1 || VT == MVT::v16i1)
13085 return SDValue();
13086
13087 APInt SplatBits, SplatUndef;
13088 unsigned SplatBitSize;
13089 bool HasAnyUndefs;
13090 if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) &&
13091 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
13092 if (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32 ||
13093 SplatBitSize == 64) {
13094 EVT VbicVT;
13095 SDValue Val = isVMOVModifiedImm((~SplatBits).getZExtValue(),
13096 SplatUndef.getZExtValue(), SplatBitSize,
13097 DAG, dl, VbicVT, VT, OtherModImm);
13098 if (Val.getNode()) {
13099 SDValue Input =
13100 DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0));
13101 SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val);
13102 return DAG.getNode(ISD::BITCAST, dl, VT, Vbic);
13103 }
13104 }
13105 }
13106
13107 if (!Subtarget->isThumb1Only()) {
13108 // fold (and (select cc, -1, c), x) -> (select cc, x, (and, x, c))
13109 if (SDValue Result = combineSelectAndUseCommutative(N, true, DCI))
13110 return Result;
13111
13112 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
13113 return Result;
13114 }
13115
13116 if (Subtarget->isThumb1Only())
13117 if (SDValue Result = CombineANDShift(N, DCI, Subtarget))
13118 return Result;
13119
13120 return SDValue();
13121}
13122
13123// Try combining OR nodes to SMULWB, SMULWT.
13124static SDValue PerformORCombineToSMULWBT(SDNode *OR,
13125 TargetLowering::DAGCombinerInfo &DCI,
13126 const ARMSubtarget *Subtarget) {
13127 if (!Subtarget->hasV6Ops() ||
13128 (Subtarget->isThumb() &&
13129 (!Subtarget->hasThumb2() || !Subtarget->hasDSP())))
13130 return SDValue();
13131
13132 SDValue SRL = OR->getOperand(0);
13133 SDValue SHL = OR->getOperand(1);
13134
13135 if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) {
13136 SRL = OR->getOperand(1);
13137 SHL = OR->getOperand(0);
13138 }
13139 if (!isSRL16(SRL) || !isSHL16(SHL))
13140 return SDValue();
13141
13142 // The first operands to the shifts need to be the two results from the
13143 // same smul_lohi node.
13144 if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) ||
13145 SRL.getOperand(0).getOpcode() != ISD::SMUL_LOHI)
13146 return SDValue();
13147
13148 SDNode *SMULLOHI = SRL.getOperand(0).getNode();
13149 if (SRL.getOperand(0) != SDValue(SMULLOHI, 0) ||
13150 SHL.getOperand(0) != SDValue(SMULLOHI, 1))
13151 return SDValue();
13152
13153 // Now we have:
13154 // (or (srl (smul_lohi ?, ?), 16), (shl (smul_lohi ?, ?), 16))
13155 // For SMULW[B|T] smul_lohi will take a 32-bit and a 16-bit argument.
13156 // For SMULWB the 16-bit value will be sign extended somehow.
13157 // For SMULWT only the SRA is required.
13158 // Check both sides of SMUL_LOHI
13159 SDValue OpS16 = SMULLOHI->getOperand(0);
13160 SDValue OpS32 = SMULLOHI->getOperand(1);
13161
13162 SelectionDAG &DAG = DCI.DAG;
13163 if (!isS16(OpS16, DAG) && !isSRA16(OpS16)) {
13164 OpS16 = OpS32;
13165 OpS32 = SMULLOHI->getOperand(0);
13166 }
13167
13168 SDLoc dl(OR);
13169 unsigned Opcode = 0;
13170 if (isS16(OpS16, DAG))
13171 Opcode = ARMISD::SMULWB;
13172 else if (isSRA16(OpS16)) {
13173 Opcode = ARMISD::SMULWT;
13174 OpS16 = OpS16->getOperand(0);
13175 }
13176 else
13177 return SDValue();
13178
13179 SDValue Res = DAG.getNode(Opcode, dl, MVT::i32, OpS32, OpS16);
13180 DAG.ReplaceAllUsesOfValueWith(SDValue(OR, 0), Res);
13181 return SDValue(OR, 0);
13182}
13183
13184static SDValue PerformORCombineToBFI(SDNode *N,
13185 TargetLowering::DAGCombinerInfo &DCI,
13186 const ARMSubtarget *Subtarget) {
13187 // BFI is only available on V6T2+
13188 if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
13189 return SDValue();
13190
13191 EVT VT = N->getValueType(0);
13192 SDValue N0 = N->getOperand(0);
13193 SDValue N1 = N->getOperand(1);
13194 SelectionDAG &DAG = DCI.DAG;
13195 SDLoc DL(N);
13196 // 1) or (and A, mask), val => ARMbfi A, val, mask
13197 // iff (val & mask) == val
13198 //
13199 // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
13200 // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
13201 // && mask == ~mask2
13202 // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
13203 // && ~mask == mask2
13204 // (i.e., copy a bitfield value into another bitfield of the same width)
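// For example, with mask = 0xffff00ff and val = 0x2a00, case (1) yields
// (ARMbfi A, 0x2a, 0xffff00ff), inserting the 8-bit value 0x2a into bits
// 8..15 of A.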
13205
13206 if (VT != MVT::i32)
13207 return SDValue();
13208
13209 SDValue N00 = N0.getOperand(0);
13210
13211 // The value and the mask need to be constants so we can verify this is
13212 // actually a bitfield set. If the mask is 0xffff, we can do better
13213 // via a movt instruction, so don't use BFI in that case.
13214 SDValue MaskOp = N0.getOperand(1);
13215 ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp);
13216 if (!MaskC)
13217 return SDValue();
13218 unsigned Mask = MaskC->getZExtValue();
13219 if (Mask == 0xffff)
13220 return SDValue();
13221 SDValue Res;
13222 // Case (1): or (and A, mask), val => ARMbfi A, val, mask
13223 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
13224 if (N1C) {
13225 unsigned Val = N1C->getZExtValue();
13226 if ((Val & ~Mask) != Val)
13227 return SDValue();
13228
13229 if (ARM::isBitFieldInvertedMask(Mask)) {
13230 Val >>= countTrailingZeros(~Mask);
13231
13232 Res = DAG.getNode(ARMISD::BFI, DL, VT, N00,
13233 DAG.getConstant(Val, DL, MVT::i32),
13234 DAG.getConstant(Mask, DL, MVT::i32));
13235
13236 DCI.CombineTo(N, Res, false);
13237 // Return value from the original node to inform the combiner that N is
13238 // now dead.
13239 return SDValue(N, 0);
13240 }
13241 } else if (N1.getOpcode() == ISD::AND) {
13242 // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
13243 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
13244 if (!N11C)
13245 return SDValue();
13246 unsigned Mask2 = N11C->getZExtValue();
13247
13248 // Mask and ~Mask2 (or the reverse) must be equivalent for the BFI pattern
13249 // to match as-is.
13250 if (ARM::isBitFieldInvertedMask(Mask) &&
13251 (Mask == ~Mask2)) {
13252 // The pack halfword instruction works better for masks that fit it,
13253 // so use that when it's available.
13254 if (Subtarget->hasDSP() &&
13255 (Mask == 0xffff || Mask == 0xffff0000))
13256 return SDValue();
13257 // 2a
13258 unsigned amt = countTrailingZeros(Mask2);
13259 Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
13260 DAG.getConstant(amt, DL, MVT::i32));
13261 Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
13262 DAG.getConstant(Mask, DL, MVT::i32));
13263 DCI.CombineTo(N, Res, false);
13264 // Return value from the original node to inform the combiner that N is
13265 // now dead.
13266 return SDValue(N, 0);
13267 } else if (ARM::isBitFieldInvertedMask(~Mask) &&
13268 (~Mask == Mask2)) {
13269 // The pack halfword instruction works better for masks that fit it,
13270 // so use that when it's available.
13271 if (Subtarget->hasDSP() &&
13272 (Mask2 == 0xffff || Mask2 == 0xffff0000))
13273 return SDValue();
13274 // 2b
13275 unsigned lsb = countTrailingZeros(Mask);
13276 Res = DAG.getNode(ISD::SRL, DL, VT, N00,
13277 DAG.getConstant(lsb, DL, MVT::i32));
13278 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
13279 DAG.getConstant(Mask2, DL, MVT::i32));
13280 DCI.CombineTo(N, Res, false);
13281 // Return value from the original node to inform the combiner that N is
13282 // now dead.
13283 return SDValue(N, 0);
13284 }
13285 }
13286
13287 if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) &&
13288 N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) &&
13289 ARM::isBitFieldInvertedMask(~Mask)) {
13290 // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask
13291 // where lsb(mask) == #shamt and masked bits of B are known zero.
13292 SDValue ShAmt = N00.getOperand(1);
13293 unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue();
13294 unsigned LSB = countTrailingZeros(Mask);
13295 if (ShAmtC != LSB)
13296 return SDValue();
13297
13298 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0),
13299 DAG.getConstant(~Mask, DL, MVT::i32));
13300
13301 DCI.CombineTo(N, Res, false);
13302 // Return value from the original node to inform the combiner that N is
13303 // now dead.
13304 return SDValue(N, 0);
13305 }
13306
13307 return SDValue();
13308}
13309
13310static bool isValidMVECond(unsigned CC, bool IsFloat) {
13311 switch (CC) {
13312 case ARMCC::EQ:
13313 case ARMCC::NE:
13314 case ARMCC::LE:
13315 case ARMCC::GT:
13316 case ARMCC::GE:
13317 case ARMCC::LT:
13318 return true;
13319 case ARMCC::HS:
13320 case ARMCC::HI:
13321 return !IsFloat;
13322 default:
13323 return false;
13324 };
13325}
13326
13327static ARMCC::CondCodes getVCMPCondCode(SDValue N) {
13328 if (N->getOpcode() == ARMISD::VCMP)
13329 return (ARMCC::CondCodes)N->getConstantOperandVal(2);
13330 else if (N->getOpcode() == ARMISD::VCMPZ)
13331 return (ARMCC::CondCodes)N->getConstantOperandVal(1);
13332 else
12333 llvm_unreachable("Not a VCMP/VCMPZ!");
13334}
13335
13336static bool CanInvertMVEVCMP(SDValue N) {
13337 ARMCC::CondCodes CC = ARMCC::getOppositeCondition(getVCMPCondCode(N));
13338 return isValidMVECond(CC, N->getOperand(0).getValueType().isFloatingPoint());
13339}
13340
13341static SDValue PerformORCombine_i1(SDNode *N,
13342 TargetLowering::DAGCombinerInfo &DCI,
13343 const ARMSubtarget *Subtarget) {
13344 // Try to invert "or A, B" -> "and ~A, ~B", as the "and" is easier to chain
13345 // together with predicates
13346 EVT VT = N->getValueType(0);
13347 SDLoc DL(N);
13348 SDValue N0 = N->getOperand(0);
13349 SDValue N1 = N->getOperand(1);
13350
13351 auto IsFreelyInvertable = [&](SDValue V) {
13352 if (V->getOpcode() == ARMISD::VCMP || V->getOpcode() == ARMISD::VCMPZ)
13353 return CanInvertMVEVCMP(V);
13354 return false;
13355 };
13356
13357 // At least one operand must be freely invertible.
13358 if (!(IsFreelyInvertable(N0) || IsFreelyInvertable(N1)))
13359 return SDValue();
13360
13361 SDValue NewN0 = DCI.DAG.getLogicalNOT(DL, N0, VT);
13362 SDValue NewN1 = DCI.DAG.getLogicalNOT(DL, N1, VT);
13363 SDValue And = DCI.DAG.getNode(ISD::AND, DL, VT, NewN0, NewN1);
13364 return DCI.DAG.getLogicalNOT(DL, And, VT);
13365}
13366
13367/// PerformORCombine - Target-specific dag combine xforms for ISD::OR
13368static SDValue PerformORCombine(SDNode *N,
13369 TargetLowering::DAGCombinerInfo &DCI,
13370 const ARMSubtarget *Subtarget) {
13371 // Attempt to use immediate-form VORR
13372 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
13373 SDLoc dl(N);
13374 EVT VT = N->getValueType(0);
13375 SelectionDAG &DAG = DCI.DAG;
13376
13377 if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
13378 return SDValue();
13379
13380 if (Subtarget->hasMVEIntegerOps() &&
13381 (VT == MVT::v4i1 || VT == MVT::v8i1 || VT == MVT::v16i1))
13382 return PerformORCombine_i1(N, DCI, Subtarget);
13383
13384 APInt SplatBits, SplatUndef;
13385 unsigned SplatBitSize;
13386 bool HasAnyUndefs;
13387 if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) &&
13388 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
13389 if (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32 ||
13390 SplatBitSize == 64) {
13391 EVT VorrVT;
13392 SDValue Val =
13393 isVMOVModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
13394 SplatBitSize, DAG, dl, VorrVT, VT, OtherModImm);
13395 if (Val.getNode()) {
13396 SDValue Input =
13397 DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0));
13398 SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val);
13399 return DAG.getNode(ISD::BITCAST, dl, VT, Vorr);
13400 }
13401 }
13402 }
13403
13404 if (!Subtarget->isThumb1Only()) {
13405 // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
13406 if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
13407 return Result;
13408 if (SDValue Result = PerformORCombineToSMULWBT(N, DCI, Subtarget))
13409 return Result;
13410 }
13411
13412 SDValue N0 = N->getOperand(0);
13413 SDValue N1 = N->getOperand(1);
13414
13415 // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
13416 if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() &&
13417 DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
13418
13419 // The code below optimizes (or (and X, Y), Z).
13420 // The AND operand needs to have a single user to make these optimizations
13421 // profitable.
13422 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
13423 return SDValue();
13424
13425 APInt SplatUndef;
13426 unsigned SplatBitSize;
13427 bool HasAnyUndefs;
13428
13429 APInt SplatBits0, SplatBits1;
13430 BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
13431 BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
13432 // Ensure that the second operands of both ands are constants
13433 if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
13434 HasAnyUndefs) && !HasAnyUndefs) {
13435 if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
13436 HasAnyUndefs) && !HasAnyUndefs) {
13437 // Ensure that the bit widths of the constants are the same and that
13438 // the splat arguments are logical inverses as per the pattern we
13439 // are trying to simplify.
13440 if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() &&
13441 SplatBits0 == ~SplatBits1) {
13442 // Canonicalize the vector type to make instruction selection
13443 // simpler.
13444 EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
13445 SDValue Result = DAG.getNode(ARMISD::VBSP, dl, CanonicalVT,
13446 N0->getOperand(1),
13447 N0->getOperand(0),
13448 N1->getOperand(0));
13449 return DAG.getNode(ISD::BITCAST, dl, VT, Result);
13450 }
13451 }
13452 }
13453 }
13454
13455 // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
13456 // reasonable.
13457 if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
13458 if (SDValue Res = PerformORCombineToBFI(N, DCI, Subtarget))
13459 return Res;
13460 }
13461
13462 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
13463 return Result;
13464
13465 return SDValue();
13466}
13467
13468static SDValue PerformXORCombine(SDNode *N,
13469 TargetLowering::DAGCombinerInfo &DCI,
13470 const ARMSubtarget *Subtarget) {
13471 EVT VT = N->getValueType(0);
13472 SelectionDAG &DAG = DCI.DAG;
13473
13474 if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
13475 return SDValue();
13476
13477 if (!Subtarget->isThumb1Only()) {
13478 // fold (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
13479 if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
13480 return Result;
13481
13482 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
13483 return Result;
13484 }
13485
13486 if (Subtarget->hasMVEIntegerOps()) {
13487 // fold (xor(vcmp/z, 1)) into a vcmp with the opposite condition.
13488 SDValue N0 = N->getOperand(0);
13489 SDValue N1 = N->getOperand(1);
13490 const TargetLowering *TLI = Subtarget->getTargetLowering();
13491 if (TLI->isConstTrueVal(N1.getNode()) &&
13492 (N0->getOpcode() == ARMISD::VCMP || N0->getOpcode() == ARMISD::VCMPZ)) {
13493 if (CanInvertMVEVCMP(N0)) {
13494 SDLoc DL(N0);
13495 ARMCC::CondCodes CC = ARMCC::getOppositeCondition(getVCMPCondCode(N0));
13496
13497 SmallVector<SDValue, 4> Ops;
13498 Ops.push_back(N0->getOperand(0));
13499 if (N0->getOpcode() == ARMISD::VCMP)
13500 Ops.push_back(N0->getOperand(1));
13501 Ops.push_back(DCI.DAG.getConstant(CC, DL, MVT::i32));
13502 return DCI.DAG.getNode(N0->getOpcode(), DL, N0->getValueType(0), Ops);
13503 }
13504 }
13505 }
13506
13507 return SDValue();
13508}
13509
13510// ParseBFI - given a BFI instruction in N, extract the "from" value (Rn) and return it,
13511// and fill in FromMask and ToMask with (consecutive) bits in "from" to be extracted and
13512// their position in "to" (Rd).
13513static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask) {
13514 assert(N->getOpcode() == ARMISD::BFI);
13515
13516 SDValue From = N->getOperand(1);
13517 ToMask = ~cast<ConstantSDNode>(N->getOperand(2))->getAPIntValue();
13518 FromMask = APInt::getLowBitsSet(ToMask.getBitWidth(), ToMask.countPopulation());
13519
13520 // If the Base came from a SHR #C, we can deduce that it is really testing bit
13521 // #C in the base of the SHR.
13522 if (From->getOpcode() == ISD::SRL &&
13523 isa<ConstantSDNode>(From->getOperand(1))) {
13524 APInt Shift = cast<ConstantSDNode>(From->getOperand(1))->getAPIntValue();
13525 assert(Shift.getLimitedValue() < 32 && "Shift too large!");
13526 FromMask <<= Shift.getLimitedValue(31);
13527 From = From->getOperand(0);
13528 }
13529
13530 return From;
13531}
13532
13533// If A and B contain one contiguous set of bits, does A | B == A . B?
13534//
13535 // Neither A nor B may be zero.
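// E.g. A = 0b1100 and B = 0b0011 concatenate properly (bits 3..2 followed
// directly by bits 1..0), while A = 0b1100 and B = 0b0001 leave a gap and
// do not.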
13536static bool BitsProperlyConcatenate(const APInt &A, const APInt &B) {
13537 unsigned LastActiveBitInA = A.countTrailingZeros();
13538 unsigned FirstActiveBitInB = B.getBitWidth() - B.countLeadingZeros() - 1;
13539 return LastActiveBitInA - 1 == FirstActiveBitInB;
13540}
13541
13542static SDValue FindBFIToCombineWith(SDNode *N) {
13543 // We have a BFI in N. Follow a possible chain of BFIs and find a BFI it can combine with,
13544 // if one exists.
13545 APInt ToMask, FromMask;
13546 SDValue From = ParseBFI(N, ToMask, FromMask);
13547 SDValue To = N->getOperand(0);
13548
13549 // Now check for a compatible BFI to merge with. We can pass through BFIs that
13550 // aren't compatible, but not if they set the same bit in their destination as
13551 // we do (or that of any BFI we're going to combine with).
13552 SDValue V = To;
13553 APInt CombinedToMask = ToMask;
13554 while (V.getOpcode() == ARMISD::BFI) {
13555 APInt NewToMask, NewFromMask;
13556 SDValue NewFrom = ParseBFI(V.getNode(), NewToMask, NewFromMask);
13557 if (NewFrom != From) {
13558 // This BFI has a different base. Keep going.
13559 CombinedToMask |= NewToMask;
13560 V = V.getOperand(0);
13561 continue;
13562 }
13563
13564 // Do the written bits conflict with any we've seen so far?
13565 if ((NewToMask & CombinedToMask).getBoolValue())
13566 // Conflicting bits - bail out because going further is unsafe.
13567 return SDValue();
13568
13569 // Are the new bits contiguous when combined with the old bits?
13570 if (BitsProperlyConcatenate(ToMask, NewToMask) &&
13571 BitsProperlyConcatenate(FromMask, NewFromMask))
13572 return V;
13573 if (BitsProperlyConcatenate(NewToMask, ToMask) &&
13574 BitsProperlyConcatenate(NewFromMask, FromMask))
13575 return V;
13576
13577 // We've seen a write to some bits, so track it.
13578 CombinedToMask |= NewToMask;
13579 // Keep going...
13580 V = V.getOperand(0);
13581 }
13582
13583 return SDValue();
13584}
13585
13586static SDValue PerformBFICombine(SDNode *N,
13587 TargetLowering::DAGCombinerInfo &DCI) {
13588 SDValue N1 = N->getOperand(1);
13589 if (N1.getOpcode() == ISD::AND) {
13590 // (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
13591 // the bits being cleared by the AND are not demanded by the BFI.
13592 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
13593 if (!N11C)
13594 return SDValue();
13595 unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
13596 unsigned LSB = countTrailingZeros(~InvMask);
13597 unsigned Width = (32 - countLeadingZeros(~InvMask)) - LSB;
13598 assert(Width <
13599 static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
13600 "undefined behavior");
13601 unsigned Mask = (1u << Width) - 1;
13602 unsigned Mask2 = N11C->getZExtValue();
13603 if ((Mask & (~Mask2)) == 0)
13604 return DCI.DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0),
13605 N->getOperand(0), N1.getOperand(0),
13606 N->getOperand(2));
13607 } else if (N->getOperand(0).getOpcode() == ARMISD::BFI) {
13608 // We have a BFI of a BFI. Walk up the BFI chain to see how long it goes.
13609 // Keep track of any consecutive bits set that all come from the same base
13610 // value. We can combine these together into a single BFI.
13611 SDValue CombineBFI = FindBFIToCombineWith(N);
13612 if (CombineBFI == SDValue())
13613 return SDValue();
13614
13615 // We've found a BFI.
13616 APInt ToMask1, FromMask1;
13617 SDValue From1 = ParseBFI(N, ToMask1, FromMask1);
13618
13619 APInt ToMask2, FromMask2;
13620 SDValue From2 = ParseBFI(CombineBFI.getNode(), ToMask2, FromMask2);
13621 assert(From1 == From2);
13622 (void)From2;
13623
13624 // First, unlink CombineBFI.
13625 DCI.DAG.ReplaceAllUsesWith(CombineBFI, CombineBFI.getOperand(0));
13626 // Then create a new BFI, combining the two together.
13627 APInt NewFromMask = FromMask1 | FromMask2;
13628 APInt NewToMask = ToMask1 | ToMask2;
13629
13630 EVT VT = N->getValueType(0);
13631 SDLoc dl(N);
13632
13633 if (NewFromMask[0] == 0)
13634 From1 = DCI.DAG.getNode(
13635 ISD::SRL, dl, VT, From1,
13636 DCI.DAG.getConstant(NewFromMask.countTrailingZeros(), dl, VT));
13637 return DCI.DAG.getNode(ARMISD::BFI, dl, VT, N->getOperand(0), From1,
13638 DCI.DAG.getConstant(~NewToMask, dl, VT));
13639 }
13640 return SDValue();
13641}
13642
13643/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
13644/// ARMISD::VMOVRRD.
13645static SDValue PerformVMOVRRDCombine(SDNode *N,
13646 TargetLowering::DAGCombinerInfo &DCI,
13647 const ARMSubtarget *Subtarget) {
13648 // vmovrrd(vmovdrr x, y) -> x,y
13649 SDValue InDouble = N->getOperand(0);
13650 if (InDouble.getOpcode() == ARMISD::VMOVDRR && Subtarget->hasFP64())
13651 return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
13652
13653 // vmovrrd(load f64) -> (load i32), (load i32)
13654 SDNode *InNode = InDouble.getNode();
13655 if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() &&
13656 InNode->getValueType(0) == MVT::f64 &&
13657 InNode->getOperand(1).getOpcode() == ISD::FrameIndex &&
13658 !cast<LoadSDNode>(InNode)->isVolatile()) {
13659 // TODO: Should this be done for non-FrameIndex operands?
13660 LoadSDNode *LD = cast<LoadSDNode>(InNode);
13661
13662 SelectionDAG &DAG = DCI.DAG;
13663 SDLoc DL(LD);
13664 SDValue BasePtr = LD->getBasePtr();
13665 SDValue NewLD1 =
13666 DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, LD->getPointerInfo(),
13667 LD->getAlignment(), LD->getMemOperand()->getFlags());
13668
13669 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
13670 DAG.getConstant(4, DL, MVT::i32));
13671
13672 SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, LD->getChain(), OffsetPtr,
13673 LD->getPointerInfo().getWithOffset(4),
13674 std::min(4U, LD->getAlignment()),
13675 LD->getMemOperand()->getFlags());
13676
13677 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
13678 if (DCI.DAG.getDataLayout().isBigEndian())
13679 std::swap (NewLD1, NewLD2);
13680 SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2);
13681 return Result;
13682 }
13683
13684 // VMOVRRD(extract(..(build_vector(a, b, c, d)))) -> a,b or c,d
13685 if (InDouble.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
13686 isa<ConstantSDNode>(InDouble.getOperand(1))) {
13687 SDValue BV = InDouble.getOperand(0);
13688 // Look up through any nop bitcasts
13689 while (BV.getOpcode() == ISD::BITCAST &&
13690 (BV.getValueType() == MVT::v2f64 || BV.getValueType() == MVT::v2i64))
13691 BV = BV.getOperand(0);
13692 if (BV.getValueType() != MVT::v4i32 || BV.getOpcode() != ISD::BUILD_VECTOR)
13693 return SDValue();
13694 unsigned Offset = InDouble.getConstantOperandVal(1) == 1 ? 2 : 0;
13695 if (Subtarget->isLittle())
13696 return DCI.DAG.getMergeValues(
13697 {BV.getOperand(Offset), BV.getOperand(Offset + 1)}, SDLoc(N));
13698 else
13699 return DCI.DAG.getMergeValues(
13700 {BV.getOperand(Offset + 1), BV.getOperand(Offset)}, SDLoc(N));
13701 }
13702
13703 return SDValue();
13704}
13705
13706/// PerformVMOVDRRCombine - Target-specific dag combine xforms for
13707/// ARMISD::VMOVDRR. This is also used for BUILD_VECTORs with 2 operands.
13708static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) {
13709 // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X)
13710 SDValue Op0 = N->getOperand(0);
13711 SDValue Op1 = N->getOperand(1);
13712 if (Op0.getOpcode() == ISD::BITCAST)
13713 Op0 = Op0.getOperand(0);
13714 if (Op1.getOpcode() == ISD::BITCAST)
13715 Op1 = Op1.getOperand(0);
13716 if (Op0.getOpcode() == ARMISD::VMOVRRD &&
13717 Op0.getNode() == Op1.getNode() &&
13718 Op0.getResNo() == 0 && Op1.getResNo() == 1)
13719 return DAG.getNode(ISD::BITCAST, SDLoc(N),
13720 N->getValueType(0), Op0.getOperand(0));
13721 return SDValue();
13722}
13723
13724static SDValue PerformVMOVhrCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
13725 SDValue Op0 = N->getOperand(0);
13726
13727 // VMOVhr (VMOVrh (X)) -> X
13728 if (Op0->getOpcode() == ARMISD::VMOVrh)
13729 return Op0->getOperand(0);
13730
13731 // FullFP16: half values are passed in S-registers, and we don't
13732 // need any of the bitcast and moves:
13733 //
13734 // t2: f32,ch = CopyFromReg t0, Register:f32 %0
13735 // t5: i32 = bitcast t2
13736 // t18: f16 = ARMISD::VMOVhr t5
13737 if (Op0->getOpcode() == ISD::BITCAST) {
13738 SDValue Copy = Op0->getOperand(0);
13739 if (Copy.getValueType() == MVT::f32 &&
13740 Copy->getOpcode() == ISD::CopyFromReg) {
13741 SDValue Ops[] = {Copy->getOperand(0), Copy->getOperand(1)};
13742 SDValue NewCopy =
13743 DCI.DAG.getNode(ISD::CopyFromReg, SDLoc(N), N->getValueType(0), Ops);
13744 return NewCopy;
13745 }
13746 }
13747
13748 // fold (VMOVhr (load x)) -> (load (f16*)x)
13749 if (LoadSDNode *LN0 = dyn_cast<LoadSDNode>(Op0)) {
13750 if (LN0->hasOneUse() && LN0->isUnindexed() &&
13751 LN0->getMemoryVT() == MVT::i16) {
13752 SDValue Load =
13753 DCI.DAG.getLoad(N->getValueType(0), SDLoc(N), LN0->getChain(),
13754 LN0->getBasePtr(), LN0->getMemOperand());
13755 DCI.DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Load.getValue(0));
13756 DCI.DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1));
13757 return Load;
13758 }
13759 }
13760
13761 // Only the bottom 16 bits of the source register are used.
13762 APInt DemandedMask = APInt::getLowBitsSet(32, 16);
13763 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
13764 if (TLI.SimplifyDemandedBits(Op0, DemandedMask, DCI))
13765 return SDValue(N, 0);
13766
13767 return SDValue();
13768}
13769
13770static SDValue PerformVMOVrhCombine(SDNode *N,
13771 TargetLowering::DAGCombinerInfo &DCI) {
13772 SDValue N0 = N->getOperand(0);
13773 EVT VT = N->getValueType(0);
13774
13775 // fold (VMOVrh (fpconst x)) -> const x
13776 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N0)) {
13777 APFloat V = C->getValueAPF();
13778 return DCI.DAG.getConstant(V.bitcastToAPInt().getZExtValue(), SDLoc(N), VT);
13779 }
13780
13781 // fold (VMOVrh (load x)) -> (zextload (i16*)x)
13782 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse()) {
13783 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13784
13785 SDValue Load =
13786 DCI.DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, LN0->getChain(),
13787 LN0->getBasePtr(), MVT::i16, LN0->getMemOperand());
13788 DCI.DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Load.getValue(0));
13789 DCI.DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
13790 return Load;
13791 }
13792
13793 // Fold VMOVrh(extract(x, n)) -> vgetlaneu(x, n)
13794 if (N0->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
13795 isa<ConstantSDNode>(N0->getOperand(1)))
13796 return DCI.DAG.getNode(ARMISD::VGETLANEu, SDLoc(N), VT, N0->getOperand(0),
13797 N0->getOperand(1));
13798
13799 return SDValue();
13800}
13801
13802/// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node
13803/// are normal, non-volatile loads. If so, it is profitable to bitcast an
13804/// i64 vector to have f64 elements, since the value can then be loaded
13805/// directly into a VFP register.
13806static bool hasNormalLoadOperand(SDNode *N) {
13807 unsigned NumElts = N->getValueType(0).getVectorNumElements();
13808 for (unsigned i = 0; i < NumElts; ++i) {
13809 SDNode *Elt = N->getOperand(i).getNode();
13810 if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile())
13811 return true;
13812 }
13813 return false;
13814}
13815
13816/// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
13817/// ISD::BUILD_VECTOR.
13818static SDValue PerformBUILD_VECTORCombine(SDNode *N,
13819 TargetLowering::DAGCombinerInfo &DCI,
13820 const ARMSubtarget *Subtarget) {
13821 // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
13822 // VMOVRRD is introduced when legalizing i64 types. It forces the i64 value
13823 // into a pair of GPRs, which is fine when the value is used as a scalar,
13824 // but if the i64 value is converted to a vector, we need to undo the VMOVRRD.
13825 SelectionDAG &DAG = DCI.DAG;
13826 if (N->getNumOperands() == 2)
13827 if (SDValue RV = PerformVMOVDRRCombine(N, DAG))
13828 return RV;
13829
13830 // Load i64 elements as f64 values so that type legalization does not split
13831 // them up into i32 values.
13832 EVT VT = N->getValueType(0);
13833 if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N))
13834 return SDValue();
13835 SDLoc dl(N);
13836 SmallVector<SDValue, 8> Ops;
13837 unsigned NumElts = VT.getVectorNumElements();
13838 for (unsigned i = 0; i < NumElts; ++i) {
13839 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i));
13840 Ops.push_back(V);
13841 // Make the DAGCombiner fold the bitcast.
13842 DCI.AddToWorklist(V.getNode());
13843 }
13844 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts);
13845 SDValue BV = DAG.getBuildVector(FloatVT, dl, Ops);
13846 return DAG.getNode(ISD::BITCAST, dl, VT, BV);
13847}
13848
13849/// Target-specific dag combine xforms for ARMISD::BUILD_VECTOR.
13850static SDValue
13851PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
13852 // ARMISD::BUILD_VECTOR is introduced when legalizing ISD::BUILD_VECTOR.
13853 // At that time, we may have inserted bitcasts from integer to float.
13854  // If these bitcasts have survived DAGCombine, change the lowering of this
13855  // BUILD_VECTOR into something more vector friendly, i.e., something that
13856  // does not force the use of floating point types.
13857
13858 // Make sure we can change the type of the vector.
13859 // This is possible iff:
13860  // 1. The vector is only used in a bitcast to an integer type. I.e.,
13861 // 1.1. Vector is used only once.
13862 // 1.2. Use is a bit convert to an integer type.
13863  // 2. The sizes of its operands are 32 bits (64 bits are not legal).
13864 EVT VT = N->getValueType(0);
13865 EVT EltVT = VT.getVectorElementType();
13866
13867 // Check 1.1. and 2.
13868 if (EltVT.getSizeInBits() != 32 || !N->hasOneUse())
13869 return SDValue();
13870
13871 // By construction, the input type must be float.
13872   assert(EltVT == MVT::f32 && "Unexpected type!");
13873
13874 // Check 1.2.
13875 SDNode *Use = *N->use_begin();
13876 if (Use->getOpcode() != ISD::BITCAST ||
13877 Use->getValueType(0).isFloatingPoint())
13878 return SDValue();
13879
13880 // Check profitability.
13881 // Model is, if more than half of the relevant operands are bitcast from
13882 // i32, turn the build_vector into a sequence of insert_vector_elt.
13883 // Relevant operands are everything that is not statically
13884 // (i.e., at compile time) bitcasted.
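  // Illustrative example (assumed operand mix, not from this trace): for a
  // 4-element build_vector with one constant operand, NumOfRelevantElts is 3,
  // so the rewrite below only fires when at least 2 of those 3 operands are
  // bitcasts from i32.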
13885 unsigned NumOfBitCastedElts = 0;
13886 unsigned NumElts = VT.getVectorNumElements();
13887 unsigned NumOfRelevantElts = NumElts;
13888 for (unsigned Idx = 0; Idx < NumElts; ++Idx) {
13889 SDValue Elt = N->getOperand(Idx);
13890 if (Elt->getOpcode() == ISD::BITCAST) {
13891 // Assume only bit cast to i32 will go away.
13892 if (Elt->getOperand(0).getValueType() == MVT::i32)
13893 ++NumOfBitCastedElts;
13894 } else if (Elt.isUndef() || isa<ConstantSDNode>(Elt))
13895 // Constants are statically casted, thus do not count them as
13896 // relevant operands.
13897 --NumOfRelevantElts;
13898 }
13899
13900 // Check if more than half of the elements require a non-free bitcast.
13901 if (NumOfBitCastedElts <= NumOfRelevantElts / 2)
13902 return SDValue();
13903
13904 SelectionDAG &DAG = DCI.DAG;
13905 // Create the new vector type.
13906 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
13907 // Check if the type is legal.
13908 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13909 if (!TLI.isTypeLegal(VecVT))
13910 return SDValue();
13911
13912 // Combine:
13913 // ARMISD::BUILD_VECTOR E1, E2, ..., EN.
13914 // => BITCAST INSERT_VECTOR_ELT
13915 // (INSERT_VECTOR_ELT (...), (BITCAST EN-1), N-1),
13916 // (BITCAST EN), N.
13917 SDValue Vec = DAG.getUNDEF(VecVT);
13918 SDLoc dl(N);
13919 for (unsigned Idx = 0 ; Idx < NumElts; ++Idx) {
13920 SDValue V = N->getOperand(Idx);
13921 if (V.isUndef())
13922 continue;
13923 if (V.getOpcode() == ISD::BITCAST &&
13924 V->getOperand(0).getValueType() == MVT::i32)
13925 // Fold obvious case.
13926 V = V.getOperand(0);
13927 else {
13928 V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V);
13929 // Make the DAGCombiner fold the bitcasts.
13930 DCI.AddToWorklist(V.getNode());
13931 }
13932 SDValue LaneIdx = DAG.getConstant(Idx, dl, MVT::i32);
13933 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Vec, V, LaneIdx);
13934 }
13935 Vec = DAG.getNode(ISD::BITCAST, dl, VT, Vec);
13936 // Make the DAGCombiner fold the bitcasts.
13937 DCI.AddToWorklist(Vec.getNode());
13938 return Vec;
13939}
13940
13941static SDValue
13942PerformPREDICATE_CASTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
13943 EVT VT = N->getValueType(0);
13944 SDValue Op = N->getOperand(0);
13945 SDLoc dl(N);
13946
13947 // PREDICATE_CAST(PREDICATE_CAST(x)) == PREDICATE_CAST(x)
13948 if (Op->getOpcode() == ARMISD::PREDICATE_CAST) {
13949 // If the valuetypes are the same, we can remove the cast entirely.
13950 if (Op->getOperand(0).getValueType() == VT)
13951 return Op->getOperand(0);
13952 return DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, Op->getOperand(0));
13953 }
13954
13955 // Turn pred_cast(xor x, -1) into xor(pred_cast x, -1), in order to produce
13956 // more VPNOT which might get folded as else predicates.
13957 if (Op.getValueType() == MVT::i32 && isBitwiseNot(Op)) {
13958 SDValue X =
13959 DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, Op->getOperand(0));
13960 SDValue C = DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT,
13961 DCI.DAG.getConstant(65535, dl, MVT::i32));
13962 return DCI.DAG.getNode(ISD::XOR, dl, VT, X, C);
13963 }
13964
13965 // Only the bottom 16 bits of the source register are used.
13966 if (Op.getValueType() == MVT::i32) {
13967 APInt DemandedMask = APInt::getLowBitsSet(32, 16);
13968 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
13969 if (TLI.SimplifyDemandedBits(Op, DemandedMask, DCI))
13970 return SDValue(N, 0);
13971 }
13972 return SDValue();
13973}
13974
13975static SDValue
13976PerformVECTOR_REG_CASTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
13977 const ARMSubtarget *ST) {
13978 EVT VT = N->getValueType(0);
13979 SDValue Op = N->getOperand(0);
13980 SDLoc dl(N);
13981
13982 // Under Little endian, a VECTOR_REG_CAST is equivalent to a BITCAST
13983 if (ST->isLittle())
13984 return DCI.DAG.getNode(ISD::BITCAST, dl, VT, Op);
13985
13986 // VECTOR_REG_CAST undef -> undef
13987 if (Op.isUndef())
13988 return DCI.DAG.getUNDEF(VT);
13989
13990 // VECTOR_REG_CAST(VECTOR_REG_CAST(x)) == VECTOR_REG_CAST(x)
13991 if (Op->getOpcode() == ARMISD::VECTOR_REG_CAST) {
13992 // If the valuetypes are the same, we can remove the cast entirely.
13993 if (Op->getOperand(0).getValueType() == VT)
13994 return Op->getOperand(0);
13995 return DCI.DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Op->getOperand(0));
13996 }
13997
13998 return SDValue();
13999}
14000
14001static SDValue PerformVCMPCombine(SDNode *N,
14002 TargetLowering::DAGCombinerInfo &DCI,
14003 const ARMSubtarget *Subtarget) {
14004 if (!Subtarget->hasMVEIntegerOps())
14005 return SDValue();
14006
14007 EVT VT = N->getValueType(0);
14008 SDValue Op0 = N->getOperand(0);
14009 SDValue Op1 = N->getOperand(1);
14010 ARMCC::CondCodes Cond =
14011 (ARMCC::CondCodes)cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
14012 SDLoc dl(N);
14013
14014 // vcmp X, 0, cc -> vcmpz X, cc
14015 if (isZeroVector(Op1))
14016 return DCI.DAG.getNode(ARMISD::VCMPZ, dl, VT, Op0,
14017 N->getOperand(2));
14018
14019 unsigned SwappedCond = getSwappedCondition(Cond);
14020 if (isValidMVECond(SwappedCond, VT.isFloatingPoint())) {
14021 // vcmp 0, X, cc -> vcmpz X, reversed(cc)
14022 if (isZeroVector(Op0))
14023 return DCI.DAG.getNode(ARMISD::VCMPZ, dl, VT, Op1,
14024 DCI.DAG.getConstant(SwappedCond, dl, MVT::i32));
14025 // vcmp vdup(Y), X, cc -> vcmp X, vdup(Y), reversed(cc)
14026 if (Op0->getOpcode() == ARMISD::VDUP && Op1->getOpcode() != ARMISD::VDUP)
14027 return DCI.DAG.getNode(ARMISD::VCMP, dl, VT, Op1, Op0,
14028 DCI.DAG.getConstant(SwappedCond, dl, MVT::i32));
14029 }
14030
14031 return SDValue();
14032}
14033
14034/// PerformInsertEltCombine - Target-specific dag combine xforms for
14035/// ISD::INSERT_VECTOR_ELT.
14036static SDValue PerformInsertEltCombine(SDNode *N,
14037 TargetLowering::DAGCombinerInfo &DCI) {
14038 // Bitcast an i64 load inserted into a vector to f64.
14039 // Otherwise, the i64 value will be legalized to a pair of i32 values.
14040 EVT VT = N->getValueType(0);
14041 SDNode *Elt = N->getOperand(1).getNode();
14042 if (VT.getVectorElementType() != MVT::i64 ||
14043 !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile())
14044 return SDValue();
14045
14046 SelectionDAG &DAG = DCI.DAG;
14047 SDLoc dl(N);
14048 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
14049 VT.getVectorNumElements());
14050 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0));
14051 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1));
14052 // Make the DAGCombiner fold the bitcasts.
14053 DCI.AddToWorklist(Vec.getNode());
14054 DCI.AddToWorklist(V.getNode());
14055 SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT,
14056 Vec, V, N->getOperand(2));
14057 return DAG.getNode(ISD::BITCAST, dl, VT, InsElt);
14058}
14059
14060static SDValue PerformExtractEltCombine(SDNode *N,
14061 TargetLowering::DAGCombinerInfo &DCI,
14062 const ARMSubtarget *ST) {
14063 SDValue Op0 = N->getOperand(0);
14064 EVT VT = N->getValueType(0);
14065 SDLoc dl(N);
14066
14067 // extract (vdup x) -> x
14068 if (Op0->getOpcode() == ARMISD::VDUP) {
14069 SDValue X = Op0->getOperand(0);
14070 if (VT == MVT::f16 && X.getValueType() == MVT::i32)
14071 return DCI.DAG.getNode(ARMISD::VMOVhr, dl, VT, X);
14072 if (VT == MVT::i32 && X.getValueType() == MVT::f16)
14073 return DCI.DAG.getNode(ARMISD::VMOVrh, dl, VT, X);
14074
14075 while (X.getValueType() != VT && X->getOpcode() == ISD::BITCAST)
14076 X = X->getOperand(0);
14077 if (X.getValueType() == VT)
14078 return X;
14079 }
14080
14081 // extract(bitcast(BUILD_VECTOR(VMOVDRR(a, b), ..))) -> a or b
14082 if (Op0.getValueType() == MVT::v4i32 &&
14083 isa<ConstantSDNode>(N->getOperand(1)) &&
14084 Op0.getOpcode() == ISD::BITCAST &&
14085 Op0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
14086 Op0.getOperand(0).getValueType() == MVT::v2f64) {
14087 SDValue BV = Op0.getOperand(0);
14088 unsigned Offset = N->getConstantOperandVal(1);
14089 SDValue MOV = BV.getOperand(Offset < 2 ? 0 : 1);
14090 if (MOV.getOpcode() == ARMISD::VMOVDRR)
14091 return MOV.getOperand(ST->isLittle() ? Offset % 2 : 1 - Offset % 2);
14092 }
14093
14094 return SDValue();
14095}
14096
14097static SDValue PerformSignExtendInregCombine(SDNode *N, SelectionDAG &DAG) {
14098 SDValue Op = N->getOperand(0);
14099 EVT VT = N->getValueType(0);
14100
14101 // sext_inreg(VGETLANEu) -> VGETLANEs
14102 if (Op.getOpcode() == ARMISD::VGETLANEu &&
14103 cast<VTSDNode>(N->getOperand(1))->getVT() ==
14104 Op.getOperand(0).getValueType().getScalarType())
14105 return DAG.getNode(ARMISD::VGETLANEs, SDLoc(N), VT, Op.getOperand(0),
14106 Op.getOperand(1));
14107
14108 return SDValue();
14109}
14110
14111// When lowering complex nodes that we recognize, like VQDMULH and MULH, we
14112// can end up with shuffle(binop(shuffle, shuffle)), that can be simplified to
14113// binop as the shuffles cancel out.
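// Illustrative shape (not taken from this trace):
//   shuffle(VQDMULH(shuffle(a, mask), shuffle(b, mask)), outer_mask)
//     --> VQDMULH(a, b)
// provided applying outer_mask on top of mask gives back the identity order,
// which is what the mask check below verifies.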
14114static SDValue FlattenVectorShuffle(ShuffleVectorSDNode *N, SelectionDAG &DAG) {
14115 EVT VT = N->getValueType(0);
14116 if (!N->getOperand(1).isUndef() || N->getOperand(0).getValueType() != VT)
14117 return SDValue();
14118 SDValue Op = N->getOperand(0);
14119
14120 // Looking for binary operators that will have been folded from
14121 // truncates/extends.
14122 switch (Op.getOpcode()) {
14123 case ARMISD::VQDMULH:
14124 case ISD::MULHS:
14125 case ISD::MULHU:
14126 break;
14127 default:
14128 return SDValue();
14129 }
14130
14131 ShuffleVectorSDNode *Op0 = dyn_cast<ShuffleVectorSDNode>(Op.getOperand(0));
14132 ShuffleVectorSDNode *Op1 = dyn_cast<ShuffleVectorSDNode>(Op.getOperand(1));
14133 if (!Op0 || !Op1 || !Op0->getOperand(1).isUndef() ||
14134 !Op1->getOperand(1).isUndef() || Op0->getMask() != Op1->getMask() ||
14135 Op0->getOperand(0).getValueType() != VT)
14136 return SDValue();
14137
14138 // Check the mask turns into an identity shuffle.
14139 ArrayRef<int> NMask = N->getMask();
14140 ArrayRef<int> OpMask = Op0->getMask();
14141 for (int i = 0, e = NMask.size(); i != e; i++) {
14142 if (NMask[i] > 0 && OpMask[NMask[i]] > 0 && OpMask[NMask[i]] != i)
14143 return SDValue();
14144 }
14145
14146 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
14147 Op0->getOperand(0), Op1->getOperand(0));
14148}
14149
14150/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
14151/// ISD::VECTOR_SHUFFLE.
14152static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
14153 if (SDValue R = FlattenVectorShuffle(cast<ShuffleVectorSDNode>(N), DAG))
14154 return R;
14155
14156 // The LLVM shufflevector instruction does not require the shuffle mask
14157 // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does
14158 // have that requirement. When translating to ISD::VECTOR_SHUFFLE, if the
14159 // operands do not match the mask length, they are extended by concatenating
14160 // them with undef vectors. That is probably the right thing for other
14161 // targets, but for NEON it is better to concatenate two double-register
14162 // size vector operands into a single quad-register size vector. Do that
14163 // transformation here:
14164 // shuffle(concat(v1, undef), concat(v2, undef)) ->
14165 // shuffle(concat(v1, v2), undef)
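  // Illustrative example (assumed v4i16 types): with 2-element inputs v1 and
  // v2, a mask of <0, 4, 1, 5> over concat(v1, undef) and concat(v2, undef)
  // becomes <0, 2, 1, 3> over the single concat(v1, v2) operand.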
14166 SDValue Op0 = N->getOperand(0);
14167 SDValue Op1 = N->getOperand(1);
14168 if (Op0.getOpcode() != ISD::CONCAT_VECTORS ||
14169 Op1.getOpcode() != ISD::CONCAT_VECTORS ||
14170 Op0.getNumOperands() != 2 ||
14171 Op1.getNumOperands() != 2)
14172 return SDValue();
14173 SDValue Concat0Op1 = Op0.getOperand(1);
14174 SDValue Concat1Op1 = Op1.getOperand(1);
14175 if (!Concat0Op1.isUndef() || !Concat1Op1.isUndef())
14176 return SDValue();
14177 // Skip the transformation if any of the types are illegal.
14178 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14179 EVT VT = N->getValueType(0);
14180 if (!TLI.isTypeLegal(VT) ||
14181 !TLI.isTypeLegal(Concat0Op1.getValueType()) ||
14182 !TLI.isTypeLegal(Concat1Op1.getValueType()))
14183 return SDValue();
14184
14185 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
14186 Op0.getOperand(0), Op1.getOperand(0));
14187 // Translate the shuffle mask.
14188 SmallVector<int, 16> NewMask;
14189 unsigned NumElts = VT.getVectorNumElements();
14190 unsigned HalfElts = NumElts/2;
14191 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
14192 for (unsigned n = 0; n < NumElts; ++n) {
14193 int MaskElt = SVN->getMaskElt(n);
14194 int NewElt = -1;
14195 if (MaskElt < (int)HalfElts)
14196 NewElt = MaskElt;
14197 else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts))
14198 NewElt = HalfElts + MaskElt - NumElts;
14199 NewMask.push_back(NewElt);
14200 }
14201 return DAG.getVectorShuffle(VT, SDLoc(N), NewConcat,
14202 DAG.getUNDEF(VT), NewMask);
14203}
14204
14205/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,
14206/// NEON load/store intrinsics, and generic vector load/stores, to merge
14207/// base address updates.
14208/// For generic load/stores, the memory type is assumed to be a vector.
14209/// The caller is assumed to have checked legality.
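/// A typical pattern this targets (illustrative, assumed operands):
///   vld1.32 {d0}, [r1]
///   add     r1, r1, #8
/// is folded into the post-incremented form
///   vld1.32 {d0}, [r1]!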
14210static SDValue CombineBaseUpdate(SDNode *N,
14211 TargetLowering::DAGCombinerInfo &DCI) {
14212 SelectionDAG &DAG = DCI.DAG;
14213 const bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
14214 N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
14215 const bool isStore = N->getOpcode() == ISD::STORE;
14216 const unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1);
14217 SDValue Addr = N->getOperand(AddrOpIdx);
14218 MemSDNode *MemN = cast<MemSDNode>(N);
14219 SDLoc dl(N);
14220
14221 // Search for a use of the address operand that is an increment.
14222 for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
14223 UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
14224 SDNode *User = *UI;
14225 if (User->getOpcode() != ISD::ADD ||
14226 UI.getUse().getResNo() != Addr.getResNo())
14227 continue;
14228
14229 // Check that the add is independent of the load/store. Otherwise, folding
14230 // it would create a cycle. We can avoid searching through Addr as it's a
14231 // predecessor to both.
14232 SmallPtrSet<const SDNode *, 32> Visited;
14233 SmallVector<const SDNode *, 16> Worklist;
14234 Visited.insert(Addr.getNode());
14235 Worklist.push_back(N);
14236 Worklist.push_back(User);
14237 if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
14238 SDNode::hasPredecessorHelper(User, Visited, Worklist))
14239 continue;
14240
14241 // Find the new opcode for the updating load/store.
14242 bool isLoadOp = true;
14243 bool isLaneOp = false;
14244 unsigned NewOpc = 0;
14245 unsigned NumVecs = 0;
14246 if (isIntrinsic) {
14247 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
14248 switch (IntNo) {
14249       default: llvm_unreachable("unexpected intrinsic for Neon base update");
14250 case Intrinsic::arm_neon_vld1: NewOpc = ARMISD::VLD1_UPD;
14251 NumVecs = 1; break;
14252 case Intrinsic::arm_neon_vld2: NewOpc = ARMISD::VLD2_UPD;
14253 NumVecs = 2; break;
14254 case Intrinsic::arm_neon_vld3: NewOpc = ARMISD::VLD3_UPD;
14255 NumVecs = 3; break;
14256 case Intrinsic::arm_neon_vld4: NewOpc = ARMISD::VLD4_UPD;
14257 NumVecs = 4; break;
14258 case Intrinsic::arm_neon_vld1x2:
14259 case Intrinsic::arm_neon_vld1x3:
14260 case Intrinsic::arm_neon_vld1x4:
14261 case Intrinsic::arm_neon_vld2dup:
14262 case Intrinsic::arm_neon_vld3dup:
14263 case Intrinsic::arm_neon_vld4dup:
14264 // TODO: Support updating VLD1x and VLDxDUP nodes. For now, we just skip
14265 // combining base updates for such intrinsics.
14266 continue;
14267 case Intrinsic::arm_neon_vld2lane: NewOpc = ARMISD::VLD2LN_UPD;
14268 NumVecs = 2; isLaneOp = true; break;
14269 case Intrinsic::arm_neon_vld3lane: NewOpc = ARMISD::VLD3LN_UPD;
14270 NumVecs = 3; isLaneOp = true; break;
14271 case Intrinsic::arm_neon_vld4lane: NewOpc = ARMISD::VLD4LN_UPD;
14272 NumVecs = 4; isLaneOp = true; break;
14273 case Intrinsic::arm_neon_vst1: NewOpc = ARMISD::VST1_UPD;
14274 NumVecs = 1; isLoadOp = false; break;
14275 case Intrinsic::arm_neon_vst2: NewOpc = ARMISD::VST2_UPD;
14276 NumVecs = 2; isLoadOp = false; break;
14277 case Intrinsic::arm_neon_vst3: NewOpc = ARMISD::VST3_UPD;
14278 NumVecs = 3; isLoadOp = false; break;
14279 case Intrinsic::arm_neon_vst4: NewOpc = ARMISD::VST4_UPD;
14280 NumVecs = 4; isLoadOp = false; break;
14281 case Intrinsic::arm_neon_vst2lane: NewOpc = ARMISD::VST2LN_UPD;
14282 NumVecs = 2; isLoadOp = false; isLaneOp = true; break;
14283 case Intrinsic::arm_neon_vst3lane: NewOpc = ARMISD::VST3LN_UPD;
14284 NumVecs = 3; isLoadOp = false; isLaneOp = true; break;
14285 case Intrinsic::arm_neon_vst4lane: NewOpc = ARMISD::VST4LN_UPD;
14286 NumVecs = 4; isLoadOp = false; isLaneOp = true; break;
14287 }
14288 } else {
14289 isLaneOp = true;
14290 switch (N->getOpcode()) {
14291       default: llvm_unreachable("unexpected opcode for Neon base update");
14292 case ARMISD::VLD1DUP: NewOpc = ARMISD::VLD1DUP_UPD; NumVecs = 1; break;
14293 case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break;
14294 case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break;
14295 case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break;
14296 case ISD::LOAD: NewOpc = ARMISD::VLD1_UPD;
14297 NumVecs = 1; isLaneOp = false; break;
14298 case ISD::STORE: NewOpc = ARMISD::VST1_UPD;
14299 NumVecs = 1; isLaneOp = false; isLoadOp = false; break;
14300 }
14301 }
14302
14303 // Find the size of memory referenced by the load/store.
14304 EVT VecTy;
14305 if (isLoadOp) {
14306 VecTy = N->getValueType(0);
14307 } else if (isIntrinsic) {
14308 VecTy = N->getOperand(AddrOpIdx+1).getValueType();
14309 } else {
14310       assert(isStore && "Node has to be a load, a store, or an intrinsic!");
14311 VecTy = N->getOperand(1).getValueType();
14312 }
14313
14314 unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
14315 if (isLaneOp)
14316 NumBytes /= VecTy.getVectorNumElements();
14317
14318 // If the increment is a constant, it must match the memory ref size.
14319 SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
14320 ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode());
14321 if (NumBytes >= 3 * 16 && (!CInc || CInc->getZExtValue() != NumBytes)) {
14322 // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
14323 // separate instructions that make it harder to use a non-constant update.
14324 continue;
14325 }
14326
14327 // OK, we found an ADD we can fold into the base update.
14328 // Now, create a _UPD node, taking care of not breaking alignment.
14329
14330 EVT AlignedVecTy = VecTy;
14331 unsigned Alignment = MemN->getAlignment();
14332
14333 // If this is a less-than-standard-aligned load/store, change the type to
14334 // match the standard alignment.
14335 // The alignment is overlooked when selecting _UPD variants; and it's
14336 // easier to introduce bitcasts here than fix that.
14337 // There are 3 ways to get to this base-update combine:
14338 // - intrinsics: they are assumed to be properly aligned (to the standard
14339 // alignment of the memory type), so we don't need to do anything.
14340 // - ARMISD::VLDx nodes: they are only generated from the aforementioned
14341 // intrinsics, so, likewise, there's nothing to do.
14342 // - generic load/store instructions: the alignment is specified as an
14343 // explicit operand, rather than implicitly as the standard alignment
14344   //     of the memory type (like the intrinsics). We need to change the
14345 // memory type to match the explicit alignment. That way, we don't
14346 // generate non-standard-aligned ARMISD::VLDx nodes.
14347 if (isa<LSBaseSDNode>(N)) {
14348 if (Alignment == 0)
14349 Alignment = 1;
14350 if (Alignment < VecTy.getScalarSizeInBits() / 8) {
14351 MVT EltTy = MVT::getIntegerVT(Alignment * 8);
14352         assert(NumVecs == 1 && "Unexpected multi-element generic load/store.");
14353         assert(!isLaneOp && "Unexpected generic load/store lane.");
14354 unsigned NumElts = NumBytes / (EltTy.getSizeInBits() / 8);
14355 AlignedVecTy = MVT::getVectorVT(EltTy, NumElts);
14356 }
14357 // Don't set an explicit alignment on regular load/stores that we want
14358 // to transform to VLD/VST 1_UPD nodes.
14359 // This matches the behavior of regular load/stores, which only get an
14360 // explicit alignment if the MMO alignment is larger than the standard
14361 // alignment of the memory type.
14362 // Intrinsics, however, always get an explicit alignment, set to the
14363 // alignment of the MMO.
14364 Alignment = 1;
14365 }
14366
14367 // Create the new updating load/store node.
14368 // First, create an SDVTList for the new updating node's results.
14369 EVT Tys[6];
14370 unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
14371 unsigned n;
14372 for (n = 0; n < NumResultVecs; ++n)
14373 Tys[n] = AlignedVecTy;
14374 Tys[n++] = MVT::i32;
14375 Tys[n] = MVT::Other;
14376 SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs+2));
14377
14378 // Then, gather the new node's operands.
14379 SmallVector<SDValue, 8> Ops;
14380 Ops.push_back(N->getOperand(0)); // incoming chain
14381 Ops.push_back(N->getOperand(AddrOpIdx));
14382 Ops.push_back(Inc);
14383
14384 if (StoreSDNode *StN = dyn_cast<StoreSDNode>(N)) {
14385 // Try to match the intrinsic's signature
14386 Ops.push_back(StN->getValue());
14387 } else {
14388 // Loads (and of course intrinsics) match the intrinsics' signature,
14389 // so just add all but the alignment operand.
14390 for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands() - 1; ++i)
14391 Ops.push_back(N->getOperand(i));
14392 }
14393
14394 // For all node types, the alignment operand is always the last one.
14395 Ops.push_back(DAG.getConstant(Alignment, dl, MVT::i32));
14396
14397 // If this is a non-standard-aligned STORE, the penultimate operand is the
14398 // stored value. Bitcast it to the aligned type.
14399 if (AlignedVecTy != VecTy && N->getOpcode() == ISD::STORE) {
14400 SDValue &StVal = Ops[Ops.size()-2];
14401 StVal = DAG.getNode(ISD::BITCAST, dl, AlignedVecTy, StVal);
14402 }
14403
14404 EVT LoadVT = isLaneOp ? VecTy.getVectorElementType() : AlignedVecTy;
14405 SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, Ops, LoadVT,
14406 MemN->getMemOperand());
14407
14408 // Update the uses.
14409 SmallVector<SDValue, 5> NewResults;
14410 for (unsigned i = 0; i < NumResultVecs; ++i)
14411 NewResults.push_back(SDValue(UpdN.getNode(), i));
14412
14413     // If this is a non-standard-aligned LOAD, the first result is the loaded
14414 // value. Bitcast it to the expected result type.
14415 if (AlignedVecTy != VecTy && N->getOpcode() == ISD::LOAD) {
14416 SDValue &LdVal = NewResults[0];
14417 LdVal = DAG.getNode(ISD::BITCAST, dl, VecTy, LdVal);
14418 }
14419
14420 NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain
14421 DCI.CombineTo(N, NewResults);
14422 DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
14423
14424 break;
14425 }
14426 return SDValue();
14427}
14428
14429static SDValue PerformVLDCombine(SDNode *N,
14430 TargetLowering::DAGCombinerInfo &DCI) {
14431 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
14432 return SDValue();
14433
14434 return CombineBaseUpdate(N, DCI);
14435}
14436
14437static SDValue PerformMVEVLDCombine(SDNode *N,
14438 TargetLowering::DAGCombinerInfo &DCI) {
14439 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
14440 return SDValue();
14441
14442 SelectionDAG &DAG = DCI.DAG;
14443 SDValue Addr = N->getOperand(2);
14444 MemSDNode *MemN = cast<MemSDNode>(N);
14445 SDLoc dl(N);
14446
14447   // For the stores, where there are multiple intrinsics, we only actually
14448   // want to post-inc the last of them.
14449 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
14450 if (IntNo == Intrinsic::arm_mve_vst2q &&
14451 cast<ConstantSDNode>(N->getOperand(5))->getZExtValue() != 1)
14452 return SDValue();
14453 if (IntNo == Intrinsic::arm_mve_vst4q &&
14454 cast<ConstantSDNode>(N->getOperand(7))->getZExtValue() != 3)
14455 return SDValue();
14456
14457 // Search for a use of the address operand that is an increment.
14458 for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
14459 UE = Addr.getNode()->use_end();
14460 UI != UE; ++UI) {
14461 SDNode *User = *UI;
14462 if (User->getOpcode() != ISD::ADD ||
14463 UI.getUse().getResNo() != Addr.getResNo())
14464 continue;
14465
14466 // Check that the add is independent of the load/store. Otherwise, folding
14467 // it would create a cycle. We can avoid searching through Addr as it's a
14468 // predecessor to both.
14469 SmallPtrSet<const SDNode *, 32> Visited;
14470 SmallVector<const SDNode *, 16> Worklist;
14471 Visited.insert(Addr.getNode());
14472 Worklist.push_back(N);
14473 Worklist.push_back(User);
14474 if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
14475 SDNode::hasPredecessorHelper(User, Visited, Worklist))
14476 continue;
14477
14478 // Find the new opcode for the updating load/store.
14479 bool isLoadOp = true;
14480 unsigned NewOpc = 0;
14481 unsigned NumVecs = 0;
14482 switch (IntNo) {
14483 default:
14484       llvm_unreachable("unexpected intrinsic for MVE VLDn combine");
14485 case Intrinsic::arm_mve_vld2q:
14486 NewOpc = ARMISD::VLD2_UPD;
14487 NumVecs = 2;
14488 break;
14489 case Intrinsic::arm_mve_vld4q:
14490 NewOpc = ARMISD::VLD4_UPD;
14491 NumVecs = 4;
14492 break;
14493 case Intrinsic::arm_mve_vst2q:
14494 NewOpc = ARMISD::VST2_UPD;
14495 NumVecs = 2;
14496 isLoadOp = false;
14497 break;
14498 case Intrinsic::arm_mve_vst4q:
14499 NewOpc = ARMISD::VST4_UPD;
14500 NumVecs = 4;
14501 isLoadOp = false;
14502 break;
14503 }
14504
14505 // Find the size of memory referenced by the load/store.
14506 EVT VecTy;
14507 if (isLoadOp) {
14508 VecTy = N->getValueType(0);
14509 } else {
14510 VecTy = N->getOperand(3).getValueType();
14511 }
14512
14513 unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
14514
14515 // If the increment is a constant, it must match the memory ref size.
14516 SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
14517 ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode());
14518 if (!CInc || CInc->getZExtValue() != NumBytes)
14519 continue;
14520
14521 // Create the new updating load/store node.
14522 // First, create an SDVTList for the new updating node's results.
14523 EVT Tys[6];
14524 unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
14525 unsigned n;
14526 for (n = 0; n < NumResultVecs; ++n)
14527 Tys[n] = VecTy;
14528 Tys[n++] = MVT::i32;
14529 Tys[n] = MVT::Other;
14530 SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs + 2));
14531
14532 // Then, gather the new node's operands.
14533 SmallVector<SDValue, 8> Ops;
14534 Ops.push_back(N->getOperand(0)); // incoming chain
14535 Ops.push_back(N->getOperand(2)); // ptr
14536 Ops.push_back(Inc);
14537
14538 for (unsigned i = 3; i < N->getNumOperands(); ++i)
14539 Ops.push_back(N->getOperand(i));
14540
14541 SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, Ops, VecTy,
14542 MemN->getMemOperand());
14543
14544 // Update the uses.
14545 SmallVector<SDValue, 5> NewResults;
14546 for (unsigned i = 0; i < NumResultVecs; ++i)
14547 NewResults.push_back(SDValue(UpdN.getNode(), i));
14548
14549 NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1)); // chain
14550 DCI.CombineTo(N, NewResults);
14551 DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
14552
14553 break;
14554 }
14555
14556 return SDValue();
14557}
14558
14559/// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
14560/// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
14561/// are also VDUPLANEs. If so, combine them to a vldN-dup operation and
14562/// return true.
14563static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
14564 SelectionDAG &DAG = DCI.DAG;
14565 EVT VT = N->getValueType(0);
14566 // vldN-dup instructions only support 64-bit vectors for N > 1.
14567 if (!VT.is64BitVector())
14568 return false;
14569
14570 // Check if the VDUPLANE operand is a vldN-dup intrinsic.
14571 SDNode *VLD = N->getOperand(0).getNode();
14572 if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
14573 return false;
14574 unsigned NumVecs = 0;
14575 unsigned NewOpc = 0;
14576 unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue();
14577 if (IntNo == Intrinsic::arm_neon_vld2lane) {
14578 NumVecs = 2;
14579 NewOpc = ARMISD::VLD2DUP;
14580 } else if (IntNo == Intrinsic::arm_neon_vld3lane) {
14581 NumVecs = 3;
14582 NewOpc = ARMISD::VLD3DUP;
14583 } else if (IntNo == Intrinsic::arm_neon_vld4lane) {
14584 NumVecs = 4;
14585 NewOpc = ARMISD::VLD4DUP;
14586 } else {
14587 return false;
14588 }
14589
14590 // First check that all the vldN-lane uses are VDUPLANEs and that the lane
14591 // numbers match the load.
14592 unsigned VLDLaneNo =
14593 cast<ConstantSDNode>(VLD->getOperand(NumVecs+3))->getZExtValue();
14594 for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
14595 UI != UE; ++UI) {
14596 // Ignore uses of the chain result.
14597 if (UI.getUse().getResNo() == NumVecs)
14598 continue;
14599 SDNode *User = *UI;
14600 if (User->getOpcode() != ARMISD::VDUPLANE ||
14601 VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue())
14602 return false;
14603 }
14604
14605 // Create the vldN-dup node.
14606 EVT Tys[5];
14607 unsigned n;
14608 for (n = 0; n < NumVecs; ++n)
14609 Tys[n] = VT;
14610 Tys[n] = MVT::Other;
14611 SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumVecs+1));
14612 SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
14613 MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
14614 SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys,
14615 Ops, VLDMemInt->getMemoryVT(),
14616 VLDMemInt->getMemOperand());
14617
14618 // Update the uses.
14619 for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
14620 UI != UE; ++UI) {
14621 unsigned ResNo = UI.getUse().getResNo();
14622 // Ignore uses of the chain result.
14623 if (ResNo == NumVecs)
14624 continue;
14625 SDNode *User = *UI;
14626 DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo));
14627 }
14628
14629 // Now the vldN-lane intrinsic is dead except for its chain result.
14630 // Update uses of the chain.
14631 std::vector<SDValue> VLDDupResults;
14632 for (unsigned n = 0; n < NumVecs; ++n)
14633 VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));
14634 VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));
14635 DCI.CombineTo(VLD, VLDDupResults);
14636
14637 return true;
14638}
14639
14640/// PerformVDUPLANECombine - Target-specific dag combine xforms for
14641/// ARMISD::VDUPLANE.
14642static SDValue PerformVDUPLANECombine(SDNode *N,
14643 TargetLowering::DAGCombinerInfo &DCI,
14644 const ARMSubtarget *Subtarget) {
14645 SDValue Op = N->getOperand(0);
14646 EVT VT = N->getValueType(0);
14647
14648 // On MVE, we just convert the VDUPLANE to a VDUP with an extract.
14649 if (Subtarget->hasMVEIntegerOps()) {
14650 EVT ExtractVT = VT.getVectorElementType();
14651 // We need to ensure we are creating a legal type.
14652 if (!DCI.DAG.getTargetLoweringInfo().isTypeLegal(ExtractVT))
14653 ExtractVT = MVT::i32;
14654 SDValue Extract = DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), ExtractVT,
14655 N->getOperand(0), N->getOperand(1));
14656 return DCI.DAG.getNode(ARMISD::VDUP, SDLoc(N), VT, Extract);
14657 }
14658
14659 // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses
14660 // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation.
14661 if (CombineVLDDUP(N, DCI))
14662 return SDValue(N, 0);
14663
14664 // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
14665 // redundant. Ignore bit_converts for now; element sizes are checked below.
14666 while (Op.getOpcode() == ISD::BITCAST)
14667 Op = Op.getOperand(0);
14668 if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
14669 return SDValue();
14670
14671 // Make sure the VMOV element size is not bigger than the VDUPLANE elements.
14672 unsigned EltSize = Op.getScalarValueSizeInBits();
14673 // The canonical VMOV for a zero vector uses a 32-bit element size.
14674 unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
14675 unsigned EltBits;
14676 if (ARM_AM::decodeVMOVModImm(Imm, EltBits) == 0)
14677 EltSize = 8;
14678 if (EltSize > VT.getScalarSizeInBits())
14679 return SDValue();
14680
14681 return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
14682}
14683
14684/// PerformVDUPCombine - Target-specific dag combine xforms for ARMISD::VDUP.
14685static SDValue PerformVDUPCombine(SDNode *N,
14686 TargetLowering::DAGCombinerInfo &DCI,
14687 const ARMSubtarget *Subtarget) {
14688 SelectionDAG &DAG = DCI.DAG;
14689 SDValue Op = N->getOperand(0);
14690 SDLoc dl(N);
14691
14692 if (Subtarget->hasMVEIntegerOps()) {
14693 // Convert VDUP f32 -> VDUP BITCAST i32 under MVE, as we know the value will
14694 // need to come from a GPR.
14695 if (Op.getValueType() == MVT::f32)
14696 return DCI.DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0),
14697 DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op));
14698 else if (Op.getValueType() == MVT::f16)
14699 return DCI.DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0),
14700 DAG.getNode(ARMISD::VMOVrh, dl, MVT::i32, Op));
14701 }
14702
14703 if (!Subtarget->hasNEON())
14704 return SDValue();
14705
14706 // Match VDUP(LOAD) -> VLD1DUP.
14707 // We match this pattern here rather than waiting for isel because the
14708 // transform is only legal for unindexed loads.
14709 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode());
14710 if (LD && Op.hasOneUse() && LD->isUnindexed() &&
14711 LD->getMemoryVT() == N->getValueType(0).getVectorElementType()) {
14712 SDValue Ops[] = { LD->getOperand(0), LD->getOperand(1),
14713 DAG.getConstant(LD->getAlignment(), SDLoc(N), MVT::i32) };
14714 SDVTList SDTys = DAG.getVTList(N->getValueType(0), MVT::Other);
14715 SDValue VLDDup = DAG.getMemIntrinsicNode(ARMISD::VLD1DUP, SDLoc(N), SDTys,
14716 Ops, LD->getMemoryVT(),
14717 LD->getMemOperand());
14718 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), VLDDup.getValue(1));
14719 return VLDDup;
14720 }
14721
14722 return SDValue();
14723}
14724
14725static SDValue PerformLOADCombine(SDNode *N,
14726 TargetLowering::DAGCombinerInfo &DCI) {
14727 EVT VT = N->getValueType(0);
14728
14729 // If this is a legal vector load, try to combine it into a VLD1_UPD.
14730 if (ISD::isNormalLoad(N) && VT.isVector() &&
14731 DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
14732 return CombineBaseUpdate(N, DCI);
14733
14734 return SDValue();
14735}
14736
14737// Optimize trunc store (of multiple scalars) to shuffle and store. First,
14738// pack all of the elements in one place. Next, store to memory in fewer
14739// chunks.
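// Illustrative example (assumed types): a truncating store of v4i32 to v4i8 is
// rewritten as a v16i8 shuffle that packs the four low bytes into the bottom
// lanes, followed by a single i32 store of those packed bytes.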
14740static SDValue PerformTruncatingStoreCombine(StoreSDNode *St,
14741 SelectionDAG &DAG) {
14742 SDValue StVal = St->getValue();
14743 EVT VT = StVal.getValueType();
14744 if (!St->isTruncatingStore() || !VT.isVector())
14745 return SDValue();
14746 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14747 EVT StVT = St->getMemoryVT();
14748 unsigned NumElems = VT.getVectorNumElements();
14749   assert(StVT != VT && "Cannot truncate to the same type");
14750 unsigned FromEltSz = VT.getScalarSizeInBits();
14751 unsigned ToEltSz = StVT.getScalarSizeInBits();
14752
14753 // From, To sizes and ElemCount must be pow of two
14754 if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz))
14755 return SDValue();
14756
14757 // We are going to use the original vector elt for storing.
14758 // Accumulated smaller vector elements must be a multiple of the store size.
14759 if (0 != (NumElems * FromEltSz) % ToEltSz)
14760 return SDValue();
14761
14762 unsigned SizeRatio = FromEltSz / ToEltSz;
14763   assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits());
14764
14765 // Create a type on which we perform the shuffle.
14766 EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(),
14767 NumElems * SizeRatio);
14768   assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
14769
14770 SDLoc DL(St);
14771 SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
14772 SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
14773 for (unsigned i = 0; i < NumElems; ++i)
14774 ShuffleVec[i] = DAG.getDataLayout().isBigEndian() ? (i + 1) * SizeRatio - 1
14775 : i * SizeRatio;
14776
14777 // Can't shuffle using an illegal type.
14778 if (!TLI.isTypeLegal(WideVecVT))
14779 return SDValue();
14780
14781 SDValue Shuff = DAG.getVectorShuffle(
14782 WideVecVT, DL, WideVec, DAG.getUNDEF(WideVec.getValueType()), ShuffleVec);
14783 // At this point all of the data is stored at the bottom of the
14784 // register. We now need to save it to mem.
14785
14786 // Find the largest store unit
14787 MVT StoreType = MVT::i8;
14788 for (MVT Tp : MVT::integer_valuetypes()) {
14789 if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz)
14790 StoreType = Tp;
14791 }
14792 // Didn't find a legal store type.
14793 if (!TLI.isTypeLegal(StoreType))
14794 return SDValue();
14795
14796 // Bitcast the original vector into a vector of store-size units
14797 EVT StoreVecVT =
14798 EVT::getVectorVT(*DAG.getContext(), StoreType,
14799 VT.getSizeInBits() / EVT(StoreType).getSizeInBits());
14800   assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
14801 SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff);
14802 SmallVector<SDValue, 8> Chains;
14803 SDValue Increment = DAG.getConstant(StoreType.getSizeInBits() / 8, DL,
14804 TLI.getPointerTy(DAG.getDataLayout()));
14805 SDValue BasePtr = St->getBasePtr();
14806
14807 // Perform one or more big stores into memory.
14808 unsigned E = (ToEltSz * NumElems) / StoreType.getSizeInBits();
14809 for (unsigned I = 0; I < E; I++) {
14810 SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, StoreType,
14811 ShuffWide, DAG.getIntPtrConstant(I, DL));
14812 SDValue Ch =
14813 DAG.getStore(St->getChain(), DL, SubVec, BasePtr, St->getPointerInfo(),
14814 St->getAlignment(), St->getMemOperand()->getFlags());
14815 BasePtr =
14816 DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, Increment);
14817 Chains.push_back(Ch);
14818 }
14819 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
14820}
14821
14822 // Try taking a single vector store from a truncate (which would otherwise turn
14823// into an expensive buildvector) and splitting it into a series of narrowing
14824// stores.
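// Illustrative example (assumed types): a store of (v8i16 trunc v8i32 x) is
// emitted as two truncating stores, each taking a v4i32 subvector of x and
// storing it as 4 x i16, at byte offsets 0 and 8.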
14825static SDValue PerformSplittingToNarrowingStores(StoreSDNode *St,
14826 SelectionDAG &DAG) {
14827 if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
14828 return SDValue();
14829 SDValue Trunc = St->getValue();
14830 if (Trunc->getOpcode() != ISD::TRUNCATE && Trunc->getOpcode() != ISD::FP_ROUND)
14831 return SDValue();
14832 EVT FromVT = Trunc->getOperand(0).getValueType();
14833 EVT ToVT = Trunc.getValueType();
14834 if (!ToVT.isVector())
14835 return SDValue();
14836   assert(FromVT.getVectorNumElements() == ToVT.getVectorNumElements());
14837 EVT ToEltVT = ToVT.getVectorElementType();
14838 EVT FromEltVT = FromVT.getVectorElementType();
14839
14840 unsigned NumElements = 0;
14841 if (FromEltVT == MVT::i32 && (ToEltVT == MVT::i16 || ToEltVT == MVT::i8))
14842 NumElements = 4;
14843 if (FromEltVT == MVT::i16 && ToEltVT == MVT::i8)
14844 NumElements = 8;
14845 if (FromEltVT == MVT::f32 && ToEltVT == MVT::f16)
14846 NumElements = 4;
14847 if (NumElements == 0 ||
14848 (FromEltVT != MVT::f32 && FromVT.getVectorNumElements() == NumElements) ||
14849 FromVT.getVectorNumElements() % NumElements != 0)
14850 return SDValue();
14851
14852   // Test if the Trunc will be convertible to a VMOVN with a shuffle, and if so
14853 // use the VMOVN over splitting the store. We are looking for patterns of:
14854 // !rev: 0 N 1 N+1 2 N+2 ...
14855 // rev: N 0 N+1 1 N+2 2 ...
14856 // The shuffle may either be a single source (in which case N = NumElts/2) or
14857 // two inputs extended with concat to the same size (in which case N =
14858 // NumElts).
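  // E.g. (illustrative), for an 8-element result the single-source !rev form
  // corresponds to a mask of <0, 4, 1, 5, 2, 6, 3, 7> (N = 4), and the rev
  // form to <4, 0, 5, 1, 6, 2, 7, 3>.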
14859 auto isVMOVNShuffle = [&](ShuffleVectorSDNode *SVN, bool Rev) {
14860 ArrayRef<int> M = SVN->getMask();
14861 unsigned NumElts = ToVT.getVectorNumElements();
14862 if (SVN->getOperand(1).isUndef())
14863 NumElts /= 2;
14864
14865 unsigned Off0 = Rev ? NumElts : 0;
14866 unsigned Off1 = Rev ? 0 : NumElts;
14867
14868 for (unsigned I = 0; I < NumElts; I += 2) {
14869 if (M[I] >= 0 && M[I] != (int)(Off0 + I / 2))
14870 return false;
14871 if (M[I + 1] >= 0 && M[I + 1] != (int)(Off1 + I / 2))
14872 return false;
14873 }
14874
14875 return true;
14876 };
14877
14878 // It may be preferable to keep the store unsplit as the trunc may end up
14879 // being removed. Check that here.
14880 if (Trunc.getOperand(0).getOpcode() == ISD::SMIN) {
14881 if (SDValue U = PerformVQDMULHCombine(Trunc.getOperand(0).getNode(), DAG)) {
14882 DAG.ReplaceAllUsesWith(Trunc.getOperand(0), U);
14883 return SDValue();
14884 }
14885 }
14886 if (auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(Trunc.getOperand(0)))
14887 if (isVMOVNShuffle(Shuffle, false) || isVMOVNShuffle(Shuffle, true))
14888 return SDValue();
14889
14890 LLVMContext &C = *DAG.getContext();
14891 SDLoc DL(St);
14892 // Details about the old store
14893 SDValue Ch = St->getChain();
14894 SDValue BasePtr = St->getBasePtr();
14895 Align Alignment = St->getOriginalAlign();
14896 MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();
14897 AAMDNodes AAInfo = St->getAAInfo();
14898
14899 // We split the store into slices of NumElements. fp16 trunc stores are vcvt
14900 // and then stored as truncating integer stores.
14901 EVT NewFromVT = EVT::getVectorVT(C, FromEltVT, NumElements);
14902 EVT NewToVT = EVT::getVectorVT(
14903 C, EVT::getIntegerVT(C, ToEltVT.getSizeInBits()), NumElements);
14904
14905 SmallVector<SDValue, 4> Stores;
14906 for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
14907 unsigned NewOffset = i * NumElements * ToEltVT.getSizeInBits() / 8;
14908 SDValue NewPtr =
14909 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::Fixed(NewOffset));
14910
14911 SDValue Extract =
14912 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewFromVT, Trunc.getOperand(0),
14913 DAG.getConstant(i * NumElements, DL, MVT::i32));
14914
14915 if (ToEltVT == MVT::f16) {
14916 SDValue FPTrunc =
14917 DAG.getNode(ARMISD::VCVTN, DL, MVT::v8f16, DAG.getUNDEF(MVT::v8f16),
14918 Extract, DAG.getConstant(0, DL, MVT::i32));
14919 Extract = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, MVT::v4i32, FPTrunc);
14920 }
14921
14922 SDValue Store = DAG.getTruncStore(
14923 Ch, DL, Extract, NewPtr, St->getPointerInfo().getWithOffset(NewOffset),
14924 NewToVT, Alignment.value(), MMOFlags, AAInfo);
14925 Stores.push_back(Store);
14926 }
14927 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
14928}
14929
14930// Given a floating point store from an extracted vector, with an integer
14931// VGETLANE that already exists, store the existing VGETLANEu directly. This can
14932 // help reduce fp register pressure, avoids the fp extract, and allows the use
14933 // of more integer post-inc stores that are not available with vstr.
14934static SDValue PerformExtractFpToIntStores(StoreSDNode *St, SelectionDAG &DAG) {
14935 if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
14936 return SDValue();
14937 SDValue Extract = St->getValue();
14938 EVT VT = Extract.getValueType();
14939 // For now only uses f16. This may be useful for f32 too, but that will
14940 // be bitcast(extract), not the VGETLANEu we currently check here.
14941 if (VT != MVT::f16 || Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
14942 return SDValue();
14943
14944 SDNode *GetLane =
14945 DAG.getNodeIfExists(ARMISD::VGETLANEu, DAG.getVTList(MVT::i32),
14946 {Extract.getOperand(0), Extract.getOperand(1)});
14947 if (!GetLane)
14948 return SDValue();
14949
14950 LLVMContext &C = *DAG.getContext();
14951 SDLoc DL(St);
14952 // Create a new integer store to replace the existing floating point version.
14953 SDValue Ch = St->getChain();
14954 SDValue BasePtr = St->getBasePtr();
14955 Align Alignment = St->getOriginalAlign();
14956 MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();
14957 AAMDNodes AAInfo = St->getAAInfo();
14958 EVT NewToVT = EVT::getIntegerVT(C, VT.getSizeInBits());
14959 SDValue Store = DAG.getTruncStore(Ch, DL, SDValue(GetLane, 0), BasePtr,
14960 St->getPointerInfo(), NewToVT,
14961 Alignment.value(), MMOFlags, AAInfo);
14962
14963 return Store;
14964}
14965
14966/// PerformSTORECombine - Target-specific dag combine xforms for
14967/// ISD::STORE.
14968static SDValue PerformSTORECombine(SDNode *N,
14969 TargetLowering::DAGCombinerInfo &DCI,
14970 const ARMSubtarget *Subtarget) {
14971 StoreSDNode *St = cast<StoreSDNode>(N);
14972 if (St->isVolatile())
14973 return SDValue();
14974 SDValue StVal = St->getValue();
14975 EVT VT = StVal.getValueType();
14976
14977 if (Subtarget->hasNEON())
14978 if (SDValue Store = PerformTruncatingStoreCombine(St, DCI.DAG))
14979 return Store;
14980
14981 if (Subtarget->hasMVEIntegerOps()) {
14982 if (SDValue NewToken = PerformSplittingToNarrowingStores(St, DCI.DAG))
14983 return NewToken;
14984 if (SDValue NewChain = PerformExtractFpToIntStores(St, DCI.DAG))
14985 return NewChain;
14986 }
14987
14988 if (!ISD::isNormalStore(St))
14989 return SDValue();
14990
14991 // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and
14992 // ARM stores of arguments in the same cache line.
14993 if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
14994 StVal.getNode()->hasOneUse()) {
14995 SelectionDAG &DAG = DCI.DAG;
14996 bool isBigEndian = DAG.getDataLayout().isBigEndian();
14997 SDLoc DL(St);
14998 SDValue BasePtr = St->getBasePtr();
14999 SDValue NewST1 = DAG.getStore(
15000 St->getChain(), DL, StVal.getNode()->getOperand(isBigEndian ? 1 : 0),
15001 BasePtr, St->getPointerInfo(), St->getOriginalAlign(),
15002 St->getMemOperand()->getFlags());
15003
15004 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
15005 DAG.getConstant(4, DL, MVT::i32));
15006 return DAG.getStore(NewST1.getValue(0), DL,
15007 StVal.getNode()->getOperand(isBigEndian ? 0 : 1),
15008 OffsetPtr, St->getPointerInfo().getWithOffset(4),
15009 St->getOriginalAlign(),
15010 St->getMemOperand()->getFlags());
15011 }
15012
15013 if (StVal.getValueType() == MVT::i64 &&
15014 StVal.getNode()->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
15015
15016 // Bitcast an i64 store extracted from a vector to f64.
15017 // Otherwise, the i64 value will be legalized to a pair of i32 values.
15018 SelectionDAG &DAG = DCI.DAG;
15019 SDLoc dl(StVal);
15020 SDValue IntVec = StVal.getOperand(0);
15021 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
15022 IntVec.getValueType().getVectorNumElements());
15023 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);
15024 SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
15025 Vec, StVal.getOperand(1));
15026 dl = SDLoc(N);
15027 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);
15028 // Make the DAGCombiner fold the bitcasts.
15029 DCI.AddToWorklist(Vec.getNode());
15030 DCI.AddToWorklist(ExtElt.getNode());
15031 DCI.AddToWorklist(V.getNode());
15032 return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
15033 St->getPointerInfo(), St->getAlignment(),
15034 St->getMemOperand()->getFlags(), St->getAAInfo());
15035 }
15036
15037 // If this is a legal vector store, try to combine it into a VST1_UPD.
15038 if (Subtarget->hasNEON() && ISD::isNormalStore(N) && VT.isVector() &&
15039 DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
15040 return CombineBaseUpdate(N, DCI);
15041
15042 return SDValue();
15043}
15044
15045/// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD)
15046/// can replace combinations of VMUL and VCVT (floating-point to integer)
15047/// when the VMUL has a constant operand that is a power of 2.
15048///
15049/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
15050/// vmul.f32 d16, d17, d16
15051/// vcvt.s32.f32 d16, d16
15052/// becomes:
15053/// vcvt.s32.f32 d16, d16, #3
15054static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG,
15055 const ARMSubtarget *Subtarget) {
15056 if (!Subtarget->hasNEON())
15057 return SDValue();
15058
15059 SDValue Op = N->getOperand(0);
15060 if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
15061 Op.getOpcode() != ISD::FMUL)
15062 return SDValue();
15063
15064 SDValue ConstVec = Op->getOperand(1);
15065 if (!isa<BuildVectorSDNode>(ConstVec))
15066 return SDValue();
15067
15068 MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
15069 uint32_t FloatBits = FloatTy.getSizeInBits();
15070 MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
15071 uint32_t IntBits = IntTy.getSizeInBits();
15072 unsigned NumLanes = Op.getValueType().getVectorNumElements();
15073 if (FloatBits != 32 || IntBits > 32 || (NumLanes != 4 && NumLanes != 2)) {
15074 // These instructions only exist converting from f32 to i32. We can handle
15075 // smaller integers by generating an extra truncate, but larger ones would
15076 // be lossy. We also can't handle anything other than 2 or 4 lanes, since
15077    // these instructions only support v2i32/v4i32 types.
15078 return SDValue();
15079 }
15080
15081 BitVector UndefElements;
15082 BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
15083 int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
15084 if (C == -1 || C == 0 || C > 32)
15085 return SDValue();
15086
15087 SDLoc dl(N);
15088 bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;
15089 unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
15090 Intrinsic::arm_neon_vcvtfp2fxu;
15091 SDValue FixConv = DAG.getNode(
15092 ISD::INTRINSIC_WO_CHAIN, dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
15093 DAG.getConstant(IntrinsicOpcode, dl, MVT::i32), Op->getOperand(0),
15094 DAG.getConstant(C, dl, MVT::i32));
15095
15096 if (IntBits < FloatBits)
15097 FixConv = DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), FixConv);
15098
15099 return FixConv;
15100}
15101
15102/// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD)
15103/// can replace combinations of VCVT (integer to floating-point) and VDIV
15104/// when the VDIV has a constant operand that is a power of 2.
15105///
15106/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
15107/// vcvt.f32.s32 d16, d16
15108/// vdiv.f32 d16, d17, d16
15109/// becomes:
15110/// vcvt.f32.s32 d16, d16, #3
15111static SDValue PerformVDIVCombine(SDNode *N, SelectionDAG &DAG,
15112 const ARMSubtarget *Subtarget) {
15113 if (!Subtarget->hasNEON())
15114 return SDValue();
15115
15116 SDValue Op = N->getOperand(0);
15117 unsigned OpOpcode = Op.getNode()->getOpcode();
15118 if (!N->getValueType(0).isVector() || !N->getValueType(0).isSimple() ||
15119 (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP))
15120 return SDValue();
15121
15122 SDValue ConstVec = N->getOperand(1);
15123 if (!isa<BuildVectorSDNode>(ConstVec))
15124 return SDValue();
15125
15126 MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
15127 uint32_t FloatBits = FloatTy.getSizeInBits();
15128 MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
15129 uint32_t IntBits = IntTy.getSizeInBits();
15130 unsigned NumLanes = Op.getValueType().getVectorNumElements();
15131 if (FloatBits != 32 || IntBits > 32 || (NumLanes != 4 && NumLanes != 2)) {
15132 // These instructions only exist converting from i32 to f32. We can handle
15133 // smaller integers by generating an extra extend, but larger ones would
15134 // be lossy. We also can't handle anything other than 2 or 4 lanes, since
15135    // these instructions only support v2i32/v4i32 types.
15136 return SDValue();
15137 }
15138
15139 BitVector UndefElements;
15140 BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
15141 int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
15142 if (C == -1 || C == 0 || C > 32)
15143 return SDValue();
15144
15145 SDLoc dl(N);
15146 bool isSigned = OpOpcode == ISD::SINT_TO_FP;
15147 SDValue ConvInput = Op.getOperand(0);
15148 if (IntBits < FloatBits)
15149 ConvInput = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
15150 dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
15151 ConvInput);
15152
15153 unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp :
15154 Intrinsic::arm_neon_vcvtfxu2fp;
15155 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl,
15156 Op.getValueType(),
15157 DAG.getConstant(IntrinsicOpcode, dl, MVT::i32),
15158 ConvInput, DAG.getConstant(C, dl, MVT::i32));
15159}
15160
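// PerformVECREDUCE_ADDCombine - Convert a VECREDUCE_ADD of sign/zero extended
// vectors (optionally multiplied, and optionally predicated through a vselect
// against zero) into the MVE VADDV/VADDLV/VMLAV/VMLALV family of nodes, which
// take the narrower input types directly.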
15161static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG,
15162 const ARMSubtarget *ST) {
15163 if (!ST->hasMVEIntegerOps())
15164 return SDValue();
15165
15166  assert(N->getOpcode() == ISD::VECREDUCE_ADD);
15167 EVT ResVT = N->getValueType(0);
15168 SDValue N0 = N->getOperand(0);
15169 SDLoc dl(N);
15170
15171 // We are looking for something that will have illegal types if left alone,
15172  // but that we can convert to a single instruction under MVE. For example
15173 // vecreduce_add(sext(A, v8i32)) => VADDV.s16 A
15174 // or
15175 // vecreduce_add(mul(zext(A, v16i32), zext(B, v16i32))) => VMLADAV.u8 A, B
15176
15177 // Cases:
15178 // VADDV u/s 8/16/32
15179 // VMLAV u/s 8/16/32
15180 // VADDLV u/s 32
15181 // VMLALV u/s 16/32
15182
15183 // If the input vector is smaller than legal (v4i8/v4i16 for example) we can
15184 // extend it and use v4i32 instead.
15185 auto ExtendIfNeeded = [&](SDValue A, unsigned ExtendCode) {
15186 EVT AVT = A.getValueType();
15187 if (!AVT.is128BitVector())
15188 A = DAG.getNode(ExtendCode, dl,
15189 AVT.changeVectorElementType(MVT::getIntegerVT(
15190 128 / AVT.getVectorMinNumElements())),
15191 A);
15192 return A;
15193 };
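  // Match vecreduce.add(ext(A)) where A has one of the given element types,
  // returning A (extended to a full 128-bit vector if needed).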
15194 auto IsVADDV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes) {
15195 if (ResVT != RetTy || N0->getOpcode() != ExtendCode)
15196 return SDValue();
15197 SDValue A = N0->getOperand(0);
15198 if (llvm::any_of(ExtTypes, [&A](MVT Ty) { return A.getValueType() == Ty; }))
15199 return ExtendIfNeeded(A, ExtendCode);
15200 return SDValue();
15201 };
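  // As above, but with the extend wrapped in a vselect against zero; the
  // predicate is returned in Mask.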
15202 auto IsPredVADDV = [&](MVT RetTy, unsigned ExtendCode,
15203 ArrayRef<MVT> ExtTypes, SDValue &Mask) {
15204 if (ResVT != RetTy || N0->getOpcode() != ISD::VSELECT ||
15205 !ISD::isBuildVectorAllZeros(N0->getOperand(2).getNode()))
15206 return SDValue();
15207 Mask = N0->getOperand(0);
15208 SDValue Ext = N0->getOperand(1);
15209 if (Ext->getOpcode() != ExtendCode)
15210 return SDValue();
15211 SDValue A = Ext->getOperand(0);
15212 if (llvm::any_of(ExtTypes, [&A](MVT Ty) { return A.getValueType() == Ty; }))
15213 return ExtendIfNeeded(A, ExtendCode);
15214 return SDValue();
15215 };
15216 auto IsVMLAV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes,
15217 SDValue &A, SDValue &B) {
15218 // For a vmla we are trying to match a larger pattern:
15219 // ExtA = sext/zext A
15220 // ExtB = sext/zext B
15221 // Mul = mul ExtA, ExtB
15222 // vecreduce.add Mul
15223    // There might also be an extra extend between the mul and the addreduce, so
15224 // long as the bitwidth is high enough to make them equivalent (for example
15225 // original v8i16 might be mul at v8i32 and the reduce happens at v8i64).
15226 if (ResVT != RetTy)
15227 return false;
15228 SDValue Mul = N0;
15229 if (Mul->getOpcode() == ExtendCode &&
15230 Mul->getOperand(0).getScalarValueSizeInBits() * 2 >=
15231 ResVT.getScalarSizeInBits())
15232 Mul = Mul->getOperand(0);
15233 if (Mul->getOpcode() != ISD::MUL)
15234 return false;
15235 SDValue ExtA = Mul->getOperand(0);
15236 SDValue ExtB = Mul->getOperand(1);
15237 if (ExtA->getOpcode() != ExtendCode && ExtB->getOpcode() != ExtendCode)
15238 return false;
15239 A = ExtA->getOperand(0);
15240 B = ExtB->getOperand(0);
15241 if (A.getValueType() == B.getValueType() &&
15242 llvm::any_of(ExtTypes,
15243 [&A](MVT Ty) { return A.getValueType() == Ty; })) {
15244 A = ExtendIfNeeded(A, ExtendCode);
15245 B = ExtendIfNeeded(B, ExtendCode);
15246 return true;
15247 }
15248 return false;
15249 };
15250 auto IsPredVMLAV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes,
15251 SDValue &A, SDValue &B, SDValue &Mask) {
15252 // Same as the pattern above with a select for the zero predicated lanes
15253 // ExtA = sext/zext A
15254 // ExtB = sext/zext B
15255 // Mul = mul ExtA, ExtB
15256 // N0 = select Mask, Mul, 0
15257 // vecreduce.add N0
15258 if (ResVT != RetTy || N0->getOpcode() != ISD::VSELECT ||
15259 !ISD::isBuildVectorAllZeros(N0->getOperand(2).getNode()))
15260 return false;
15261 Mask = N0->getOperand(0);
15262 SDValue Mul = N0->getOperand(1);
15263 if (Mul->getOpcode() == ExtendCode &&
15264 Mul->getOperand(0).getScalarValueSizeInBits() * 2 >=
15265 ResVT.getScalarSizeInBits())
15266 Mul = Mul->getOperand(0);
15267 if (Mul->getOpcode() != ISD::MUL)
15268 return false;
15269 SDValue ExtA = Mul->getOperand(0);
15270 SDValue ExtB = Mul->getOperand(1);
15271 if (ExtA->getOpcode() != ExtendCode && ExtB->getOpcode() != ExtendCode)
15272 return false;
15273 A = ExtA->getOperand(0);
15274 B = ExtB->getOperand(0);
15275 if (A.getValueType() == B.getValueType() &&
15276 llvm::any_of(ExtTypes,
15277 [&A](MVT Ty) { return A.getValueType() == Ty; })) {
15278 A = ExtendIfNeeded(A, ExtendCode);
15279 B = ExtendIfNeeded(B, ExtendCode);
15280 return true;
15281 }
15282 return false;
15283 };
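  // The 64-bit forms return their result in two i32 halves; build the node
  // and glue the halves back together into a single i64.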
15284 auto Create64bitNode = [&](unsigned Opcode, ArrayRef<SDValue> Ops) {
15285 SDValue Node = DAG.getNode(Opcode, dl, {MVT::i32, MVT::i32}, Ops);
15286 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Node,
15287 SDValue(Node.getNode(), 1));
15288 };
15289
15290 if (SDValue A = IsVADDV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}))
15291 return DAG.getNode(ARMISD::VADDVs, dl, ResVT, A);
15292 if (SDValue A = IsVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}))
15293 return DAG.getNode(ARMISD::VADDVu, dl, ResVT, A);
15294 if (SDValue A = IsVADDV(MVT::i64, ISD::SIGN_EXTEND,
15295 {MVT::v4i8, MVT::v4i16, MVT::v4i32}))
15296 return Create64bitNode(ARMISD::VADDLVs, {A});
15297 if (SDValue A = IsVADDV(MVT::i64, ISD::ZERO_EXTEND,
15298 {MVT::v4i8, MVT::v4i16, MVT::v4i32}))
15299 return Create64bitNode(ARMISD::VADDLVu, {A});
15300 if (SDValue A = IsVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}))
15301 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
15302 DAG.getNode(ARMISD::VADDVs, dl, MVT::i32, A));
15303 if (SDValue A = IsVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}))
15304 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
15305 DAG.getNode(ARMISD::VADDVu, dl, MVT::i32, A));
15306
15307 SDValue Mask;
15308 if (SDValue A = IsPredVADDV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask))
15309 return DAG.getNode(ARMISD::VADDVps, dl, ResVT, A, Mask);
15310 if (SDValue A = IsPredVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask))
15311 return DAG.getNode(ARMISD::VADDVpu, dl, ResVT, A, Mask);
15312 if (SDValue A = IsPredVADDV(MVT::i64, ISD::SIGN_EXTEND,
15313 {MVT::v4i8, MVT::v4i16, MVT::v4i32}, Mask))
15314 return Create64bitNode(ARMISD::VADDLVps, {A, Mask});
15315 if (SDValue A = IsPredVADDV(MVT::i64, ISD::ZERO_EXTEND,
15316 {MVT::v4i8, MVT::v4i16, MVT::v4i32}, Mask))
15317 return Create64bitNode(ARMISD::VADDLVpu, {A, Mask});
15318 if (SDValue A = IsPredVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, Mask))
15319 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
15320 DAG.getNode(ARMISD::VADDVps, dl, MVT::i32, A, Mask));
15321 if (SDValue A = IsPredVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, Mask))
15322 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
15323 DAG.getNode(ARMISD::VADDVpu, dl, MVT::i32, A, Mask));
15324
15325 SDValue A, B;
15326 if (IsVMLAV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B))
15327 return DAG.getNode(ARMISD::VMLAVs, dl, ResVT, A, B);
15328 if (IsVMLAV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B))
15329 return DAG.getNode(ARMISD::VMLAVu, dl, ResVT, A, B);
15330 if (IsVMLAV(MVT::i64, ISD::SIGN_EXTEND,
15331 {MVT::v8i8, MVT::v8i16, MVT::v4i8, MVT::v4i16, MVT::v4i32}, A, B))
15332 return Create64bitNode(ARMISD::VMLALVs, {A, B});
15333 if (IsVMLAV(MVT::i64, ISD::ZERO_EXTEND,
15334 {MVT::v8i8, MVT::v8i16, MVT::v4i8, MVT::v4i16, MVT::v4i32}, A, B))
15335 return Create64bitNode(ARMISD::VMLALVu, {A, B});
15336 if (IsVMLAV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, A, B))
15337 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
15338 DAG.getNode(ARMISD::VMLAVs, dl, MVT::i32, A, B));
15339 if (IsVMLAV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, A, B))
15340 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
15341 DAG.getNode(ARMISD::VMLAVu, dl, MVT::i32, A, B));
15342
15343 if (IsPredVMLAV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B, Mask))
15344 return DAG.getNode(ARMISD::VMLAVps, dl, ResVT, A, B, Mask);
15345 if (IsPredVMLAV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B, Mask))
15346 return DAG.getNode(ARMISD::VMLAVpu, dl, ResVT, A, B, Mask);
15347 if (IsPredVMLAV(MVT::i64, ISD::SIGN_EXTEND,
15348 {MVT::v8i8, MVT::v8i16, MVT::v4i8, MVT::v4i16, MVT::v4i32}, A,
15349 B, Mask))
15350 return Create64bitNode(ARMISD::VMLALVps, {A, B, Mask});
15351 if (IsPredVMLAV(MVT::i64, ISD::ZERO_EXTEND,
15352 {MVT::v8i8, MVT::v8i16, MVT::v4i8, MVT::v4i16, MVT::v4i32}, A,
15353 B, Mask))
15354 return Create64bitNode(ARMISD::VMLALVpu, {A, B, Mask});
15355 if (IsPredVMLAV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, A, B, Mask))
15356 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
15357 DAG.getNode(ARMISD::VMLAVps, dl, MVT::i32, A, B, Mask));
15358 if (IsPredVMLAV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, A, B, Mask))
15359 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
15360 DAG.getNode(ARMISD::VMLAVpu, dl, MVT::i32, A, B, Mask));
15361
15362 // Some complications. We can get a case where the two inputs of the mul are
15363  // the same, in which case the output sext will have been helpfully converted to a
15364 // zext. Turn it back.
15365 SDValue Op = N0;
15366 if (Op->getOpcode() == ISD::VSELECT)
15367 Op = Op->getOperand(1);
15368 if (Op->getOpcode() == ISD::ZERO_EXTEND &&
15369 Op->getOperand(0)->getOpcode() == ISD::MUL) {
15370 SDValue Mul = Op->getOperand(0);
15371 if (Mul->getOperand(0) == Mul->getOperand(1) &&
15372 Mul->getOperand(0)->getOpcode() == ISD::SIGN_EXTEND) {
15373 SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, N0->getValueType(0), Mul);
15374 if (Op != N0)
15375 Ext = DAG.getNode(ISD::VSELECT, dl, N0->getValueType(0),
15376 N0->getOperand(0), Ext, N0->getOperand(2));
15377 return DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, Ext);
15378 }
15379 }
15380
15381 return SDValue();
15382}
15383
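// PerformVMOVNCombine - Target-specific DAG combine for ARMISD::VMOVN.
// Removes undef operands, folds a VQMOVN feeding the inserted lanes and
// simplifies the demanded lanes of both operands.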
15384static SDValue PerformVMOVNCombine(SDNode *N,
15385 TargetLowering::DAGCombinerInfo &DCI) {
15386 SDValue Op0 = N->getOperand(0);
15387 SDValue Op1 = N->getOperand(1);
15388 unsigned IsTop = N->getConstantOperandVal(2);
15389
15390 // VMOVNT a undef -> a
15391 // VMOVNB a undef -> a
15392 // VMOVNB undef a -> a
15393 if (Op1->isUndef())
15394 return Op0;
15395 if (Op0->isUndef() && !IsTop)
15396 return Op1;
15397
15398 // VMOVNt(c, VQMOVNb(a, b)) => VQMOVNt(c, b)
15399 // VMOVNb(c, VQMOVNb(a, b)) => VQMOVNb(c, b)
15400 if ((Op1->getOpcode() == ARMISD::VQMOVNs ||
15401 Op1->getOpcode() == ARMISD::VQMOVNu) &&
15402 Op1->getConstantOperandVal(2) == 0)
15403 return DCI.DAG.getNode(Op1->getOpcode(), SDLoc(Op1), N->getValueType(0),
15404 Op0, Op1->getOperand(1), N->getOperand(2));
15405
15406 // Only the bottom lanes from Qm (Op1) and either the top or bottom lanes from
15407 // Qd (Op0) are demanded from a VMOVN, depending on whether we are inserting
15408 // into the top or bottom lanes.
15409 unsigned NumElts = N->getValueType(0).getVectorNumElements();
15410 APInt Op1DemandedElts = APInt::getSplat(NumElts, APInt::getLowBitsSet(2, 1));
15411 APInt Op0DemandedElts =
15412 IsTop ? Op1DemandedElts
15413 : APInt::getSplat(NumElts, APInt::getHighBitsSet(2, 1));
15414
15415 APInt KnownUndef, KnownZero;
15416 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
15417 if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, KnownUndef,
15418 KnownZero, DCI))
15419 return SDValue(N, 0);
15420 if (TLI.SimplifyDemandedVectorElts(Op1, Op1DemandedElts, KnownUndef,
15421 KnownZero, DCI))
15422 return SDValue(N, 0);
15423
15424 return SDValue();
15425}
15426
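// PerformVQMOVNCombine - Target-specific DAG combine for ARMISD::VQMOVN.
// Only the lanes of Qd that are not overwritten by the narrowing insert are
// demanded, so try to simplify the other lanes away.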
15427static SDValue PerformVQMOVNCombine(SDNode *N,
15428 TargetLowering::DAGCombinerInfo &DCI) {
15429 SDValue Op0 = N->getOperand(0);
15430 unsigned IsTop = N->getConstantOperandVal(2);
15431
15432 unsigned NumElts = N->getValueType(0).getVectorNumElements();
15433 APInt Op0DemandedElts =
15434 APInt::getSplat(NumElts, IsTop ? APInt::getLowBitsSet(2, 1)
15435 : APInt::getHighBitsSet(2, 1));
15436
15437 APInt KnownUndef, KnownZero;
15438 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
15439 if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, KnownUndef,
15440 KnownZero, DCI))
15441 return SDValue(N, 0);
15442 return SDValue();
15443}
15444
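// PerformLongShiftCombine - Canonicalize ARMISD::LSLL/LSRL: a shift by zero
// becomes its inputs, and a small negative shift amount becomes a shift in
// the opposite direction.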
15445static SDValue PerformLongShiftCombine(SDNode *N, SelectionDAG &DAG) {
15446 SDLoc DL(N);
15447 SDValue Op0 = N->getOperand(0);
15448 SDValue Op1 = N->getOperand(1);
15449
15450  // Turn X << -C -> X >> C and vice versa. The negative shifts can come up from
15451 // uses of the intrinsics.
15452 if (auto C = dyn_cast<ConstantSDNode>(N->getOperand(2))) {
15453 int ShiftAmt = C->getSExtValue();
15454 if (ShiftAmt == 0) {
15455 SDValue Merge = DAG.getMergeValues({Op0, Op1}, DL);
15456 DAG.ReplaceAllUsesWith(N, Merge.getNode());
15457 return SDValue();
15458 }
15459
15460 if (ShiftAmt >= -32 && ShiftAmt < 0) {
15461 unsigned NewOpcode =
15462 N->getOpcode() == ARMISD::LSLL ? ARMISD::LSRL : ARMISD::LSLL;
15463 SDValue NewShift = DAG.getNode(NewOpcode, DL, N->getVTList(), Op0, Op1,
15464 DAG.getConstant(-ShiftAmt, DL, MVT::i32));
15465 DAG.ReplaceAllUsesWith(N, NewShift.getNode());
15466 return NewShift;
15467 }
15468 }
15469
15470 return SDValue();
15471}
15472
15473/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
15474SDValue ARMTargetLowering::PerformIntrinsicCombine(SDNode *N,
15475 DAGCombinerInfo &DCI) const {
15476 SelectionDAG &DAG = DCI.DAG;
15477 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
15478 switch (IntNo) {
15479 default:
15480 // Don't do anything for most intrinsics.
15481 break;
15482
15483 // Vector shifts: check for immediate versions and lower them.
15484 // Note: This is done during DAG combining instead of DAG legalizing because
15485 // the build_vectors for 64-bit vector element shift counts are generally
15486 // not legal, and it is hard to see their values after they get legalized to
15487 // loads from a constant pool.
15488 case Intrinsic::arm_neon_vshifts:
15489 case Intrinsic::arm_neon_vshiftu:
15490 case Intrinsic::arm_neon_vrshifts:
15491 case Intrinsic::arm_neon_vrshiftu:
15492 case Intrinsic::arm_neon_vrshiftn:
15493 case Intrinsic::arm_neon_vqshifts:
15494 case Intrinsic::arm_neon_vqshiftu:
15495 case Intrinsic::arm_neon_vqshiftsu:
15496 case Intrinsic::arm_neon_vqshiftns:
15497 case Intrinsic::arm_neon_vqshiftnu:
15498 case Intrinsic::arm_neon_vqshiftnsu:
15499 case Intrinsic::arm_neon_vqrshiftns:
15500 case Intrinsic::arm_neon_vqrshiftnu:
15501 case Intrinsic::arm_neon_vqrshiftnsu: {
15502 EVT VT = N->getOperand(1).getValueType();
15503 int64_t Cnt;
15504 unsigned VShiftOpc = 0;
15505
15506 switch (IntNo) {
15507 case Intrinsic::arm_neon_vshifts:
15508 case Intrinsic::arm_neon_vshiftu:
15509 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
15510 VShiftOpc = ARMISD::VSHLIMM;
15511 break;
15512 }
15513 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
15514 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ? ARMISD::VSHRsIMM
15515 : ARMISD::VSHRuIMM);
15516 break;
15517 }
15518 return SDValue();
15519
15520 case Intrinsic::arm_neon_vrshifts:
15521 case Intrinsic::arm_neon_vrshiftu:
15522 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))
15523 break;
15524 return SDValue();
15525
15526 case Intrinsic::arm_neon_vqshifts:
15527 case Intrinsic::arm_neon_vqshiftu:
15528 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
15529 break;
15530 return SDValue();
15531
15532 case Intrinsic::arm_neon_vqshiftsu:
15533 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
15534 break;
15535      llvm_unreachable("invalid shift count for vqshlu intrinsic");
15536
15537 case Intrinsic::arm_neon_vrshiftn:
15538 case Intrinsic::arm_neon_vqshiftns:
15539 case Intrinsic::arm_neon_vqshiftnu:
15540 case Intrinsic::arm_neon_vqshiftnsu:
15541 case Intrinsic::arm_neon_vqrshiftns:
15542 case Intrinsic::arm_neon_vqrshiftnu:
15543 case Intrinsic::arm_neon_vqrshiftnsu:
15544 // Narrowing shifts require an immediate right shift.
15545 if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
15546 break;
15547      llvm_unreachable("invalid shift count for narrowing vector shift "
15548                       "intrinsic");
15549
15550 default:
15551      llvm_unreachable("unhandled vector shift");
15552 }
15553
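    // The shift amount is valid; pick the target-specific immediate-shift
    // opcode for the remaining intrinsics (vshifts/vshiftu were set above).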
15554 switch (IntNo) {
15555 case Intrinsic::arm_neon_vshifts:
15556 case Intrinsic::arm_neon_vshiftu:
15557 // Opcode already set above.
15558 break;
15559 case Intrinsic::arm_neon_vrshifts:
15560 VShiftOpc = ARMISD::VRSHRsIMM;
15561 break;
15562 case Intrinsic::arm_neon_vrshiftu:
15563 VShiftOpc = ARMISD::VRSHRuIMM;
15564 break;
15565 case Intrinsic::arm_neon_vrshiftn:
15566 VShiftOpc = ARMISD::VRSHRNIMM;
15567 break;
15568 case Intrinsic::arm_neon_vqshifts:
15569 VShiftOpc = ARMISD::VQSHLsIMM;
15570 break;
15571 case Intrinsic::arm_neon_vqshiftu:
15572 VShiftOpc = ARMISD::VQSHLuIMM;
15573 break;
15574 case Intrinsic::arm_neon_vqshiftsu:
15575 VShiftOpc = ARMISD::VQSHLsuIMM;
15576 break;
15577 case Intrinsic::arm_neon_vqshiftns:
15578 VShiftOpc = ARMISD::VQSHRNsIMM;
15579 break;
15580 case Intrinsic::arm_neon_vqshiftnu:
15581 VShiftOpc = ARMISD::VQSHRNuIMM;
15582 break;
15583 case Intrinsic::arm_neon_vqshiftnsu:
15584 VShiftOpc = ARMISD::VQSHRNsuIMM;
15585 break;
15586 case Intrinsic::arm_neon_vqrshiftns:
15587 VShiftOpc = ARMISD::VQRSHRNsIMM;
15588 break;
15589 case Intrinsic::arm_neon_vqrshiftnu:
15590 VShiftOpc = ARMISD::VQRSHRNuIMM;
15591 break;
15592 case Intrinsic::arm_neon_vqrshiftnsu:
15593 VShiftOpc = ARMISD::VQRSHRNsuIMM;
15594 break;
15595 }
15596
15597 SDLoc dl(N);
15598 return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
15599 N->getOperand(1), DAG.getConstant(Cnt, dl, MVT::i32));
15600 }
15601
15602 case Intrinsic::arm_neon_vshiftins: {
15603 EVT VT = N->getOperand(1).getValueType();
15604 int64_t Cnt;
15605 unsigned VShiftOpc = 0;
15606
15607 if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
15608 VShiftOpc = ARMISD::VSLIIMM;
15609 else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
15610 VShiftOpc = ARMISD::VSRIIMM;
15611 else {
15612      llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
15613 }
15614
15615 SDLoc dl(N);
15616 return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
15617 N->getOperand(1), N->getOperand(2),
15618 DAG.getConstant(Cnt, dl, MVT::i32));
15619 }
15620
15621 case Intrinsic::arm_neon_vqrshifts:
15622 case Intrinsic::arm_neon_vqrshiftu:
15623 // No immediate versions of these to check for.
15624 break;
15625
15626 case Intrinsic::arm_mve_vqdmlah:
15627 case Intrinsic::arm_mve_vqdmlash:
15628 case Intrinsic::arm_mve_vqrdmlah:
15629 case Intrinsic::arm_mve_vqrdmlash:
15630 case Intrinsic::arm_mve_vmla_n_predicated:
15631 case Intrinsic::arm_mve_vmlas_n_predicated:
15632 case Intrinsic::arm_mve_vqdmlah_predicated:
15633 case Intrinsic::arm_mve_vqdmlash_predicated:
15634 case Intrinsic::arm_mve_vqrdmlah_predicated:
15635 case Intrinsic::arm_mve_vqrdmlash_predicated: {
15636 // These intrinsics all take an i32 scalar operand which is narrowed to the
15637 // size of a single lane of the vector type they return. So we don't need
15638 // any bits of that operand above that point, which allows us to eliminate
15639 // uxth/sxth.
15640 unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
15641 APInt DemandedMask = APInt::getLowBitsSet(32, BitWidth);
15642 if (SimplifyDemandedBits(N->getOperand(3), DemandedMask, DCI))
15643 return SDValue();
15644 break;
15645 }
15646
15647 case Intrinsic::arm_mve_minv:
15648 case Intrinsic::arm_mve_maxv:
15649 case Intrinsic::arm_mve_minav:
15650 case Intrinsic::arm_mve_maxav:
15651 case Intrinsic::arm_mve_minv_predicated:
15652 case Intrinsic::arm_mve_maxv_predicated:
15653 case Intrinsic::arm_mve_minav_predicated:
15654 case Intrinsic::arm_mve_maxav_predicated: {
15655 // These intrinsics all take an i32 scalar operand which is narrowed to the
15656 // size of a single lane of the vector type they take as the other input.
15657 unsigned BitWidth = N->getOperand(2)->getValueType(0).getScalarSizeInBits();
15658 APInt DemandedMask = APInt::getLowBitsSet(32, BitWidth);
15659 if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
15660 return SDValue();
15661 break;
15662 }
15663
15664 case Intrinsic::arm_mve_addv: {
15665 // Turn this intrinsic straight into the appropriate ARMISD::VADDV node,
15666    // which allows PerformADDVecReduce to turn it into VADDLV when possible.
15667 bool Unsigned = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
15668 unsigned Opc = Unsigned ? ARMISD::VADDVu : ARMISD::VADDVs;
15669 return DAG.getNode(Opc, SDLoc(N), N->getVTList(), N->getOperand(1));
15670 }
15671
15672 case Intrinsic::arm_mve_addlv:
15673 case Intrinsic::arm_mve_addlv_predicated: {
15674 // Same for these, but ARMISD::VADDLV has to be followed by a BUILD_PAIR
15675 // which recombines the two outputs into an i64
15676 bool Unsigned = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
15677 unsigned Opc = IntNo == Intrinsic::arm_mve_addlv ?
15678 (Unsigned ? ARMISD::VADDLVu : ARMISD::VADDLVs) :
15679 (Unsigned ? ARMISD::VADDLVpu : ARMISD::VADDLVps);
15680
15681 SmallVector<SDValue, 4> Ops;
15682 for (unsigned i = 1, e = N->getNumOperands(); i < e; i++)
15683 if (i != 2) // skip the unsigned flag
15684 Ops.push_back(N->getOperand(i));
15685
15686 SDLoc dl(N);
15687 SDValue val = DAG.getNode(Opc, dl, {MVT::i32, MVT::i32}, Ops);
15688 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, val.getValue(0),
15689 val.getValue(1));
15690 }
15691 }
15692
15693 return SDValue();
15694}
15695
15696/// PerformShiftCombine - Checks for immediate versions of vector shifts and
15697/// lowers them. As with the vector shift intrinsics, this is done during DAG
15698/// combining instead of DAG legalizing because the build_vectors for 64-bit
15699/// vector element shift counts are generally not legal, and it is hard to see
15700/// their values after they get legalized to loads from a constant pool.
15701static SDValue PerformShiftCombine(SDNode *N,
15702 TargetLowering::DAGCombinerInfo &DCI,
15703 const ARMSubtarget *ST) {
15704 SelectionDAG &DAG = DCI.DAG;
15705 EVT VT = N->getValueType(0);
15706 if (N->getOpcode() == ISD::SRL && VT == MVT::i32 && ST->hasV6Ops()) {
15707 // Canonicalize (srl (bswap x), 16) to (rotr (bswap x), 16) if the high
15708 // 16-bits of x is zero. This optimizes rev + lsr 16 to rev16.
15709 SDValue N1 = N->getOperand(1);
15710 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
15711 SDValue N0 = N->getOperand(0);
15712 if (C->getZExtValue() == 16 && N0.getOpcode() == ISD::BSWAP &&
15713 DAG.MaskedValueIsZero(N0.getOperand(0),
15714 APInt::getHighBitsSet(32, 16)))
15715 return DAG.getNode(ISD::ROTR, SDLoc(N), VT, N0, N1);
15716 }
15717 }
15718
15719 if (ST->isThumb1Only() && N->getOpcode() == ISD::SHL && VT == MVT::i32 &&
15720 N->getOperand(0)->getOpcode() == ISD::AND &&
15721 N->getOperand(0)->hasOneUse()) {
15722 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
15723 return SDValue();
15724 // Look for the pattern (shl (and x, AndMask), ShiftAmt). This doesn't
15725 // usually show up because instcombine prefers to canonicalize it to
15726 // (and (shl x, ShiftAmt) (shl AndMask, ShiftAmt)), but the shift can come
15727 // out of GEP lowering in some cases.
15728 SDValue N0 = N->getOperand(0);
15729 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
15730 if (!ShiftAmtNode)
15731 return SDValue();
15732 uint32_t ShiftAmt = static_cast<uint32_t>(ShiftAmtNode->getZExtValue());
15733 ConstantSDNode *AndMaskNode = dyn_cast<ConstantSDNode>(N0->getOperand(1));
15734 if (!AndMaskNode)
15735 return SDValue();
15736 uint32_t AndMask = static_cast<uint32_t>(AndMaskNode->getZExtValue());
15737 // Don't transform uxtb/uxth.
15738 if (AndMask == 255 || AndMask == 65535)
15739 return SDValue();
15740 if (isMask_32(AndMask)) {
15741 uint32_t MaskedBits = countLeadingZeros(AndMask);
15742 if (MaskedBits > ShiftAmt) {
15743 SDLoc DL(N);
15744 SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
15745 DAG.getConstant(MaskedBits, DL, MVT::i32));
15746 return DAG.getNode(
15747 ISD::SRL, DL, MVT::i32, SHL,
15748 DAG.getConstant(MaskedBits - ShiftAmt, DL, MVT::i32));
15749 }
15750 }
15751 }
15752
15753 // Nothing to be done for scalar shifts.
15754 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15755 if (!VT.isVector() || !TLI.isTypeLegal(VT))
15756 return SDValue();
15757 if (ST->hasMVEIntegerOps() && VT == MVT::v2i64)
15758 return SDValue();
15759
15760 int64_t Cnt;
15761
15762 switch (N->getOpcode()) {
15763  default: llvm_unreachable("unexpected shift opcode");
15764
15765 case ISD::SHL:
15766 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) {
15767 SDLoc dl(N);
15768 return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),
15769 DAG.getConstant(Cnt, dl, MVT::i32));
15770 }
15771 break;
15772
15773 case ISD::SRA:
15774 case ISD::SRL:
15775 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
15776 unsigned VShiftOpc =
15777 (N->getOpcode() == ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);
15778 SDLoc dl(N);
15779 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
15780 DAG.getConstant(Cnt, dl, MVT::i32));
15781 }
15782 }
15783 return SDValue();
15784}
15785
15786// Look for a sign/zero/fp extend of a larger-than-legal load. This can be
15787// split into multiple extending loads, which are simpler to deal with than an
15788// arbitrary extend. For fp extends we use an integer extending load and a VCVTL
15789// to convert the type to an f32.
15790static SDValue PerformSplittingToWideningLoad(SDNode *N, SelectionDAG &DAG) {
15791 SDValue N0 = N->getOperand(0);
15792 if (N0.getOpcode() != ISD::LOAD)
15793 return SDValue();
15794 LoadSDNode *LD = cast<LoadSDNode>(N0.getNode());
15795 if (!LD->isSimple() || !N0.hasOneUse() || LD->isIndexed() ||
15796 LD->getExtensionType() != ISD::NON_EXTLOAD)
15797 return SDValue();
15798 EVT FromVT = LD->getValueType(0);
15799 EVT ToVT = N->getValueType(0);
15800 if (!ToVT.isVector())
15801 return SDValue();
15802  assert(FromVT.getVectorNumElements() == ToVT.getVectorNumElements());
15803 EVT ToEltVT = ToVT.getVectorElementType();
15804 EVT FromEltVT = FromVT.getVectorElementType();
15805
15806 unsigned NumElements = 0;
15807 if (ToEltVT == MVT::i32 && (FromEltVT == MVT::i16 || FromEltVT == MVT::i8))
15808 NumElements = 4;
15809 if (ToEltVT == MVT::i16 && FromEltVT == MVT::i8)
15810 NumElements = 8;
15811 if (ToEltVT == MVT::f32 && FromEltVT == MVT::f16)
15812 NumElements = 4;
15813 if (NumElements == 0 ||
15814 (FromEltVT != MVT::f16 && FromVT.getVectorNumElements() == NumElements) ||
15815 FromVT.getVectorNumElements() % NumElements != 0 ||
15816 !isPowerOf2_32(NumElements))
15817 return SDValue();
15818
15819 LLVMContext &C = *DAG.getContext();
15820 SDLoc DL(LD);
15821 // Details about the old load
15822 SDValue Ch = LD->getChain();
15823 SDValue BasePtr = LD->getBasePtr();
15824 Align Alignment = LD->getOriginalAlign();
15825 MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
15826 AAMDNodes AAInfo = LD->getAAInfo();
15827
15828 ISD::LoadExtType NewExtType =
15829 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
15830 SDValue Offset = DAG.getUNDEF(BasePtr.getValueType());
15831 EVT NewFromVT = EVT::getVectorVT(
15832 C, EVT::getIntegerVT(C, FromEltVT.getScalarSizeInBits()), NumElements);
15833 EVT NewToVT = EVT::getVectorVT(
15834 C, EVT::getIntegerVT(C, ToEltVT.getScalarSizeInBits()), NumElements);
15835
15836 SmallVector<SDValue, 4> Loads;
15837 SmallVector<SDValue, 4> Chains;
15838 for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
15839 unsigned NewOffset = (i * NewFromVT.getSizeInBits()) / 8;
15840 SDValue NewPtr =
15841 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::Fixed(NewOffset));
15842
15843 SDValue NewLoad =
15844 DAG.getLoad(ISD::UNINDEXED, NewExtType, NewToVT, DL, Ch, NewPtr, Offset,
15845 LD->getPointerInfo().getWithOffset(NewOffset), NewFromVT,
15846 Alignment, MMOFlags, AAInfo);
15847 Loads.push_back(NewLoad);
15848 Chains.push_back(SDValue(NewLoad.getNode(), 1));
15849 }
15850
15851  // Float truncs need to be extended with VCVTB's into their floating point types.
15852 if (FromEltVT == MVT::f16) {
15853 SmallVector<SDValue, 4> Extends;
15854
15855 for (unsigned i = 0; i < Loads.size(); i++) {
15856 SDValue LoadBC =
15857 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, MVT::v8f16, Loads[i]);
15858 SDValue FPExt = DAG.getNode(ARMISD::VCVTL, DL, MVT::v4f32, LoadBC,
15859 DAG.getConstant(0, DL, MVT::i32));
15860 Extends.push_back(FPExt);
15861 }
15862
15863 Loads = Extends;
15864 }
15865
15866 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
15867 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewChain);
15868 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ToVT, Loads);
15869}
15870
15871/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
15872/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
15873static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
15874 const ARMSubtarget *ST) {
15875 SDValue N0 = N->getOperand(0);
15876
15877 // Check for sign- and zero-extensions of vector extract operations of 8- and
15878 // 16-bit vector elements. NEON and MVE support these directly. They are
15879 // handled during DAG combining because type legalization will promote them
15880 // to 32-bit types and it is messy to recognize the operations after that.
15881 if ((ST->hasNEON() || ST->hasMVEIntegerOps()) &&
15882 N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
15883 SDValue Vec = N0.getOperand(0);
15884 SDValue Lane = N0.getOperand(1);
15885 EVT VT = N->getValueType(0);
15886 EVT EltVT = N0.getValueType();
15887 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15888
15889 if (VT == MVT::i32 &&
15890 (EltVT == MVT::i8 || EltVT == MVT::i16) &&
15891 TLI.isTypeLegal(Vec.getValueType()) &&
15892 isa<ConstantSDNode>(Lane)) {
15893
15894 unsigned Opc = 0;
15895 switch (N->getOpcode()) {
15896      default: llvm_unreachable("unexpected opcode");
15897 case ISD::SIGN_EXTEND:
15898 Opc = ARMISD::VGETLANEs;
15899 break;
15900 case ISD::ZERO_EXTEND:
15901 case ISD::ANY_EXTEND:
15902 Opc = ARMISD::VGETLANEu;
15903 break;
15904 }
15905 return DAG.getNode(Opc, SDLoc(N), VT, Vec, Lane);
15906 }
15907 }
15908
15909 if (ST->hasMVEIntegerOps())
15910 if (SDValue NewLoad = PerformSplittingToWideningLoad(N, DAG))
15911 return NewLoad;
15912
15913 return SDValue();
15914}
15915
15916static SDValue PerformFPExtendCombine(SDNode *N, SelectionDAG &DAG,
15917 const ARMSubtarget *ST) {
15918 if (ST->hasMVEFloatOps())
15919 if (SDValue NewLoad = PerformSplittingToWideningLoad(N, DAG))
15920 return NewLoad;
15921
15922 return SDValue();
15923}
15924
15925/// PerformMinMaxCombine - Target-specific DAG combining for creating truncating
15926/// saturates.
15927static SDValue PerformMinMaxCombine(SDNode *N, SelectionDAG &DAG,
15928 const ARMSubtarget *ST) {
15929 EVT VT = N->getValueType(0);
15930 SDValue N0 = N->getOperand(0);
15931 if (!ST->hasMVEIntegerOps())
15932 return SDValue();
15933
15934 if (SDValue V = PerformVQDMULHCombine(N, DAG))
15935 return V;
15936
15937 if (VT != MVT::v4i32 && VT != MVT::v8i16)
15938 return SDValue();
15939
15940 auto IsSignedSaturate = [&](SDNode *Min, SDNode *Max) {
15941 // Check one is a smin and the other is a smax
15942 if (Min->getOpcode() != ISD::SMIN)
15943 std::swap(Min, Max);
15944 if (Min->getOpcode() != ISD::SMIN || Max->getOpcode() != ISD::SMAX)
15945 return false;
15946
15947 APInt SaturateC;
15948 if (VT == MVT::v4i32)
15949 SaturateC = APInt(32, (1 << 15) - 1, true);
15950 else //if (VT == MVT::v8i16)
15951 SaturateC = APInt(16, (1 << 7) - 1, true);
15952
15953 APInt MinC, MaxC;
15954 if (!ISD::isConstantSplatVector(Min->getOperand(1).getNode(), MinC) ||
15955 MinC != SaturateC)
15956 return false;
15957 if (!ISD::isConstantSplatVector(Max->getOperand(1).getNode(), MaxC) ||
15958 MaxC != ~SaturateC)
15959 return false;
15960 return true;
15961 };
15962
15963 if (IsSignedSaturate(N, N0.getNode())) {
15964 SDLoc DL(N);
15965 MVT ExtVT, HalfVT;
15966 if (VT == MVT::v4i32) {
15967 HalfVT = MVT::v8i16;
15968 ExtVT = MVT::v4i16;
15969 } else { // if (VT == MVT::v8i16)
15970 HalfVT = MVT::v16i8;
15971 ExtVT = MVT::v8i8;
15972 }
15973
15974    // Create a VQMOVNB with undef top lanes, then sign extended into the top
15975    // half. That extend will hopefully be removed if only the bottom bits are
15976    // demanded (through a truncating store, for example).
15977 SDValue VQMOVN =
15978 DAG.getNode(ARMISD::VQMOVNs, DL, HalfVT, DAG.getUNDEF(HalfVT),
15979 N0->getOperand(0), DAG.getConstant(0, DL, MVT::i32));
15980 SDValue Bitcast = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, VQMOVN);
15981 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Bitcast,
15982 DAG.getValueType(ExtVT));
15983 }
15984
15985 auto IsUnsignedSaturate = [&](SDNode *Min) {
15986 // For unsigned, we just need to check for <= 0xffff
15987 if (Min->getOpcode() != ISD::UMIN)
15988 return false;
15989
15990 APInt SaturateC;
15991 if (VT == MVT::v4i32)
15992 SaturateC = APInt(32, (1 << 16) - 1, true);
15993 else //if (VT == MVT::v8i16)
15994 SaturateC = APInt(16, (1 << 8) - 1, true);
15995
15996 APInt MinC;
15997 if (!ISD::isConstantSplatVector(Min->getOperand(1).getNode(), MinC) ||
15998 MinC != SaturateC)
15999 return false;
16000 return true;
16001 };
16002
16003 if (IsUnsignedSaturate(N)) {
16004 SDLoc DL(N);
16005 MVT HalfVT;
16006 unsigned ExtConst;
16007 if (VT == MVT::v4i32) {
16008 HalfVT = MVT::v8i16;
16009 ExtConst = 0x0000FFFF;
16010 } else { //if (VT == MVT::v8i16)
16011 HalfVT = MVT::v16i8;
16012 ExtConst = 0x00FF;
16013 }
16014
16015 // Create a VQMOVNB with undef top lanes, then ZExt into the top half with
16016 // an AND. That extend will hopefully be removed if only the bottom bits are
16017    // demanded (through a truncating store, for example).
16018 SDValue VQMOVN =
16019 DAG.getNode(ARMISD::VQMOVNu, DL, HalfVT, DAG.getUNDEF(HalfVT), N0,
16020 DAG.getConstant(0, DL, MVT::i32));
16021 SDValue Bitcast = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, VQMOVN);
16022 return DAG.getNode(ISD::AND, DL, VT, Bitcast,
16023 DAG.getConstant(ExtConst, DL, VT));
16024 }
16025
16026 return SDValue();
16027}
16028
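// Return a pointer to the constant's value if V is a power-of-2 constant,
// otherwise return nullptr.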
16029static const APInt *isPowerOf2Constant(SDValue V) {
16030 ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
16031 if (!C)
16032 return nullptr;
16033 const APInt *CV = &C->getAPIntValue();
16034 return CV->isPowerOf2() ? CV : nullptr;
16035}
16036
16037SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const {
16038 // If we have a CMOV, OR and AND combination such as:
16039 // if (x & CN)
16040 // y |= CM;
16041 //
16042 // And:
16043 // * CN is a single bit;
16044 // * All bits covered by CM are known zero in y
16045 //
16046 // Then we can convert this into a sequence of BFI instructions. This will
16047 // always be a win if CM is a single bit, will always be no worse than the
16048 // TST&OR sequence if CM is two bits, and for thumb will be no worse if CM is
16049 // three bits (due to the extra IT instruction).
16050
16051 SDValue Op0 = CMOV->getOperand(0);
16052 SDValue Op1 = CMOV->getOperand(1);
16053 auto CCNode = cast<ConstantSDNode>(CMOV->getOperand(2));
16054 auto CC = CCNode->getAPIntValue().getLimitedValue();
16055 SDValue CmpZ = CMOV->getOperand(4);
16056
16057 // The compare must be against zero.
16058 if (!isNullConstant(CmpZ->getOperand(1)))
16059 return SDValue();
16060
16061  assert(CmpZ->getOpcode() == ARMISD::CMPZ);
16062 SDValue And = CmpZ->getOperand(0);
16063 if (And->getOpcode() != ISD::AND)
16064 return SDValue();
16065 const APInt *AndC = isPowerOf2Constant(And->getOperand(1));
16066 if (!AndC)
16067 return SDValue();
16068 SDValue X = And->getOperand(0);
16069
16070 if (CC == ARMCC::EQ) {
16071 // We're performing an "equal to zero" compare. Swap the operands so we
16072 // canonicalize on a "not equal to zero" compare.
16073 std::swap(Op0, Op1);
16074 } else {
16075    assert(CC == ARMCC::NE && "How can a CMPZ node not be EQ or NE?");
16076 }
16077
16078 if (Op1->getOpcode() != ISD::OR)
16079 return SDValue();
16080
16081 ConstantSDNode *OrC = dyn_cast<ConstantSDNode>(Op1->getOperand(1));
16082 if (!OrC)
16083 return SDValue();
16084 SDValue Y = Op1->getOperand(0);
16085
16086 if (Op0 != Y)
16087 return SDValue();
16088
16089 // Now, is it profitable to continue?
16090 APInt OrCI = OrC->getAPIntValue();
16091 unsigned Heuristic = Subtarget->isThumb() ? 3 : 2;
16092 if (OrCI.countPopulation() > Heuristic)
16093 return SDValue();
16094
16095 // Lastly, can we determine that the bits defined by OrCI
16096 // are zero in Y?
16097 KnownBits Known = DAG.computeKnownBits(Y);
16098 if ((OrCI & Known.Zero) != OrCI)
16099 return SDValue();
16100
16101 // OK, we can do the combine.
16102 SDValue V = Y;
16103 SDLoc dl(X);
16104 EVT VT = X.getValueType();
16105 unsigned BitInX = AndC->logBase2();
16106
16107 if (BitInX != 0) {
16108 // We must shift X first.
16109 X = DAG.getNode(ISD::SRL, dl, VT, X,
16110 DAG.getConstant(BitInX, dl, VT));
16111 }
16112
16113 for (unsigned BitInY = 0, NumActiveBits = OrCI.getActiveBits();
16114 BitInY < NumActiveBits; ++BitInY) {
16115 if (OrCI[BitInY] == 0)
16116 continue;
16117 APInt Mask(VT.getSizeInBits(), 0);
16118 Mask.setBit(BitInY);
16119 V = DAG.getNode(ARMISD::BFI, dl, VT, V, X,
16120 // Confusingly, the operand is an *inverted* mask.
16121 DAG.getConstant(~Mask, dl, VT));
16122 }
16123
16124 return V;
16125}
16126
16127// Given N, the value controlling the conditional branch, search for the loop
16128// intrinsic, returning it, along with how the value is used. We need to handle
16129// patterns such as the following:
16130// (brcond (xor (setcc (loop.decrement), 0, ne), 1), exit)
16131// (brcond (setcc (loop.decrement), 0, eq), exit)
16132// (brcond (setcc (loop.decrement), 0, ne), header)
16133static SDValue SearchLoopIntrinsic(SDValue N, ISD::CondCode &CC, int &Imm,
16134 bool &Negate) {
16135 switch (N->getOpcode()) {
16136 default:
16137 break;
16138 case ISD::XOR: {
16139 if (!isa<ConstantSDNode>(N.getOperand(1)))
16140 return SDValue();
16141 if (!cast<ConstantSDNode>(N.getOperand(1))->isOne())
16142 return SDValue();
16143 Negate = !Negate;
16144 return SearchLoopIntrinsic(N.getOperand(0), CC, Imm, Negate);
16145 }
16146 case ISD::SETCC: {
16147 auto *Const = dyn_cast<ConstantSDNode>(N.getOperand(1));
16148 if (!Const)
16149 return SDValue();
16150 if (Const->isNullValue())
16151 Imm = 0;
16152 else if (Const->isOne())
16153 Imm = 1;
16154 else
16155 return SDValue();
16156 CC = cast<CondCodeSDNode>(N.getOperand(2))->get();
16157 return SearchLoopIntrinsic(N->getOperand(0), CC, Imm, Negate);
16158 }
16159 case ISD::INTRINSIC_W_CHAIN: {
16160 unsigned IntOp = cast<ConstantSDNode>(N.getOperand(1))->getZExtValue();
16161 if (IntOp != Intrinsic::test_start_loop_iterations &&
16162 IntOp != Intrinsic::loop_decrement_reg)
16163 return SDValue();
16164 return N;
16165 }
16166 }
16167 return SDValue();
16168}
16169
16170static SDValue PerformHWLoopCombine(SDNode *N,
16171 TargetLowering::DAGCombinerInfo &DCI,
16172 const ARMSubtarget *ST) {
16173
16174  // The hwloop intrinsics that we're interested in are used for control flow,
16175 // either for entering or exiting the loop:
16176 // - test.start.loop.iterations will test whether its operand is zero. If it
16177 // is zero, the proceeding branch should not enter the loop.
16178 // - loop.decrement.reg also tests whether its operand is zero. If it is
16179 // zero, the proceeding branch should not branch back to the beginning of
16180 // the loop.
16181  // So here, we need to check how the brcond is using the result of each
16182 // of the intrinsics to ensure that we're branching to the right place at the
16183 // right time.
16184
16185 ISD::CondCode CC;
16186 SDValue Cond;
16187 int Imm = 1;
16188 bool Negate = false;
16189 SDValue Chain = N->getOperand(0);
16190 SDValue Dest;
16191
16192 if (N->getOpcode() == ISD::BRCOND) {
16193 CC = ISD::SETEQ;
16194 Cond = N->getOperand(1);
16195 Dest = N->getOperand(2);
16196 } else {
16197    assert(N->getOpcode() == ISD::BR_CC && "Expected BRCOND or BR_CC!");
16198 CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
16199 Cond = N->getOperand(2);
16200 Dest = N->getOperand(4);
16201 if (auto *Const = dyn_cast<ConstantSDNode>(N->getOperand(3))) {
16202 if (!Const->isOne() && !Const->isNullValue())
16203 return SDValue();
16204 Imm = Const->getZExtValue();
16205 } else
16206 return SDValue();
16207 }
16208
16209 SDValue Int = SearchLoopIntrinsic(Cond, CC, Imm, Negate);
16210 if (!Int)
16211 return SDValue();
16212
16213 if (Negate)
16214 CC = ISD::getSetCCInverse(CC, /* Integer inverse */ MVT::i32);
16215
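  // Given the condition code and the immediate it is compared against, these
  // decide whether the branch is taken when the intrinsic's result is zero.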
16216 auto IsTrueIfZero = [](ISD::CondCode CC, int Imm) {
16217 return (CC == ISD::SETEQ && Imm == 0) ||
16218 (CC == ISD::SETNE && Imm == 1) ||
16219 (CC == ISD::SETLT && Imm == 1) ||
16220 (CC == ISD::SETULT && Imm == 1);
16221 };
16222
16223 auto IsFalseIfZero = [](ISD::CondCode CC, int Imm) {
16224 return (CC == ISD::SETEQ && Imm == 1) ||
16225 (CC == ISD::SETNE && Imm == 0) ||
16226 (CC == ISD::SETGT && Imm == 0) ||
16227 (CC == ISD::SETUGT && Imm == 0) ||
16228 (CC == ISD::SETGE && Imm == 1) ||
16229 (CC == ISD::SETUGE && Imm == 1);
16230 };
16231
16232  assert((IsTrueIfZero(CC, Imm) || IsFalseIfZero(CC, Imm)) &&
16233         "unsupported condition");
16234
16235 SDLoc dl(Int);
16236 SelectionDAG &DAG = DCI.DAG;
16237 SDValue Elements = Int.getOperand(2);
16238 unsigned IntOp = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue();
16239  assert((N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BR)
16240         && "expected single br user");
16241 SDNode *Br = *N->use_begin();
16242 SDValue OtherTarget = Br->getOperand(1);
16243
16244 // Update the unconditional branch to branch to the given Dest.
16245 auto UpdateUncondBr = [](SDNode *Br, SDValue Dest, SelectionDAG &DAG) {
16246 SDValue NewBrOps[] = { Br->getOperand(0), Dest };
16247 SDValue NewBr = DAG.getNode(ISD::BR, SDLoc(Br), MVT::Other, NewBrOps);
16248 DAG.ReplaceAllUsesOfValueWith(SDValue(Br, 0), NewBr);
16249 };
16250
16251 if (IntOp == Intrinsic::test_start_loop_iterations) {
16252 SDValue Res;
16253 SDValue Setup = DAG.getNode(ARMISD::WLSSETUP, dl, MVT::i32, Elements);
16254 // We expect this 'instruction' to branch when the counter is zero.
16255 if (IsTrueIfZero(CC, Imm)) {
16256 SDValue Ops[] = {Chain, Setup, Dest};
16257 Res = DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops);
16258 } else {
16259 // The logic is the reverse of what we need for WLS, so find the other
16260 // basic block target: the target of the proceeding br.
16261 UpdateUncondBr(Br, Dest, DAG);
16262
16263 SDValue Ops[] = {Chain, Setup, OtherTarget};
16264 Res = DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops);
16265 }
16266 // Update LR count to the new value
16267 DAG.ReplaceAllUsesOfValueWith(Int.getValue(0), Setup);
16268 // Update chain
16269 DAG.ReplaceAllUsesOfValueWith(Int.getValue(2), Int.getOperand(0));
16270 return Res;
16271 } else {
16272 SDValue Size = DAG.getTargetConstant(
16273 cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl, MVT::i32);
16274 SDValue Args[] = { Int.getOperand(0), Elements, Size, };
16275 SDValue LoopDec = DAG.getNode(ARMISD::LOOP_DEC, dl,
16276 DAG.getVTList(MVT::i32, MVT::Other), Args);
16277 DAG.ReplaceAllUsesWith(Int.getNode(), LoopDec.getNode());
16278
16279 // We expect this instruction to branch when the count is not zero.
16280 SDValue Target = IsFalseIfZero(CC, Imm) ? Dest : OtherTarget;
16281
16282 // Update the unconditional branch to target the loop preheader if we've
16283 // found the condition has been reversed.
16284 if (Target == OtherTarget)
16285 UpdateUncondBr(Br, Dest, DAG);
16286
16287 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
16288 SDValue(LoopDec.getNode(), 1), Chain);
16289
16290 SDValue EndArgs[] = { Chain, SDValue(LoopDec.getNode(), 0), Target };
16291 return DAG.getNode(ARMISD::LE, dl, MVT::Other, EndArgs);
16292 }
16293 return SDValue();
16294}
16295
16296/// PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
16297SDValue
16298ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const {
16299 SDValue Cmp = N->getOperand(4);
16300 if (Cmp.getOpcode() != ARMISD::CMPZ)
16301 // Only looking at NE cases.
16302 return SDValue();
16303
16304 EVT VT = N->getValueType(0);
16305 SDLoc dl(N);
16306 SDValue LHS = Cmp.getOperand(0);
16307 SDValue RHS = Cmp.getOperand(1);
16308 SDValue Chain = N->getOperand(0);
16309 SDValue BB = N->getOperand(1);
16310 SDValue ARMcc = N->getOperand(2);
16311 ARMCC::CondCodes CC =
16312 (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
16313
16314 // (brcond Chain BB ne CPSR (cmpz (and (cmov 0 1 CC CPSR Cmp) 1) 0))
16315 // -> (brcond Chain BB CC CPSR Cmp)
16316 if (CC == ARMCC::NE && LHS.getOpcode() == ISD::AND && LHS->hasOneUse() &&
16317 LHS->getOperand(0)->getOpcode() == ARMISD::CMOV &&
16318 LHS->getOperand(0)->hasOneUse()) {
16319 auto *LHS00C = dyn_cast<ConstantSDNode>(LHS->getOperand(0)->getOperand(0));
16320 auto *LHS01C = dyn_cast<ConstantSDNode>(LHS->getOperand(0)->getOperand(1));
16321 auto *LHS1C = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
16322 auto *RHSC = dyn_cast<ConstantSDNode>(RHS);
16323 if ((LHS00C && LHS00C->getZExtValue() == 0) &&
16324 (LHS01C && LHS01C->getZExtValue() == 1) &&
16325 (LHS1C && LHS1C->getZExtValue() == 1) &&
16326 (RHSC && RHSC->getZExtValue() == 0)) {
16327 return DAG.getNode(
16328 ARMISD::BRCOND, dl, VT, Chain, BB, LHS->getOperand(0)->getOperand(2),
16329 LHS->getOperand(0)->getOperand(3), LHS->getOperand(0)->getOperand(4));
16330 }
16331 }
16332
16333 return SDValue();
16334}
16335
16336/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
16337SDValue
16338ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
16339 SDValue Cmp = N->getOperand(4);
16340 if (Cmp.getOpcode() != ARMISD::CMPZ)
16341 // Only looking at EQ and NE cases.
16342 return SDValue();
16343
16344 EVT VT = N->getValueType(0);
16345 SDLoc dl(N);
16346 SDValue LHS = Cmp.getOperand(0);
16347 SDValue RHS = Cmp.getOperand(1);
16348 SDValue FalseVal = N->getOperand(0);
16349 SDValue TrueVal = N->getOperand(1);
16350 SDValue ARMcc = N->getOperand(2);
16351 ARMCC::CondCodes CC =
16352 (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
16353
16354 // BFI is only available on V6T2+.
16355 if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) {
16356 SDValue R = PerformCMOVToBFICombine(N, DAG);
16357 if (R)
16358 return R;
16359 }
16360
16361 // Simplify
16362 // mov r1, r0
16363 // cmp r1, x
16364 // mov r0, y
16365 // moveq r0, x
16366 // to
16367 // cmp r0, x
16368 // movne r0, y
16369 //
16370 // mov r1, r0
16371 // cmp r1, x
16372 // mov r0, x
16373 // movne r0, y
16374 // to
16375 // cmp r0, x
16376 // movne r0, y
16377 /// FIXME: Turn this into a target neutral optimization?
16378 SDValue Res;
16379 if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) {
16380 Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc,
16381 N->getOperand(3), Cmp);
16382 } else if (CC == ARMCC::EQ && TrueVal == RHS) {
16383 SDValue ARMcc;
16384 SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl);
16385 Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc,
16386 N->getOperand(3), NewCmp);
16387 }
16388
16389 // (cmov F T ne CPSR (cmpz (cmov 0 1 CC CPSR Cmp) 0))
16390 // -> (cmov F T CC CPSR Cmp)
16391 if (CC == ARMCC::NE && LHS.getOpcode() == ARMISD::CMOV && LHS->hasOneUse()) {
16392 auto *LHS0C = dyn_cast<ConstantSDNode>(LHS->getOperand(0));
16393 auto *LHS1C = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
16394 auto *RHSC = dyn_cast<ConstantSDNode>(RHS);
16395 if ((LHS0C && LHS0C->getZExtValue() == 0) &&
16396 (LHS1C && LHS1C->getZExtValue() == 1) &&
16397 (RHSC && RHSC->getZExtValue() == 0)) {
16398 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
16399 LHS->getOperand(2), LHS->getOperand(3),
16400 LHS->getOperand(4));
16401 }
16402 }
16403
16404 if (!VT.isInteger())
16405 return SDValue();
16406
16407 // Materialize a boolean comparison for integers so we can avoid branching.
16408 if (isNullConstant(FalseVal)) {
16409 if (CC == ARMCC::EQ && isOneConstant(TrueVal)) {
16410 if (!Subtarget->isThumb1Only() && Subtarget->hasV5TOps()) {
16411 // If x == y then x - y == 0 and ARM's CLZ will return 32, shifting it
16412 // right 5 bits will make that 32 be 1, otherwise it will be 0.
16413 // CMOV 0, 1, ==, (CMPZ x, y) -> SRL (CTLZ (SUB x, y)), 5
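        // To make this concrete (illustrative values only): if x == y, Sub is
        // 0, ARM's CLZ returns 32 for a zero input, and 32 >> 5 == 1; if
        // x != y, CLZ(Sub) is at most 31, so the shift right by 5 yields 0.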
16414 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
16415 Res = DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::CTLZ, dl, VT, Sub),
16416 DAG.getConstant(5, dl, MVT::i32));
16417 } else {
16418 // CMOV 0, 1, ==, (CMPZ x, y) ->
16419 // (ADDCARRY (SUB x, y), t:0, t:1)
16420 // where t = (SUBCARRY 0, (SUB x, y), 0)
16421 //
16422 // The SUBCARRY computes 0 - (x - y) and this will give a borrow when
16423 // x != y. In other words, a carry C == 1 when x == y, C == 0
16424 // otherwise.
16425 // The final ADDCARRY computes
16426 // x - y + (0 - (x - y)) + C == C
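        // Concretely (illustrative values only): if x == y then Sub == 0, the
        // USUBO below does not borrow, C becomes 1 and the ADDCARRY produces
        // 0 + 0 + 1 == 1; if x != y the USUBO borrows, C becomes 0 and
        // Sub + (0 - Sub) + 0 wraps to 0.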
16427 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
16428 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
16429 SDValue Neg = DAG.getNode(ISD::USUBO, dl, VTs, FalseVal, Sub);
16430        // ISD::SUBCARRY returns a borrow, but what we actually want here
16431        // is the carry.
16432 SDValue Carry =
16433 DAG.getNode(ISD::SUB, dl, MVT::i32,
16434 DAG.getConstant(1, dl, MVT::i32), Neg.getValue(1));
16435 Res = DAG.getNode(ISD::ADDCARRY, dl, VTs, Sub, Neg, Carry);
16436 }
16437 } else if (CC == ARMCC::NE && !isNullConstant(RHS) &&
16438 (!Subtarget->isThumb1Only() || isPowerOf2Constant(TrueVal))) {
16439 // This seems pointless but will allow us to combine it further below.
16440 // CMOV 0, z, !=, (CMPZ x, y) -> CMOV (SUBS x, y), z, !=, (SUBS x, y):1
16441 SDValue Sub =
16442 DAG.getNode(ARMISD::SUBS, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
16443 SDValue CPSRGlue = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
16444 Sub.getValue(1), SDValue());
16445 Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, TrueVal, ARMcc,
16446 N->getOperand(3), CPSRGlue.getValue(1));
16447 FalseVal = Sub;
16448 }
16449 } else if (isNullConstant(TrueVal)) {
16450 if (CC == ARMCC::EQ && !isNullConstant(RHS) &&
16451 (!Subtarget->isThumb1Only() || isPowerOf2Constant(FalseVal))) {
16452 // This seems pointless but will allow us to combine it further below
16453 // Note that we change == for != as this is the dual for the case above.
16454 // CMOV z, 0, ==, (CMPZ x, y) -> CMOV (SUBS x, y), z, !=, (SUBS x, y):1
16455 SDValue Sub =
16456 DAG.getNode(ARMISD::SUBS, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
16457 SDValue CPSRGlue = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
16458 Sub.getValue(1), SDValue());
16459 Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, FalseVal,
16460 DAG.getConstant(ARMCC::NE, dl, MVT::i32),
16461 N->getOperand(3), CPSRGlue.getValue(1));
16462 FalseVal = Sub;
16463 }
16464 }
16465
16466 // On Thumb1, the DAG above may be further combined if z is a power of 2
16467 // (z == 2 ^ K).
16468 // CMOV (SUBS x, y), z, !=, (SUBS x, y):1 ->
16469 // t1 = (USUBO (SUB x, y), 1)
16470 // t2 = (SUBCARRY (SUB x, y), t1:0, t1:1)
16471 // Result = if K != 0 then (SHL t2:0, K) else t2:0
16472 //
16473 // This also handles the special case of comparing against zero; it's
16474  // essentially the same pattern, except there's no SUBS:
16475 // CMOV x, z, !=, (CMPZ x, 0) ->
16476 // t1 = (USUBO x, 1)
16477 // t2 = (SUBCARRY x, t1:0, t1:1)
16478 // Result = if K != 0 then (SHL t2:0, K) else t2:0
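  // Put differently: when the compared values differ, FalseVal is nonzero, the
  // USUBO of it and 1 does not borrow, and the SUBCARRY gives
  // v - (v - 1) - 0 == 1; when they are equal it is USUBO(0, 1), which borrows,
  // giving 0 modulo 2^32. Shifting that 0/1 left by K then rematerializes
  // z == 2^K.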
16479 const APInt *TrueConst;
16480 if (Subtarget->isThumb1Only() && CC == ARMCC::NE &&
16481 ((FalseVal.getOpcode() == ARMISD::SUBS &&
16482 FalseVal.getOperand(0) == LHS && FalseVal.getOperand(1) == RHS) ||
16483 (FalseVal == LHS && isNullConstant(RHS))) &&
16484 (TrueConst = isPowerOf2Constant(TrueVal))) {
16485 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
16486 unsigned ShiftAmount = TrueConst->logBase2();
16487 if (ShiftAmount)
16488 TrueVal = DAG.getConstant(1, dl, VT);
16489 SDValue Subc = DAG.getNode(ISD::USUBO, dl, VTs, FalseVal, TrueVal);
16490 Res = DAG.getNode(ISD::SUBCARRY, dl, VTs, FalseVal, Subc, Subc.getValue(1));
16491
16492 if (ShiftAmount)
16493 Res = DAG.getNode(ISD::SHL, dl, VT, Res,
16494 DAG.getConstant(ShiftAmount, dl, MVT::i32));
16495 }
16496
16497 if (Res.getNode()) {
16498 KnownBits Known = DAG.computeKnownBits(SDValue(N,0));
16499    // Capture demanded bits information that would otherwise be lost.
16500 if (Known.Zero == 0xfffffffe)
16501 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
16502 DAG.getValueType(MVT::i1));
16503 else if (Known.Zero == 0xffffff00)
16504 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
16505 DAG.getValueType(MVT::i8));
16506 else if (Known.Zero == 0xffff0000)
16507 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
16508 DAG.getValueType(MVT::i16));
16509 }
16510
16511 return Res;
16512}
16513
16514static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG,
16515 const ARMSubtarget *ST) {
16516 SDValue Src = N->getOperand(0);
16517 EVT DstVT = N->getValueType(0);
16518
16519 // Convert v4f32 bitcast (v4i32 vdup (i32)) -> v4f32 vdup (i32) under MVE.
16520 if (ST->hasMVEIntegerOps() && Src.getOpcode() == ARMISD::VDUP) {
16521 EVT SrcVT = Src.getValueType();
16522 if (SrcVT.getScalarSizeInBits() == DstVT.getScalarSizeInBits())
16523 return DAG.getNode(ARMISD::VDUP, SDLoc(N), DstVT, Src.getOperand(0));
16524 }
16525
16526 // We may have a bitcast of something that has already had this bitcast
16527 // combine performed on it, so skip past any VECTOR_REG_CASTs.
16528 while (Src.getOpcode() == ARMISD::VECTOR_REG_CAST)
16529 Src = Src.getOperand(0);
16530
16531 // Bitcast from element-wise VMOV or VMVN doesn't need VREV if the VREV that
16532 // would be generated is at least the width of the element type.
16533 EVT SrcVT = Src.getValueType();
16534 if ((Src.getOpcode() == ARMISD::VMOVIMM ||
16535 Src.getOpcode() == ARMISD::VMVNIMM ||
16536 Src.getOpcode() == ARMISD::VMOVFPIMM) &&
16537 SrcVT.getScalarSizeInBits() <= DstVT.getScalarSizeInBits() &&
16538 DAG.getDataLayout().isBigEndian())
16539 return DAG.getNode(ARMISD::VECTOR_REG_CAST, SDLoc(N), DstVT, Src);
16540
16541 return SDValue();
16542}
16543
16544SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
16545 DAGCombinerInfo &DCI) const {
16546 switch (N->getOpcode()) {
16547 default: break;
16548 case ISD::SELECT_CC:
16549 case ISD::SELECT: return PerformSELECTCombine(N, DCI, Subtarget);
16550 case ISD::VSELECT: return PerformVSELECTCombine(N, DCI, Subtarget);
16551 case ISD::ABS: return PerformABSCombine(N, DCI, Subtarget);
16552 case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget);
16553 case ARMISD::UMLAL: return PerformUMLALCombine(N, DCI.DAG, Subtarget);
16554 case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget);
16555 case ISD::SUB: return PerformSUBCombine(N, DCI, Subtarget);
16556 case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
16557 case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
16558 case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget);
16559 case ISD::AND: return PerformANDCombine(N, DCI, Subtarget);
16560 case ISD::BRCOND:
16561 case ISD::BR_CC: return PerformHWLoopCombine(N, DCI, Subtarget);
16562 case ARMISD::ADDC:
16563 case ARMISD::SUBC: return PerformAddcSubcCombine(N, DCI, Subtarget);
16564 case ARMISD::SUBE: return PerformAddeSubeCombine(N, DCI, Subtarget);
16565 case ARMISD::BFI: return PerformBFICombine(N, DCI);
16566 case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget);
16567 case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
16568 case ARMISD::VMOVhr: return PerformVMOVhrCombine(N, DCI);
16569 case ARMISD::VMOVrh: return PerformVMOVrhCombine(N, DCI);
16570 case ISD::STORE: return PerformSTORECombine(N, DCI, Subtarget);
16571 case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI, Subtarget);
16572 case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
16573 case ISD::EXTRACT_VECTOR_ELT:
16574 return PerformExtractEltCombine(N, DCI, Subtarget);
16575 case ISD::SIGN_EXTEND_INREG: return PerformSignExtendInregCombine(N, DCI.DAG);
16576 case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
16577 case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI, Subtarget);
16578 case ARMISD::VDUP: return PerformVDUPCombine(N, DCI, Subtarget);
16579 case ISD::FP_TO_SINT:
16580 case ISD::FP_TO_UINT:
16581 return PerformVCVTCombine(N, DCI.DAG, Subtarget);
16582 case ISD::FDIV:
16583 return PerformVDIVCombine(N, DCI.DAG, Subtarget);
16584 case ISD::INTRINSIC_WO_CHAIN:
16585 return PerformIntrinsicCombine(N, DCI);
16586 case ISD::SHL:
16587 case ISD::SRA:
16588 case ISD::SRL:
16589 return PerformShiftCombine(N, DCI, Subtarget);
16590 case ISD::SIGN_EXTEND:
16591 case ISD::ZERO_EXTEND:
16592 case ISD::ANY_EXTEND:
16593 return PerformExtendCombine(N, DCI.DAG, Subtarget);
16594 case ISD::FP_EXTEND:
16595 return PerformFPExtendCombine(N, DCI.DAG, Subtarget);
16596 case ISD::SMIN:
16597 case ISD::UMIN:
16598 case ISD::SMAX:
16599 case ISD::UMAX:
16600 return PerformMinMaxCombine(N, DCI.DAG, Subtarget);
16601 case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG);
16602 case ARMISD::BRCOND: return PerformBRCONDCombine(N, DCI.DAG);
16603 case ISD::LOAD: return PerformLOADCombine(N, DCI);
16604 case ARMISD::VLD1DUP:
16605 case ARMISD::VLD2DUP:
16606 case ARMISD::VLD3DUP:
16607 case ARMISD::VLD4DUP:
16608 return PerformVLDCombine(N, DCI);
16609 case ARMISD::BUILD_VECTOR:
16610 return PerformARMBUILD_VECTORCombine(N, DCI);
16611 case ISD::BITCAST:
16612 return PerformBITCASTCombine(N, DCI.DAG, Subtarget);
16613 case ARMISD::PREDICATE_CAST:
16614 return PerformPREDICATE_CASTCombine(N, DCI);
16615 case ARMISD::VECTOR_REG_CAST:
16616 return PerformVECTOR_REG_CASTCombine(N, DCI, Subtarget);
16617 case ARMISD::VCMP:
16618 return PerformVCMPCombine(N, DCI, Subtarget);
16619 case ISD::VECREDUCE_ADD:
16620 return PerformVECREDUCE_ADDCombine(N, DCI.DAG, Subtarget);
16621 case ARMISD::VMOVN:
16622 return PerformVMOVNCombine(N, DCI);
16623 case ARMISD::VQMOVNs:
16624 case ARMISD::VQMOVNu:
16625 return PerformVQMOVNCombine(N, DCI);
16626 case ARMISD::ASRL:
16627 case ARMISD::LSRL:
16628 case ARMISD::LSLL:
16629 return PerformLongShiftCombine(N, DCI.DAG);
16630 case ARMISD::SMULWB: {
16631 unsigned BitWidth = N->getValueType(0).getSizeInBits();
16632 APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
16633 if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
16634 return SDValue();
16635 break;
16636 }
16637 case ARMISD::SMULWT: {
16638 unsigned BitWidth = N->getValueType(0).getSizeInBits();
16639 APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
16640 if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
16641 return SDValue();
16642 break;
16643 }
16644 case ARMISD::SMLALBB:
16645 case ARMISD::QADD16b:
16646 case ARMISD::QSUB16b: {
16647 unsigned BitWidth = N->getValueType(0).getSizeInBits();
16648 APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
16649 if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
16650 (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
16651 return SDValue();
16652 break;
16653 }
16654 case ARMISD::SMLALBT: {
16655 unsigned LowWidth = N->getOperand(0).getValueType().getSizeInBits();
16656 APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);
16657 unsigned HighWidth = N->getOperand(1).getValueType().getSizeInBits();
16658 APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);
16659 if ((SimplifyDemandedBits(N->getOperand(0), LowMask, DCI)) ||
16660 (SimplifyDemandedBits(N->getOperand(1), HighMask, DCI)))
16661 return SDValue();
16662 break;
16663 }
16664 case ARMISD::SMLALTB: {
16665 unsigned HighWidth = N->getOperand(0).getValueType().getSizeInBits();
16666 APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);
16667 unsigned LowWidth = N->getOperand(1).getValueType().getSizeInBits();
16668 APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);
16669 if ((SimplifyDemandedBits(N->getOperand(0), HighMask, DCI)) ||
16670 (SimplifyDemandedBits(N->getOperand(1), LowMask, DCI)))
16671 return SDValue();
16672 break;
16673 }
16674 case ARMISD::SMLALTT: {
16675 unsigned BitWidth = N->getValueType(0).getSizeInBits();
16676 APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
16677 if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
16678 (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
16679 return SDValue();
16680 break;
16681 }
16682 case ARMISD::QADD8b:
16683 case ARMISD::QSUB8b: {
16684 unsigned BitWidth = N->getValueType(0).getSizeInBits();
16685 APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 8);
16686 if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
16687 (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
16688 return SDValue();
16689 break;
16690 }
16691 case ISD::INTRINSIC_VOID:
16692 case ISD::INTRINSIC_W_CHAIN:
16693 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
16694 case Intrinsic::arm_neon_vld1:
16695 case Intrinsic::arm_neon_vld1x2:
16696 case Intrinsic::arm_neon_vld1x3:
16697 case Intrinsic::arm_neon_vld1x4:
16698 case Intrinsic::arm_neon_vld2:
16699 case Intrinsic::arm_neon_vld3:
16700 case Intrinsic::arm_neon_vld4:
16701 case Intrinsic::arm_neon_vld2lane:
16702 case Intrinsic::arm_neon_vld3lane:
16703 case Intrinsic::arm_neon_vld4lane:
16704 case Intrinsic::arm_neon_vld2dup:
16705 case Intrinsic::arm_neon_vld3dup:
16706 case Intrinsic::arm_neon_vld4dup:
16707 case Intrinsic::arm_neon_vst1:
16708 case Intrinsic::arm_neon_vst1x2:
16709 case Intrinsic::arm_neon_vst1x3:
16710 case Intrinsic::arm_neon_vst1x4:
16711 case Intrinsic::arm_neon_vst2:
16712 case Intrinsic::arm_neon_vst3:
16713 case Intrinsic::arm_neon_vst4:
16714 case Intrinsic::arm_neon_vst2lane:
16715 case Intrinsic::arm_neon_vst3lane:
16716 case Intrinsic::arm_neon_vst4lane:
16717 return PerformVLDCombine(N, DCI);
16718 case Intrinsic::arm_mve_vld2q:
16719 case Intrinsic::arm_mve_vld4q:
16720 case Intrinsic::arm_mve_vst2q:
16721 case Intrinsic::arm_mve_vst4q:
16722 return PerformMVEVLDCombine(N, DCI);
16723 default: break;
16724 }
16725 break;
16726 }
16727 return SDValue();
16728}
16729
16730bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
16731 EVT VT) const {
16732 return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
16733}
16734
16735bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned,
16736 Align Alignment,
16737 MachineMemOperand::Flags,
16738 bool *Fast) const {
16739 // Depends what it gets converted into if the type is weird.
16740 if (!VT.isSimple())
16741 return false;
16742
16743  // The AllowsUnaligned flag models the SCTLR.A setting in ARM CPUs.
16744 bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
16745 auto Ty = VT.getSimpleVT().SimpleTy;
16746
16747 if (Ty == MVT::i8 || Ty == MVT::i16 || Ty == MVT::i32) {
16748    // Unaligned access can use (for example) LDRB, LDRH, LDR.
16749 if (AllowsUnaligned) {
16750 if (Fast)
16751 *Fast = Subtarget->hasV7Ops();
16752 return true;
16753 }
16754 }
16755
16756 if (Ty == MVT::f64 || Ty == MVT::v2f64) {
16757    // For any little-endian target with NEON, we can support unaligned ld/st
16758    // of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8.
16759    // A big-endian target may also explicitly support unaligned accesses.
16760 if (Subtarget->hasNEON() && (AllowsUnaligned || Subtarget->isLittle())) {
16761 if (Fast)
16762 *Fast = true;
16763 return true;
16764 }
16765 }
16766
16767 if (!Subtarget->hasMVEIntegerOps())
16768 return false;
16769
16770 // These are for predicates
16771 if ((Ty == MVT::v16i1 || Ty == MVT::v8i1 || Ty == MVT::v4i1)) {
16772 if (Fast)
16773 *Fast = true;
16774 return true;
16775 }
16776
16777 // These are for truncated stores/narrowing loads. They are fine so long as
16778 // the alignment is at least the size of the item being loaded
16779 if ((Ty == MVT::v4i8 || Ty == MVT::v8i8 || Ty == MVT::v4i16) &&
16780 Alignment >= VT.getScalarSizeInBits() / 8) {
16781 if (Fast)
16782 *Fast = true;
16783 return true;
16784 }
16785
16786 // In little-endian MVE, the store instructions VSTRB.U8, VSTRH.U16 and
16787 // VSTRW.U32 all store the vector register in exactly the same format, and
16788 // differ only in the range of their immediate offset field and the required
16789 // alignment. So there is always a store that can be used, regardless of
16790 // actual type.
16791 //
16792  // For big endian, that is not the case, but we can still emit a (VSTRB.U8;
16793  // VREV64.8) pair and get the same effect. This will likely be better than
16794 // aligning the vector through the stack.
16795 if (Ty == MVT::v16i8 || Ty == MVT::v8i16 || Ty == MVT::v8f16 ||
16796 Ty == MVT::v4i32 || Ty == MVT::v4f32 || Ty == MVT::v2i64 ||
16797 Ty == MVT::v2f64) {
16798 if (Fast)
16799 *Fast = true;
16800 return true;
16801 }
16802
16803 return false;
16804}
16805
16806
16807EVT ARMTargetLowering::getOptimalMemOpType(
16808 const MemOp &Op, const AttributeList &FuncAttributes) const {
16809 // See if we can use NEON instructions for this...
16810 if ((Op.isMemcpy() || Op.isZeroMemset()) && Subtarget->hasNEON() &&
16811 !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
16812 bool Fast;
16813 if (Op.size() >= 16 &&
16814 (Op.isAligned(Align(16)) ||
16815 (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, Align(1),
16816 MachineMemOperand::MONone, &Fast) &&
16817 Fast))) {
16818 return MVT::v2f64;
16819 } else if (Op.size() >= 8 &&
16820 (Op.isAligned(Align(8)) ||
16821 (allowsMisalignedMemoryAccesses(
16822 MVT::f64, 0, Align(1), MachineMemOperand::MONone, &Fast) &&
16823 Fast))) {
16824 return MVT::f64;
16825 }
16826 }
16827
16828 // Let the target-independent logic figure it out.
16829 return MVT::Other;
16830}
16831
16832// 64-bit integers are split into their high and low parts and held in two
16833// different registers, so the trunc is free since the low register can just
16834// be used.
16835bool ARMTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
16836 if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
16837 return false;
16838 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
16839 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
16840 return (SrcBits == 64 && DestBits == 32);
16841}
16842
16843bool ARMTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
16844 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
16845 !DstVT.isInteger())
16846 return false;
16847 unsigned SrcBits = SrcVT.getSizeInBits();
16848 unsigned DestBits = DstVT.getSizeInBits();
16849 return (SrcBits == 64 && DestBits == 32);
16850}
16851
16852bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
16853 if (Val.getOpcode() != ISD::LOAD)
16854 return false;
16855
16856 EVT VT1 = Val.getValueType();
16857 if (!VT1.isSimple() || !VT1.isInteger() ||
16858 !VT2.isSimple() || !VT2.isInteger())
16859 return false;
16860
16861 switch (VT1.getSimpleVT().SimpleTy) {
16862 default: break;
16863 case MVT::i1:
16864 case MVT::i8:
16865 case MVT::i16:
16866 // 8-bit and 16-bit loads implicitly zero-extend to 32-bits.
16867 return true;
16868 }
16869
16870 return false;
16871}
16872
16873bool ARMTargetLowering::isFNegFree(EVT VT) const {
16874 if (!VT.isSimple())
16875 return false;
16876
16877 // There are quite a few FP16 instructions (e.g. VNMLA, VNMLS, etc.) that
16878 // negate values directly (fneg is free). So, we don't want to let the DAG
16879 // combiner rewrite fneg into xors and some other instructions. For f16 and
16880 // FullFP16 argument passing, some bitcast nodes may be introduced,
16881  // triggering this DAG combine rewrite, so we avoid that here.
16882 switch (VT.getSimpleVT().SimpleTy) {
16883 default: break;
16884 case MVT::f16:
16885 return Subtarget->hasFullFP16();
16886 }
16887
16888 return false;
16889}
16890
16891/// Check if Ext1 and Ext2 are extends of the same type, doubling the bitwidth
16892/// of the vector elements.
16893static bool areExtractExts(Value *Ext1, Value *Ext2) {
16894 auto areExtDoubled = [](Instruction *Ext) {
16895 return Ext->getType()->getScalarSizeInBits() ==
16896 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
16897 };
16898
16899 if (!match(Ext1, m_ZExtOrSExt(m_Value())) ||
16900 !match(Ext2, m_ZExtOrSExt(m_Value())) ||
16901 !areExtDoubled(cast<Instruction>(Ext1)) ||
16902 !areExtDoubled(cast<Instruction>(Ext2)))
16903 return false;
16904
16905 return true;
16906}
16907
16908/// Check if sinking \p I's operands to I's basic block is profitable, because
16909/// the operands can be folded into a target instruction, e.g.
16910/// sext/zext can be folded into vsubl.
16911bool ARMTargetLowering::shouldSinkOperands(Instruction *I,
16912 SmallVectorImpl<Use *> &Ops) const {
16913 if (!I->getType()->isVectorTy())
16914 return false;
16915
16916 if (Subtarget->hasNEON()) {
16917 switch (I->getOpcode()) {
16918 case Instruction::Sub:
16919 case Instruction::Add: {
16920 if (!areExtractExts(I->getOperand(0), I->getOperand(1)))
16921 return false;
16922 Ops.push_back(&I->getOperandUse(0));
16923 Ops.push_back(&I->getOperandUse(1));
16924 return true;
16925 }
16926 default:
16927 return false;
16928 }
16929 }
16930
16931 if (!Subtarget->hasMVEIntegerOps())
16932 return false;
16933
16934 auto IsFMSMul = [&](Instruction *I) {
16935 if (!I->hasOneUse())
16936 return false;
16937 auto *Sub = cast<Instruction>(*I->users().begin());
16938 return Sub->getOpcode() == Instruction::FSub && Sub->getOperand(1) == I;
16939 };
16940 auto IsFMS = [&](Instruction *I) {
16941 if (match(I->getOperand(0), m_FNeg(m_Value())) ||
16942 match(I->getOperand(1), m_FNeg(m_Value())))
16943 return true;
16944 return false;
16945 };
16946
16947 auto IsSinker = [&](Instruction *I, int Operand) {
16948 switch (I->getOpcode()) {
16949 case Instruction::Add:
16950 case Instruction::Mul:
16951 case Instruction::FAdd:
16952 case Instruction::ICmp:
16953 case Instruction::FCmp:
16954 return true;
16955 case Instruction::FMul:
16956 return !IsFMSMul(I);
16957 case Instruction::Sub:
16958 case Instruction::FSub:
16959 case Instruction::Shl:
16960 case Instruction::LShr:
16961 case Instruction::AShr:
16962 return Operand == 1;
16963 case Instruction::Call:
16964 if (auto *II = dyn_cast<IntrinsicInst>(I)) {
16965 switch (II->getIntrinsicID()) {
16966 case Intrinsic::fma:
16967 return !IsFMS(I);
16968 case Intrinsic::arm_mve_add_predicated:
16969 case Intrinsic::arm_mve_mul_predicated:
16970 case Intrinsic::arm_mve_qadd_predicated:
16971 case Intrinsic::arm_mve_hadd_predicated:
16972 case Intrinsic::arm_mve_vqdmull_predicated:
16973 case Intrinsic::arm_mve_qdmulh_predicated:
16974 case Intrinsic::arm_mve_qrdmulh_predicated:
16975 case Intrinsic::arm_mve_fma_predicated:
16976 return true;
16977 case Intrinsic::arm_mve_sub_predicated:
16978 case Intrinsic::arm_mve_qsub_predicated:
16979 case Intrinsic::arm_mve_hsub_predicated:
16980 return Operand == 1;
16981 default:
16982 return false;
16983 }
16984 }
16985 return false;
16986 default:
16987 return false;
16988 }
16989 };
16990
16991 for (auto OpIdx : enumerate(I->operands())) {
16992 Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
16993 // Make sure we are not already sinking this operand
16994 if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
16995 continue;
16996
16997 Instruction *Shuffle = Op;
16998 if (Shuffle->getOpcode() == Instruction::BitCast)
16999 Shuffle = dyn_cast<Instruction>(Shuffle->getOperand(0));
17000 // We are looking for a splat that can be sunk.
17001 if (!Shuffle ||
17002 !match(Shuffle, m_Shuffle(
17003 m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
17004 m_Undef(), m_ZeroMask())))
17005 continue;
17006 if (!IsSinker(I, OpIdx.index()))
17007 continue;
17008
17009    // All uses of the shuffle should be sunk to avoid duplicating it across GPR
17010    // and vector registers.
17011 for (Use &U : Op->uses()) {
17012 Instruction *Insn = cast<Instruction>(U.getUser());
17013 if (!IsSinker(Insn, U.getOperandNo()))
17014 return false;
17015 }
17016
17017 Ops.push_back(&Shuffle->getOperandUse(0));
17018 if (Shuffle != Op)
17019 Ops.push_back(&Op->getOperandUse(0));
17020 Ops.push_back(&OpIdx.value());
17021 }
17022 return true;
17023}
17024
17025Type *ARMTargetLowering::shouldConvertSplatType(ShuffleVectorInst *SVI) const {
17026 if (!Subtarget->hasMVEIntegerOps())
17027 return nullptr;
17028 Type *SVIType = SVI->getType();
17029 Type *ScalarType = SVIType->getScalarType();
17030
17031 if (ScalarType->isFloatTy())
17032 return Type::getInt32Ty(SVIType->getContext());
17033 if (ScalarType->isHalfTy())
17034 return Type::getInt16Ty(SVIType->getContext());
17035 return nullptr;
17036}
17037
17038bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
17039 EVT VT = ExtVal.getValueType();
17040
17041 if (!isTypeLegal(VT))
17042 return false;
17043
17044 if (auto *Ld = dyn_cast<MaskedLoadSDNode>(ExtVal.getOperand(0))) {
17045 if (Ld->isExpandingLoad())
17046 return false;
17047 }
17048
17049 if (Subtarget->hasMVEIntegerOps())
17050 return true;
17051
17052 // Don't create a loadext if we can fold the extension into a wide/long
17053 // instruction.
17054 // If there's more than one user instruction, the loadext is desirable no
17055 // matter what. There can be two uses by the same instruction.
17056 if (ExtVal->use_empty() ||
17057 !ExtVal->use_begin()->isOnlyUserOf(ExtVal.getNode()))
17058 return true;
17059
17060 SDNode *U = *ExtVal->use_begin();
17061 if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB ||
17062 U->getOpcode() == ISD::SHL || U->getOpcode() == ARMISD::VSHLIMM))
17063 return false;
17064
17065 return true;
17066}
17067
17068bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
17069 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
17070 return false;
17071
17072 if (!isTypeLegal(EVT::getEVT(Ty1)))
17073 return false;
17074
17075  assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop");
17076
17077 // Assuming the caller doesn't have a zeroext or signext return parameter,
17078 // truncation all the way down to i1 is valid.
17079 return true;
17080}
17081
17082int ARMTargetLowering::getScalingFactorCost(const DataLayout &DL,
17083 const AddrMode &AM, Type *Ty,
17084 unsigned AS) const {
17085 if (isLegalAddressingMode(DL, AM, Ty, AS)) {
17086 if (Subtarget->hasFPAO())
17087 return AM.Scale < 0 ? 1 : 0; // positive offsets execute faster
17088 return 0;
17089 }
17090 return -1;
17091}
17092
17093/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
17094/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
17095/// expanded to FMAs when this method returns true, otherwise fmuladd is
17096/// expanded to fmul + fadd.
17097///
17098/// ARM supports both fused and unfused multiply-add operations; we already
17099/// lower a pair of fmul and fadd to the latter so it's not clear that there
17100/// would be a gain or that the gain would be worthwhile enough to risk
17101/// correctness bugs.
17102///
17103/// For MVE, we set this to true as it helps simplify the need for some
17104/// patterns (and we don't have the non-fused floating point instruction).
17105bool ARMTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
17106 EVT VT) const {
17107 if (!VT.isSimple())
17108 return false;
17109
17110 switch (VT.getSimpleVT().SimpleTy) {
17111 case MVT::v4f32:
17112 case MVT::v8f16:
17113 return Subtarget->hasMVEFloatOps();
17114 case MVT::f16:
17115 return Subtarget->useFPVFMx16();
17116 case MVT::f32:
17117 return Subtarget->useFPVFMx();
17118 case MVT::f64:
17119 return Subtarget->useFPVFMx64();
17120 default:
17121 break;
17122 }
17123
17124 return false;
17125}
17126
17127static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
17128 if (V < 0)
17129 return false;
17130
17131 unsigned Scale = 1;
17132 switch (VT.getSimpleVT().SimpleTy) {
17133 case MVT::i1:
17134 case MVT::i8:
17135 // Scale == 1;
17136 break;
17137 case MVT::i16:
17138 // Scale == 2;
17139 Scale = 2;
17140 break;
17141 default:
17142    // On Thumb1 we load most things (i32, i64, floats, etc.) with an LDR
17143 // Scale == 4;
17144 Scale = 4;
17145 break;
17146 }
17147
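  // With the unsigned, scaled 5-bit immediate this accepts offsets 0..31 for
  // byte accesses, even offsets up to 62 for halfwords, and multiples of 4 up
  // to 124 for LDR-sized accesses.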
17148 if ((V & (Scale - 1)) != 0)
17149 return false;
17150 return isUInt<5>(V / Scale);
17151}
17152
17153static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
17154 const ARMSubtarget *Subtarget) {
17155 if (!VT.isInteger() && !VT.isFloatingPoint())
17156 return false;
17157 if (VT.isVector() && Subtarget->hasNEON())
17158 return false;
17159 if (VT.isVector() && VT.isFloatingPoint() && Subtarget->hasMVEIntegerOps() &&
17160 !Subtarget->hasMVEFloatOps())
17161 return false;
17162
17163 bool IsNeg = false;
17164 if (V < 0) {
17165 IsNeg = true;
17166 V = -V;
17167 }
17168
17169 unsigned NumBytes = std::max((unsigned)VT.getSizeInBits() / 8, 1U);
17170
17171 // MVE: size * imm7
17172 if (VT.isVector() && Subtarget->hasMVEIntegerOps()) {
17173 switch (VT.getSimpleVT().getVectorElementType().SimpleTy) {
17174 case MVT::i32:
17175 case MVT::f32:
17176 return isShiftedUInt<7,2>(V);
17177 case MVT::i16:
17178 case MVT::f16:
17179 return isShiftedUInt<7,1>(V);
17180 case MVT::i8:
17181 return isUInt<7>(V);
17182 default:
17183 return false;
17184 }
17185 }
17186
17187 // half VLDR: 2 * imm8
17188 if (VT.isFloatingPoint() && NumBytes == 2 && Subtarget->hasFPRegs16())
17189 return isShiftedUInt<8, 1>(V);
17190 // VLDR and LDRD: 4 * imm8
17191 if ((VT.isFloatingPoint() && Subtarget->hasVFP2Base()) || NumBytes == 8)
17192 return isShiftedUInt<8, 2>(V);
17193
17194 if (NumBytes == 1 || NumBytes == 2 || NumBytes == 4) {
17195 // + imm12 or - imm8
17196 if (IsNeg)
17197 return isUInt<8>(V);
17198 return isUInt<12>(V);
17199 }
17200
17201 return false;
17202}
17203
17204/// isLegalAddressImmediate - Return true if the integer value can be used
17205/// as the offset of the target addressing mode for load / store of the
17206/// given type.
17207static bool isLegalAddressImmediate(int64_t V, EVT VT,
17208 const ARMSubtarget *Subtarget) {
17209 if (V == 0)
17210 return true;
17211
17212 if (!VT.isSimple())
17213 return false;
17214
17215 if (Subtarget->isThumb1Only())
17216 return isLegalT1AddressImmediate(V, VT);
17217 else if (Subtarget->isThumb2())
17218 return isLegalT2AddressImmediate(V, VT, Subtarget);
17219
17220 // ARM mode.
17221 if (V < 0)
17222 V = - V;
17223 switch (VT.getSimpleVT().SimpleTy) {
17224 default: return false;
17225 case MVT::i1:
17226 case MVT::i8:
17227 case MVT::i32:
17228 // +- imm12
17229 return isUInt<12>(V);
17230 case MVT::i16:
17231 // +- imm8
17232 return isUInt<8>(V);
17233 case MVT::f32:
17234 case MVT::f64:
17235 if (!Subtarget->hasVFP2Base()) // FIXME: NEON?
17236 return false;
17237 return isShiftedUInt<8, 2>(V);
17238 }
17239}
17240
17241bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
17242 EVT VT) const {
17243 int Scale = AM.Scale;
17244 if (Scale < 0)
17245 return false;
17246
17247 switch (VT.getSimpleVT().SimpleTy) {
17248 default: return false;
17249 case MVT::i1:
17250 case MVT::i8:
17251 case MVT::i16:
17252 case MVT::i32:
17253 if (Scale == 1)
17254 return true;
17255 // r + r << imm
17256 Scale = Scale & ~1;
17257 return Scale == 2 || Scale == 4 || Scale == 8;
17258 case MVT::i64:
17259 // FIXME: What are we trying to model here? ldrd doesn't have an r + r
17260 // version in Thumb mode.
17261 // r + r
17262 if (Scale == 1)
17263 return true;
17264 // r * 2 (this can be lowered to r + r).
17265 if (!AM.HasBaseReg && Scale == 2)
17266 return true;
17267 return false;
17268 case MVT::isVoid:
17269 // Note, we allow "void" uses (basically, uses that aren't loads or
17270    // stores), because ARM allows folding a scale into many arithmetic
17271 // operations. This should be made more precise and revisited later.
17272
17273 // Allow r << imm, but the imm has to be a multiple of two.
17274 if (Scale & 1) return false;
17275 return isPowerOf2_32(Scale);
17276 }
17277}
17278
17279bool ARMTargetLowering::isLegalT1ScaledAddressingMode(const AddrMode &AM,
17280 EVT VT) const {
17281 const int Scale = AM.Scale;
17282
17283 // Negative scales are not supported in Thumb1.
17284 if (Scale < 0)
17285 return false;
17286
17287  // Thumb1 addressing modes do not support register scaling, except in the
17288  // following cases:
17289 // 1. Scale == 1 means no scaling.
17290 // 2. Scale == 2 this can be lowered to r + r if there is no base register.
17291 return (Scale == 1) || (!AM.HasBaseReg && Scale == 2);
17292}
17293
17294/// isLegalAddressingMode - Return true if the addressing mode represented
17295/// by AM is legal for this target, for a load/store of the specified type.
17296bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL,
17297 const AddrMode &AM, Type *Ty,
17298 unsigned AS, Instruction *I) const {
17299 EVT VT = getValueType(DL, Ty, true);
17300 if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
17301 return false;
17302
17303 // Can never fold addr of global into load/store.
17304 if (AM.BaseGV)
17305 return false;
17306
17307 switch (AM.Scale) {
17308 case 0: // no scale reg, must be "r+i" or "r", or "i".
17309 break;
17310 default:
17311 // ARM doesn't support any R+R*scale+imm addr modes.
17312 if (AM.BaseOffs)
17313 return false;
17314
17315 if (!VT.isSimple())
17316 return false;
17317
17318 if (Subtarget->isThumb1Only())
17319 return isLegalT1ScaledAddressingMode(AM, VT);
17320
17321 if (Subtarget->isThumb2())
17322 return isLegalT2ScaledAddressingMode(AM, VT);
17323
17324 int Scale = AM.Scale;
17325 switch (VT.getSimpleVT().SimpleTy) {
17326 default: return false;
17327 case MVT::i1:
17328 case MVT::i8:
17329 case MVT::i32:
17330 if (Scale < 0) Scale = -Scale;
17331 if (Scale == 1)
17332 return true;
17333 // r + r << imm
17334 return isPowerOf2_32(Scale & ~1);
17335 case MVT::i16:
17336 case MVT::i64:
17337 // r +/- r
17338 if (Scale == 1 || (AM.HasBaseReg && Scale == -1))
17339 return true;
17340 // r * 2 (this can be lowered to r + r).
17341 if (!AM.HasBaseReg && Scale == 2)
17342 return true;
17343 return false;
17344
17345 case MVT::isVoid:
17346 // Note, we allow "void" uses (basically, uses that aren't loads or
17347      // stores), because ARM allows folding a scale into many arithmetic
17348 // operations. This should be made more precise and revisited later.
17349
17350 // Allow r << imm, but the imm has to be a multiple of two.
17351 if (Scale & 1) return false;
17352 return isPowerOf2_32(Scale);
17353 }
17354 }
17355 return true;
17356}
17357
17358/// isLegalICmpImmediate - Return true if the specified immediate is legal
17359/// icmp immediate, that is the target has icmp instructions which can compare
17360/// a register against the immediate without having to materialize the
17361/// immediate into a register.
17362bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
17363 // Thumb2 and ARM modes can use cmn for negative immediates.
17364 if (!Subtarget->isThumb())
17365 return ARM_AM::getSOImmVal((uint32_t)Imm) != -1 ||
17366 ARM_AM::getSOImmVal(-(uint32_t)Imm) != -1;
17367 if (Subtarget->isThumb2())
17368 return ARM_AM::getT2SOImmVal((uint32_t)Imm) != -1 ||
17369 ARM_AM::getT2SOImmVal(-(uint32_t)Imm) != -1;
17370  // Thumb1 doesn't have cmn and only has 8-bit immediates.
17371 return Imm >= 0 && Imm <= 255;
17372}
17373
17374/// isLegalAddImmediate - Return true if the specified immediate is a legal add
17375/// *or sub* immediate, that is the target has add or sub instructions which can
17376/// add a register with the immediate without having to materialize the
17377/// immediate into a register.
17378bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const {
17379 // Same encoding for add/sub, just flip the sign.
17380 int64_t AbsImm = std::abs(Imm);
17381 if (!Subtarget->isThumb())
17382 return ARM_AM::getSOImmVal(AbsImm) != -1;
17383 if (Subtarget->isThumb2())
17384 return ARM_AM::getT2SOImmVal(AbsImm) != -1;
17385  // Thumb1 only has 8-bit unsigned immediates.
17386 return AbsImm >= 0 && AbsImm <= 255;
17387}
17388
17389static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
17390 bool isSEXTLoad, SDValue &Base,
17391 SDValue &Offset, bool &isInc,
17392 SelectionDAG &DAG) {
17393 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
17394 return false;
17395
17396 if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
17397 // AddressingMode 3
17398 Base = Ptr->getOperand(0);
17399 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
17400 int RHSC = (int)RHS->getZExtValue();
17401 if (RHSC < 0 && RHSC > -256) {
17402         assert(Ptr->getOpcode() == ISD::ADD);
17403 isInc = false;
17404 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
17405 return true;
17406 }
17407 }
17408 isInc = (Ptr->getOpcode() == ISD::ADD);
17409 Offset = Ptr->getOperand(1);
17410 return true;
17411 } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
17412 // AddressingMode 2
17413 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
17414 int RHSC = (int)RHS->getZExtValue();
17415 if (RHSC < 0 && RHSC > -0x1000) {
17416         assert(Ptr->getOpcode() == ISD::ADD);
17417 isInc = false;
17418 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
17419 Base = Ptr->getOperand(0);
17420 return true;
17421 }
17422 }
17423
17424 if (Ptr->getOpcode() == ISD::ADD) {
17425 isInc = true;
17426 ARM_AM::ShiftOpc ShOpcVal=
17427 ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode());
17428 if (ShOpcVal != ARM_AM::no_shift) {
17429 Base = Ptr->getOperand(1);
17430 Offset = Ptr->getOperand(0);
17431 } else {
17432 Base = Ptr->getOperand(0);
17433 Offset = Ptr->getOperand(1);
17434 }
17435 return true;
17436 }
17437
17438 isInc = (Ptr->getOpcode() == ISD::ADD);
17439 Base = Ptr->getOperand(0);
17440 Offset = Ptr->getOperand(1);
17441 return true;
17442 }
17443
17444 // FIXME: Use VLDM / VSTM to emulate indexed FP load / store.
17445 return false;
17446}
17447
17448static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
17449 bool isSEXTLoad, SDValue &Base,
17450 SDValue &Offset, bool &isInc,
17451 SelectionDAG &DAG) {
17452 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
17453 return false;
17454
17455 Base = Ptr->getOperand(0);
17456 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
17457 int RHSC = (int)RHS->getZExtValue();
17458 if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
17459       assert(Ptr->getOpcode() == ISD::ADD);
17460 isInc = false;
17461 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
17462 return true;
17463 } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero.
17464 isInc = Ptr->getOpcode() == ISD::ADD;
17465 Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0));
17466 return true;
17467 }
17468 }
17469
17470 return false;
17471}
17472
17473static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, Align Alignment,
17474 bool isSEXTLoad, bool IsMasked, bool isLE,
17475 SDValue &Base, SDValue &Offset,
17476 bool &isInc, SelectionDAG &DAG) {
17477 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
17478 return false;
17479 if (!isa<ConstantSDNode>(Ptr->getOperand(1)))
17480 return false;
17481
17482 // We allow LE non-masked loads to change the type (for example use a vldrb.8
17483 // as opposed to a vldrw.32). This can allow extra addressing modes or
17484 // alignments for what is otherwise an equivalent instruction.
17485 bool CanChangeType = isLE && !IsMasked;
17486
17487 ConstantSDNode *RHS = cast<ConstantSDNode>(Ptr->getOperand(1));
17488 int RHSC = (int)RHS->getZExtValue();
17489
17490 auto IsInRange = [&](int RHSC, int Limit, int Scale) {
17491 if (RHSC < 0 && RHSC > -Limit * Scale && RHSC % Scale == 0) {
17492       assert(Ptr->getOpcode() == ISD::ADD);
17493 isInc = false;
17494 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
17495 return true;
17496 } else if (RHSC > 0 && RHSC < Limit * Scale && RHSC % Scale == 0) {
17497 isInc = Ptr->getOpcode() == ISD::ADD;
17498 Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0));
17499 return true;
17500 }
17501 return false;
17502 };
17503
17504 // Try to find a matching instruction based on s/zext, Alignment, Offset and
17505 // (in BE/masked) type.
17506 Base = Ptr->getOperand(0);
17507 if (VT == MVT::v4i16) {
17508 if (Alignment >= 2 && IsInRange(RHSC, 0x80, 2))
17509 return true;
17510 } else if (VT == MVT::v4i8 || VT == MVT::v8i8) {
17511 if (IsInRange(RHSC, 0x80, 1))
17512 return true;
17513 } else if (Alignment >= 4 &&
17514 (CanChangeType || VT == MVT::v4i32 || VT == MVT::v4f32) &&
17515 IsInRange(RHSC, 0x80, 4))
17516 return true;
17517 else if (Alignment >= 2 &&
17518 (CanChangeType || VT == MVT::v8i16 || VT == MVT::v8f16) &&
17519 IsInRange(RHSC, 0x80, 2))
17520 return true;
17521 else if ((CanChangeType || VT == MVT::v16i8) && IsInRange(RHSC, 0x80, 1))
17522 return true;
17523 return false;
17524}
17525
17526/// getPreIndexedAddressParts - returns true by value, base pointer and
17527/// offset pointer and addressing mode by reference if the node's address
17528/// can be legally represented as pre-indexed load / store address.
17529bool
17530ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
17531 SDValue &Offset,
17532 ISD::MemIndexedMode &AM,
17533 SelectionDAG &DAG) const {
17534 if (Subtarget->isThumb1Only())
17535 return false;
17536
17537 EVT VT;
17538 SDValue Ptr;
17539 Align Alignment;
17540 bool isSEXTLoad = false;
17541 bool IsMasked = false;
17542 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
17543 Ptr = LD->getBasePtr();
17544 VT = LD->getMemoryVT();
17545 Alignment = LD->getAlign();
17546 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
17547 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
17548 Ptr = ST->getBasePtr();
17549 VT = ST->getMemoryVT();
17550 Alignment = ST->getAlign();
17551 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
17552 Ptr = LD->getBasePtr();
17553 VT = LD->getMemoryVT();
17554 Alignment = LD->getAlign();
17555 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
17556 IsMasked = true;
17557 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
17558 Ptr = ST->getBasePtr();
17559 VT = ST->getMemoryVT();
17560 Alignment = ST->getAlign();
17561 IsMasked = true;
17562 } else
17563 return false;
17564
17565 bool isInc;
17566 bool isLegal = false;
17567 if (VT.isVector())
17568 isLegal = Subtarget->hasMVEIntegerOps() &&
17569 getMVEIndexedAddressParts(
17570 Ptr.getNode(), VT, Alignment, isSEXTLoad, IsMasked,
17571 Subtarget->isLittle(), Base, Offset, isInc, DAG);
17572 else {
17573 if (Subtarget->isThumb2())
17574 isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
17575 Offset, isInc, DAG);
17576 else
17577 isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
17578 Offset, isInc, DAG);
17579 }
17580 if (!isLegal)
17581 return false;
17582
17583 AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
17584 return true;
17585}
17586
17587/// getPostIndexedAddressParts - returns true by value, base pointer and
17588/// offset pointer and addressing mode by reference if this node can be
17589/// combined with a load / store to form a post-indexed load / store.
17590bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
17591 SDValue &Base,
17592 SDValue &Offset,
17593 ISD::MemIndexedMode &AM,
17594 SelectionDAG &DAG) const {
17595 EVT VT;
17596 SDValue Ptr;
17597 Align Alignment;
17598 bool isSEXTLoad = false, isNonExt;
17599 bool IsMasked = false;
17600 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
17601 VT = LD->getMemoryVT();
17602 Ptr = LD->getBasePtr();
17603 Alignment = LD->getAlign();
17604 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
17605 isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
17606 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
17607 VT = ST->getMemoryVT();
17608 Ptr = ST->getBasePtr();
17609 Alignment = ST->getAlign();
17610 isNonExt = !ST->isTruncatingStore();
17611 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
17612 VT = LD->getMemoryVT();
17613 Ptr = LD->getBasePtr();
17614 Alignment = LD->getAlign();
17615 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
17616 isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
17617 IsMasked = true;
17618 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
17619 VT = ST->getMemoryVT();
17620 Ptr = ST->getBasePtr();
17621 Alignment = ST->getAlign();
17622 isNonExt = !ST->isTruncatingStore();
17623 IsMasked = true;
17624 } else
17625 return false;
17626
17627 if (Subtarget->isThumb1Only()) {
17628 // Thumb-1 can do a limited post-inc load or store as an updating LDM. It
17629 // must be non-extending/truncating, i32, with an offset of 4.
17630     assert(Op->getValueType(0) == MVT::i32 && "Non-i32 post-inc op?!");
17631 if (Op->getOpcode() != ISD::ADD || !isNonExt)
17632 return false;
17633 auto *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1));
17634 if (!RHS || RHS->getZExtValue() != 4)
17635 return false;
17636
17637 Offset = Op->getOperand(1);
17638 Base = Op->getOperand(0);
17639 AM = ISD::POST_INC;
17640 return true;
17641 }
17642
17643 bool isInc;
17644 bool isLegal = false;
17645 if (VT.isVector())
17646 isLegal = Subtarget->hasMVEIntegerOps() &&
17647 getMVEIndexedAddressParts(Op, VT, Alignment, isSEXTLoad, IsMasked,
17648 Subtarget->isLittle(), Base, Offset,
17649 isInc, DAG);
17650 else {
17651 if (Subtarget->isThumb2())
17652 isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
17653 isInc, DAG);
17654 else
17655 isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
17656 isInc, DAG);
17657 }
17658 if (!isLegal)
17659 return false;
17660
17661 if (Ptr != Base) {
17662 // Swap base ptr and offset to catch more post-index load / store when
17663 // it's legal. In Thumb2 mode, offset must be an immediate.
17664 if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&
17665 !Subtarget->isThumb2())
17666 std::swap(Base, Offset);
17667
17668 // Post-indexed load / store update the base pointer.
17669 if (Ptr != Base)
17670 return false;
17671 }
17672
17673 AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
17674 return true;
17675}
17676
17677void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
17678 KnownBits &Known,
17679 const APInt &DemandedElts,
17680 const SelectionDAG &DAG,
17681 unsigned Depth) const {
17682 unsigned BitWidth = Known.getBitWidth();
17683 Known.resetAll();
17684 switch (Op.getOpcode()) {
17685 default: break;
17686 case ARMISD::ADDC:
17687 case ARMISD::ADDE:
17688 case ARMISD::SUBC:
17689 case ARMISD::SUBE:
17690 // Special cases when we convert a carry to a boolean.
17691 if (Op.getResNo() == 0) {
17692 SDValue LHS = Op.getOperand(0);
17693 SDValue RHS = Op.getOperand(1);
17694 // (ADDE 0, 0, C) will give us a single bit.
17695 if (Op->getOpcode() == ARMISD::ADDE && isNullConstant(LHS) &&
17696 isNullConstant(RHS)) {
17697 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
17698 return;
17699 }
17700 }
17701 break;
17702 case ARMISD::CMOV: {
17703 // Bits are known zero/one if known on the LHS and RHS.
17704 Known = DAG.computeKnownBits(Op.getOperand(0), Depth+1);
17705 if (Known.isUnknown())
17706 return;
17707
17708 KnownBits KnownRHS = DAG.computeKnownBits(Op.getOperand(1), Depth+1);
17709 Known = KnownBits::commonBits(Known, KnownRHS);
17710 return;
17711 }
17712 case ISD::INTRINSIC_W_CHAIN: {
17713 ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
17714 Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
17715 switch (IntID) {
17716 default: return;
17717 case Intrinsic::arm_ldaex:
17718 case Intrinsic::arm_ldrex: {
17719 EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
17720 unsigned MemBits = VT.getScalarSizeInBits();
17721 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
17722 return;
17723 }
17724 }
17725 }
17726 case ARMISD::BFI: {
17727 // Conservatively, we can recurse down the first operand
17728 // and just mask out all affected bits.
17729 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17730
17731 // The operand to BFI is already a mask suitable for removing the bits it
17732 // sets.
17733 ConstantSDNode *CI = cast<ConstantSDNode>(Op.getOperand(2));
17734 const APInt &Mask = CI->getAPIntValue();
17735 Known.Zero &= Mask;
17736 Known.One &= Mask;
17737 return;
17738 }
17739 case ARMISD::VGETLANEs:
17740 case ARMISD::VGETLANEu: {
17741 const SDValue &SrcSV = Op.getOperand(0);
17742 EVT VecVT = SrcSV.getValueType();
17743     assert(VecVT.isVector() && "VGETLANE expected a vector type");
17744 const unsigned NumSrcElts = VecVT.getVectorNumElements();
17745 ConstantSDNode *Pos = cast<ConstantSDNode>(Op.getOperand(1).getNode());
17746     assert(Pos->getAPIntValue().ult(NumSrcElts) &&
17747            "VGETLANE index out of bounds");
17748 unsigned Idx = Pos->getZExtValue();
17749 APInt DemandedElt = APInt::getOneBitSet(NumSrcElts, Idx);
17750 Known = DAG.computeKnownBits(SrcSV, DemandedElt, Depth + 1);
17751
17752 EVT VT = Op.getValueType();
17753 const unsigned DstSz = VT.getScalarSizeInBits();
17754 const unsigned SrcSz = VecVT.getVectorElementType().getSizeInBits();
17755 (void)SrcSz;
17756     assert(SrcSz == Known.getBitWidth());
17757     assert(DstSz > SrcSz);
17758 if (Op.getOpcode() == ARMISD::VGETLANEs)
17759 Known = Known.sext(DstSz);
17760 else {
17761 Known = Known.zext(DstSz);
17762 }
17763     assert(DstSz == Known.getBitWidth());
17764 break;
17765 }
17766 case ARMISD::VMOVrh: {
17767 KnownBits KnownOp = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
17768     assert(KnownOp.getBitWidth() == 16);
17769 Known = KnownOp.zext(32);
17770 break;
17771 }
17772 case ARMISD::CSINC:
17773 case ARMISD::CSINV:
17774 case ARMISD::CSNEG: {
17775 KnownBits KnownOp0 = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
17776 KnownBits KnownOp1 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
17777
17778 // The result is either:
17779 // CSINC: KnownOp0 or KnownOp1 + 1
17780 // CSINV: KnownOp0 or ~KnownOp1
17781 // CSNEG: KnownOp0 or KnownOp1 * -1
17782 if (Op.getOpcode() == ARMISD::CSINC)
17783 KnownOp1 = KnownBits::computeForAddSub(
17784 true, false, KnownOp1, KnownBits::makeConstant(APInt(32, 1)));
17785 else if (Op.getOpcode() == ARMISD::CSINV)
17786 std::swap(KnownOp1.Zero, KnownOp1.One);
17787 else if (Op.getOpcode() == ARMISD::CSNEG)
17788 KnownOp1 = KnownBits::mul(
17789 KnownOp1, KnownBits::makeConstant(APInt(32, -1)));
17790
17791 Known = KnownBits::commonBits(KnownOp0, KnownOp1);
17792 break;
17793 }
17794 }
17795}
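// Illustrative sketch (not part of the original file; the names Bits/common
// are made up): the CSINC/CSINV/CSNEG handling above keeps only the bits on
// which both possible results agree. With plain Zero/One masks the merge is
//   struct Bits { unsigned Zero, One; };
//   Bits common(Bits A, Bits B) { return {A.Zero & B.Zero, A.One & B.One}; }
// For CSINV the second operand is bitwise-inverted first, which is why the
// code swaps its Zero and One sets before merging.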
17796
17797bool ARMTargetLowering::targetShrinkDemandedConstant(
17798 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
17799 TargetLoweringOpt &TLO) const {
17800 // Delay optimization, so we don't have to deal with illegal types, or block
17801 // optimizations.
17802 if (!TLO.LegalOps)
17803 return false;
17804
17805 // Only optimize AND for now.
17806 if (Op.getOpcode() != ISD::AND)
17807 return false;
17808
17809 EVT VT = Op.getValueType();
17810
17811 // Ignore vectors.
17812 if (VT.isVector())
17813 return false;
17814
17815  assert(VT == MVT::i32 && "Unexpected integer type");
17816
17817 // Make sure the RHS really is a constant.
17818 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
17819 if (!C)
17820 return false;
17821
17822 unsigned Mask = C->getZExtValue();
17823
17824 unsigned Demanded = DemandedBits.getZExtValue();
17825 unsigned ShrunkMask = Mask & Demanded;
17826 unsigned ExpandedMask = Mask | ~Demanded;
17827
17828 // If the mask is all zeros, let the target-independent code replace the
17829 // result with zero.
17830 if (ShrunkMask == 0)
17831 return false;
17832
17833 // If the mask is all ones, erase the AND. (Currently, the target-independent
17834 // code won't do this, so we have to do it explicitly to avoid an infinite
17835 // loop in obscure cases.)
17836 if (ExpandedMask == ~0U)
17837 return TLO.CombineTo(Op, Op.getOperand(0));
17838
17839 auto IsLegalMask = [ShrunkMask, ExpandedMask](unsigned Mask) -> bool {
17840 return (ShrunkMask & Mask) == ShrunkMask && (~ExpandedMask & Mask) == 0;
17841 };
17842 auto UseMask = [Mask, Op, VT, &TLO](unsigned NewMask) -> bool {
17843 if (NewMask == Mask)
17844 return true;
17845 SDLoc DL(Op);
17846 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
17847 SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
17848 return TLO.CombineTo(Op, NewOp);
17849 };
17850
17851 // Prefer uxtb mask.
17852 if (IsLegalMask(0xFF))
17853 return UseMask(0xFF);
17854
17855 // Prefer uxth mask.
17856 if (IsLegalMask(0xFFFF))
17857 return UseMask(0xFFFF);
17858
17859 // [1, 255] is Thumb1 movs+ands, legal immediate for ARM/Thumb2.
17860 // FIXME: Prefer a contiguous sequence of bits for other optimizations.
17861 if (ShrunkMask < 256)
17862 return UseMask(ShrunkMask);
17863
17864 // [-256, -2] is Thumb1 movs+bics, legal immediate for ARM/Thumb2.
17865 // FIXME: Prefer a contiguous sequence of bits for other optimizations.
17866 if ((int)ExpandedMask <= -2 && (int)ExpandedMask >= -256)
17867 return UseMask(ExpandedMask);
17868
17869 // Potential improvements:
17870 //
17871 // We could try to recognize lsls+lsrs or lsrs+lsls pairs here.
17872 // We could try to prefer Thumb1 immediates which can be lowered to a
17873 // two-instruction sequence.
17874 // We could try to recognize more legal ARM/Thumb2 immediates here.
17875
17876 return false;
17877}
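// Worked example (illustrative only, not from the original source): for
// (x & 0x1FF) where the user of the AND later demands only the low 8 bits,
// Mask = 0x1FF and Demanded = 0xFF, so ShrunkMask = 0xFF and
// ExpandedMask = 0xFFFFFFFF. IsLegalMask(0xFF) then holds (0xFF covers
// ShrunkMask and lies inside ExpandedMask), so UseMask(0xFF) rewrites the
// node to (x & 0xFF), which can be selected as a single uxtb.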
17878
17879bool ARMTargetLowering::SimplifyDemandedBitsForTargetNode(
17880 SDValue Op, const APInt &OriginalDemandedBits,
17881 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
17882 unsigned Depth) const {
17883 unsigned Opc = Op.getOpcode();
17884
17885 switch (Opc) {
17886 case ARMISD::ASRL:
17887 case ARMISD::LSRL: {
17888    // If this is result 0 and the other result is unused, see if the demanded
17889 // bits allow us to shrink this long shift into a standard small shift in
17890 // the opposite direction.
17891 if (Op.getResNo() == 0 && !Op->hasAnyUseOfValue(1) &&
17892 isa<ConstantSDNode>(Op->getOperand(2))) {
17893 unsigned ShAmt = Op->getConstantOperandVal(2);
17894 if (ShAmt < 32 && OriginalDemandedBits.isSubsetOf(
17895 APInt::getAllOnesValue(32) << (32 - ShAmt)))
17896 return TLO.CombineTo(
17897 Op, TLO.DAG.getNode(
17898 ISD::SHL, SDLoc(Op), MVT::i32, Op.getOperand(1),
17899 TLO.DAG.getConstant(32 - ShAmt, SDLoc(Op), MVT::i32)));
17900 }
17901 break;
17902 }
17903 }
17904
17905 return TargetLowering::SimplifyDemandedBitsForTargetNode(
17906 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
17907}
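// Illustrative sketch (not part of the original file; variable names are made
// up): for a long shift of the 64-bit pair Hi:Lo right by ShAmt with
// 0 < ShAmt < 32, the low result is
//   uint32_t LoRes = (Lo >> ShAmt) | (Hi << (32 - ShAmt));
// and (Lo >> ShAmt) cannot touch the top ShAmt bits, so when only those top
// bits are demanded the node is equivalent to Hi << (32 - ShAmt), which is
// exactly the SHL the combine above substitutes.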
17908
17909//===----------------------------------------------------------------------===//
17910// ARM Inline Assembly Support
17911//===----------------------------------------------------------------------===//
17912
17913bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {
17914 // Looking for "rev" which is V6+.
17915 if (!Subtarget->hasV6Ops())
17916 return false;
17917
17918 InlineAsm *IA = cast<InlineAsm>(CI->getCalledOperand());
17919 std::string AsmStr = IA->getAsmString();
17920 SmallVector<StringRef, 4> AsmPieces;
17921 SplitString(AsmStr, AsmPieces, ";\n");
17922
17923 switch (AsmPieces.size()) {
17924 default: return false;
17925 case 1:
17926 AsmStr = std::string(AsmPieces[0]);
17927 AsmPieces.clear();
17928 SplitString(AsmStr, AsmPieces, " \t,");
17929
17930 // rev $0, $1
17931 if (AsmPieces.size() == 3 &&
17932 AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" &&
17933 IA->getConstraintString().compare(0, 4, "=l,l") == 0) {
17934 IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
17935 if (Ty && Ty->getBitWidth() == 32)
17936 return IntrinsicLowering::LowerToByteSwap(CI);
17937 }
17938 break;
17939 }
17940
17941 return false;
17942}
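// Usage example (illustrative): the pattern recognized above corresponds to a
// GCC-style single-statement asm such as
//   __asm__("rev %0, %1" : "=l"(Out) : "l"(In));
// with a 32-bit integer result on an ARMv6+ target, which is rewritten into a
// byte-swap intrinsic call by IntrinsicLowering::LowerToByteSwap.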
17943
17944const char *ARMTargetLowering::LowerXConstraint(EVT ConstraintVT) const {
17945 // At this point, we have to lower this constraint to something else, so we
17946 // lower it to an "r" or "w". However, by doing this we will force the result
17947 // to be in register, while the X constraint is much more permissive.
17948 //
17949 // Although we are correct (we are free to emit anything, without
17950 // constraints), we might break use cases that would expect us to be more
17951 // efficient and emit something else.
17952 if (!Subtarget->hasVFP2Base())
17953 return "r";
17954 if (ConstraintVT.isFloatingPoint())
17955 return "w";
17956 if (ConstraintVT.isVector() && Subtarget->hasNEON() &&
17957 (ConstraintVT.getSizeInBits() == 64 ||
17958 ConstraintVT.getSizeInBits() == 128))
17959 return "w";
17960
17961 return "r";
17962}
17963
17964/// getConstraintType - Given a constraint letter, return the type of
17965/// constraint it is for this target.
17966ARMTargetLowering::ConstraintType
17967ARMTargetLowering::getConstraintType(StringRef Constraint) const {
17968 unsigned S = Constraint.size();
17969 if (S == 1) {
17970 switch (Constraint[0]) {
17971 default: break;
17972 case 'l': return C_RegisterClass;
17973 case 'w': return C_RegisterClass;
17974 case 'h': return C_RegisterClass;
17975 case 'x': return C_RegisterClass;
17976 case 't': return C_RegisterClass;
17977 case 'j': return C_Immediate; // Constant for movw.
17978 // An address with a single base register. Due to the way we
17979 // currently handle addresses it is the same as an 'r' memory constraint.
17980 case 'Q': return C_Memory;
17981 }
17982 } else if (S == 2) {
17983 switch (Constraint[0]) {
17984 default: break;
17985 case 'T': return C_RegisterClass;
17986 // All 'U+' constraints are addresses.
17987 case 'U': return C_Memory;
17988 }
17989 }
17990 return TargetLowering::getConstraintType(Constraint);
17991}
17992
17993/// Examine constraint type and operand type and determine a weight value.
17994/// This object must already have been set up with the operand type
17995/// and the current alternative constraint selected.
17996TargetLowering::ConstraintWeight
17997ARMTargetLowering::getSingleConstraintMatchWeight(
17998 AsmOperandInfo &info, const char *constraint) const {
17999 ConstraintWeight weight = CW_Invalid;
18000 Value *CallOperandVal = info.CallOperandVal;
18001 // If we don't have a value, we can't do a match,
18002 // but allow it at the lowest weight.
18003 if (!CallOperandVal)
18004 return CW_Default;
18005 Type *type = CallOperandVal->getType();
18006 // Look at the constraint type.
18007 switch (*constraint) {
18008 default:
18009 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
18010 break;
18011 case 'l':
18012 if (type->isIntegerTy()) {
18013 if (Subtarget->isThumb())
18014 weight = CW_SpecificReg;
18015 else
18016 weight = CW_Register;
18017 }
18018 break;
18019 case 'w':
18020 if (type->isFloatingPointTy())
18021 weight = CW_Register;
18022 break;
18023 }
18024 return weight;
18025}
18026
18027using RCPair = std::pair<unsigned, const TargetRegisterClass *>;
18028
18029RCPair ARMTargetLowering::getRegForInlineAsmConstraint(
18030 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
18031 switch (Constraint.size()) {
18032 case 1:
18033 // GCC ARM Constraint Letters
18034 switch (Constraint[0]) {
18035 case 'l': // Low regs or general regs.
18036 if (Subtarget->isThumb())
18037 return RCPair(0U, &ARM::tGPRRegClass);
18038 return RCPair(0U, &ARM::GPRRegClass);
18039 case 'h': // High regs or no regs.
18040 if (Subtarget->isThumb())
18041 return RCPair(0U, &ARM::hGPRRegClass);
18042 break;
18043 case 'r':
18044 if (Subtarget->isThumb1Only())
18045 return RCPair(0U, &ARM::tGPRRegClass);
18046 return RCPair(0U, &ARM::GPRRegClass);
18047 case 'w':
18048 if (VT == MVT::Other)
18049 break;
18050 if (VT == MVT::f32)
18051 return RCPair(0U, &ARM::SPRRegClass);
18052 if (VT.getSizeInBits() == 64)
18053 return RCPair(0U, &ARM::DPRRegClass);
18054 if (VT.getSizeInBits() == 128)
18055 return RCPair(0U, &ARM::QPRRegClass);
18056 break;
18057 case 'x':
18058 if (VT == MVT::Other)
18059 break;
18060 if (VT == MVT::f32)
18061 return RCPair(0U, &ARM::SPR_8RegClass);
18062 if (VT.getSizeInBits() == 64)
18063 return RCPair(0U, &ARM::DPR_8RegClass);
18064 if (VT.getSizeInBits() == 128)
18065 return RCPair(0U, &ARM::QPR_8RegClass);
18066 break;
18067 case 't':
18068 if (VT == MVT::Other)
18069 break;
18070 if (VT == MVT::f32 || VT == MVT::i32)
18071 return RCPair(0U, &ARM::SPRRegClass);
18072 if (VT.getSizeInBits() == 64)
18073 return RCPair(0U, &ARM::DPR_VFP2RegClass);
18074 if (VT.getSizeInBits() == 128)
18075 return RCPair(0U, &ARM::QPR_VFP2RegClass);
18076 break;
18077 }
18078 break;
18079
18080 case 2:
18081 if (Constraint[0] == 'T') {
18082 switch (Constraint[1]) {
18083 default:
18084 break;
18085 case 'e':
18086 return RCPair(0U, &ARM::tGPREvenRegClass);
18087 case 'o':
18088 return RCPair(0U, &ARM::tGPROddRegClass);
18089 }
18090 }
18091 break;
18092
18093 default:
18094 break;
18095 }
18096
18097 if (StringRef("{cc}").equals_lower(Constraint))
18098 return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass);
18099
18100 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
18101}
18102
18103/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
18104/// vector. If it is invalid, don't add anything to Ops.
18105void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
18106 std::string &Constraint,
18107 std::vector<SDValue>&Ops,
18108 SelectionDAG &DAG) const {
18109 SDValue Result;
18110
18111 // Currently only support length 1 constraints.
18112 if (Constraint.length() != 1) return;
18113
18114 char ConstraintLetter = Constraint[0];
18115 switch (ConstraintLetter) {
18116 default: break;
18117 case 'j':
18118 case 'I': case 'J': case 'K': case 'L':
18119 case 'M': case 'N': case 'O':
18120 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
18121 if (!C)
18122 return;
18123
18124 int64_t CVal64 = C->getSExtValue();
18125 int CVal = (int) CVal64;
18126 // None of these constraints allow values larger than 32 bits. Check
18127 // that the value fits in an int.
18128 if (CVal != CVal64)
18129 return;
18130
18131 switch (ConstraintLetter) {
18132 case 'j':
18133 // Constant suitable for movw, must be between 0 and
18134 // 65535.
18135 if (Subtarget->hasV6T2Ops() || (Subtarget->hasV8MBaselineOps()))
18136 if (CVal >= 0 && CVal <= 65535)
18137 break;
18138 return;
18139 case 'I':
18140 if (Subtarget->isThumb1Only()) {
18141 // This must be a constant between 0 and 255, for ADD
18142 // immediates.
18143 if (CVal >= 0 && CVal <= 255)
18144 break;
18145 } else if (Subtarget->isThumb2()) {
18146 // A constant that can be used as an immediate value in a
18147 // data-processing instruction.
18148 if (ARM_AM::getT2SOImmVal(CVal) != -1)
18149 break;
18150 } else {
18151 // A constant that can be used as an immediate value in a
18152 // data-processing instruction.
18153 if (ARM_AM::getSOImmVal(CVal) != -1)
18154 break;
18155 }
18156 return;
18157
18158 case 'J':
18159 if (Subtarget->isThumb1Only()) {
18160 // This must be a constant between -255 and -1, for negated ADD
18161 // immediates. This can be used in GCC with an "n" modifier that
18162 // prints the negated value, for use with SUB instructions. It is
18163 // not useful otherwise but is implemented for compatibility.
18164 if (CVal >= -255 && CVal <= -1)
18165 break;
18166 } else {
18167 // This must be a constant between -4095 and 4095. It is not clear
18168 // what this constraint is intended for. Implemented for
18169 // compatibility with GCC.
18170 if (CVal >= -4095 && CVal <= 4095)
18171 break;
18172 }
18173 return;
18174
18175 case 'K':
18176 if (Subtarget->isThumb1Only()) {
18177 // A 32-bit value where only one byte has a nonzero value. Exclude
18178 // zero to match GCC. This constraint is used by GCC internally for
18179 // constants that can be loaded with a move/shift combination.
18180 // It is not useful otherwise but is implemented for compatibility.
18181 if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
18182 break;
18183 } else if (Subtarget->isThumb2()) {
18184 // A constant whose bitwise inverse can be used as an immediate
18185 // value in a data-processing instruction. This can be used in GCC
18186 // with a "B" modifier that prints the inverted value, for use with
18187 // BIC and MVN instructions. It is not useful otherwise but is
18188 // implemented for compatibility.
18189 if (ARM_AM::getT2SOImmVal(~CVal) != -1)
18190 break;
18191 } else {
18192 // A constant whose bitwise inverse can be used as an immediate
18193 // value in a data-processing instruction. This can be used in GCC
18194 // with a "B" modifier that prints the inverted value, for use with
18195 // BIC and MVN instructions. It is not useful otherwise but is
18196 // implemented for compatibility.
18197 if (ARM_AM::getSOImmVal(~CVal) != -1)
18198 break;
18199 }
18200 return;
18201
18202 case 'L':
18203 if (Subtarget->isThumb1Only()) {
18204 // This must be a constant between -7 and 7,
18205 // for 3-operand ADD/SUB immediate instructions.
18206 if (CVal >= -7 && CVal < 7)
18207 break;
18208 } else if (Subtarget->isThumb2()) {
18209 // A constant whose negation can be used as an immediate value in a
18210 // data-processing instruction. This can be used in GCC with an "n"
18211 // modifier that prints the negated value, for use with SUB
18212 // instructions. It is not useful otherwise but is implemented for
18213 // compatibility.
18214 if (ARM_AM::getT2SOImmVal(-CVal) != -1)
18215 break;
18216 } else {
18217 // A constant whose negation can be used as an immediate value in a
18218 // data-processing instruction. This can be used in GCC with an "n"
18219 // modifier that prints the negated value, for use with SUB
18220 // instructions. It is not useful otherwise but is implemented for
18221 // compatibility.
18222 if (ARM_AM::getSOImmVal(-CVal) != -1)
18223 break;
18224 }
18225 return;
18226
18227 case 'M':
18228 if (Subtarget->isThumb1Only()) {
18229 // This must be a multiple of 4 between 0 and 1020, for
18230 // ADD sp + immediate.
18231 if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
18232 break;
18233 } else {
18234 // A power of two or a constant between 0 and 32. This is used in
18235 // GCC for the shift amount on shifted register operands, but it is
18236 // useful in general for any shift amounts.
18237 if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
18238 break;
18239 }
18240 return;
18241
18242 case 'N':
18243 if (Subtarget->isThumb1Only()) {
18244 // This must be a constant between 0 and 31, for shift amounts.
18245 if (CVal >= 0 && CVal <= 31)
18246 break;
18247 }
18248 return;
18249
18250 case 'O':
18251 if (Subtarget->isThumb1Only()) {
18252 // This must be a multiple of 4 between -508 and 508, for
18253 // ADD/SUB sp = sp + immediate.
18254 if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
18255 break;
18256 }
18257 return;
18258 }
18259 Result = DAG.getTargetConstant(CVal, SDLoc(Op), Op.getValueType());
18260 break;
18261 }
18262
18263 if (Result.getNode()) {
18264 Ops.push_back(Result);
18265 return;
18266 }
18267 return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
18268}
18269
18270static RTLIB::Libcall getDivRemLibcall(
18271 const SDNode *N, MVT::SimpleValueType SVT) {
18272  assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
18273          N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
18274         "Unhandled Opcode in getDivRemLibcall");
18275 bool isSigned = N->getOpcode() == ISD::SDIVREM ||
18276 N->getOpcode() == ISD::SREM;
18277 RTLIB::Libcall LC;
18278 switch (SVT) {
18279  default: llvm_unreachable("Unexpected request for libcall!");
18280 case MVT::i8: LC = isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
18281 case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
18282 case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
18283 case MVT::i64: LC = isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
18284 }
18285 return LC;
18286}
18287
18288static TargetLowering::ArgListTy getDivRemArgList(
18289 const SDNode *N, LLVMContext *Context, const ARMSubtarget *Subtarget) {
18290  assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
18291          N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
18292         "Unhandled Opcode in getDivRemArgList");
18293 bool isSigned = N->getOpcode() == ISD::SDIVREM ||
18294 N->getOpcode() == ISD::SREM;
18295 TargetLowering::ArgListTy Args;
18296 TargetLowering::ArgListEntry Entry;
18297 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
18298 EVT ArgVT = N->getOperand(i).getValueType();
18299 Type *ArgTy = ArgVT.getTypeForEVT(*Context);
18300 Entry.Node = N->getOperand(i);
18301 Entry.Ty = ArgTy;
18302 Entry.IsSExt = isSigned;
18303 Entry.IsZExt = !isSigned;
18304 Args.push_back(Entry);
18305 }
18306 if (Subtarget->isTargetWindows() && Args.size() >= 2)
18307 std::swap(Args[0], Args[1]);
18308 return Args;
18309}
18310
18311SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
18312  assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
18313          Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
18314          Subtarget->isTargetWindows()) &&
18315         "Register-based DivRem lowering only");
18316 unsigned Opcode = Op->getOpcode();
18317  assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
18318         "Invalid opcode for Div/Rem lowering");
18319 bool isSigned = (Opcode == ISD::SDIVREM);
18320 EVT VT = Op->getValueType(0);
18321 Type *Ty = VT.getTypeForEVT(*DAG.getContext());
18322 SDLoc dl(Op);
18323
18324 // If the target has hardware divide, use divide + multiply + subtract:
18325 // div = a / b
18326 // rem = a - b * div
18327 // return {div, rem}
18328 // This should be lowered into UDIV/SDIV + MLS later on.
18329 bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
18330 : Subtarget->hasDivideInARMMode();
18331 if (hasDivide && Op->getValueType(0).isSimple() &&
18332 Op->getSimpleValueType(0) == MVT::i32) {
18333 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
18334 const SDValue Dividend = Op->getOperand(0);
18335 const SDValue Divisor = Op->getOperand(1);
18336 SDValue Div = DAG.getNode(DivOpcode, dl, VT, Dividend, Divisor);
18337 SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Div, Divisor);
18338 SDValue Rem = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
18339
18340 SDValue Values[2] = {Div, Rem};
18341 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VT, VT), Values);
18342 }
18343
18344 RTLIB::Libcall LC = getDivRemLibcall(Op.getNode(),
18345 VT.getSimpleVT().SimpleTy);
18346 SDValue InChain = DAG.getEntryNode();
18347
18348 TargetLowering::ArgListTy Args = getDivRemArgList(Op.getNode(),
18349 DAG.getContext(),
18350 Subtarget);
18351
18352 SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
18353 getPointerTy(DAG.getDataLayout()));
18354
18355 Type *RetTy = StructType::get(Ty, Ty);
18356
18357 if (Subtarget->isTargetWindows())
18358 InChain = WinDBZCheckDenominator(DAG, Op.getNode(), InChain);
18359
18360 TargetLowering::CallLoweringInfo CLI(DAG);
18361 CLI.setDebugLoc(dl).setChain(InChain)
18362 .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
18363 .setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned);
18364
18365 std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
18366 return CallInfo.first;
18367}
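// Illustrative sketch (not part of the original file): on a subtarget with
// hardware divide, the MERGE_VALUES built above corresponds to the scalar
// expansion
//   int32_t Div = A / B;
//   int32_t Rem = A - Div * B;   // later matched as SDIV/UDIV followed by MLS
// Only targets without hardware divide fall through to the RTABI divmod
// helper libcall path below it.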
18368
18369// Lowers REM using divmod helpers
18370// see RTABI section 4.2/4.3
18371SDValue ARMTargetLowering::LowerREM(SDNode *N, SelectionDAG &DAG) const {
18372 // Build return types (div and rem)
18373 std::vector<Type*> RetTyParams;
18374 Type *RetTyElement;
18375
18376 switch (N->getValueType(0).getSimpleVT().SimpleTy) {
18377  default: llvm_unreachable("Unexpected request for libcall!");
18378 case MVT::i8: RetTyElement = Type::getInt8Ty(*DAG.getContext()); break;
18379 case MVT::i16: RetTyElement = Type::getInt16Ty(*DAG.getContext()); break;
18380 case MVT::i32: RetTyElement = Type::getInt32Ty(*DAG.getContext()); break;
18381 case MVT::i64: RetTyElement = Type::getInt64Ty(*DAG.getContext()); break;
18382 }
18383
18384 RetTyParams.push_back(RetTyElement);
18385 RetTyParams.push_back(RetTyElement);
18386 ArrayRef<Type*> ret = ArrayRef<Type*>(RetTyParams);
18387 Type *RetTy = StructType::get(*DAG.getContext(), ret);
18388
18389 RTLIB::Libcall LC = getDivRemLibcall(N, N->getValueType(0).getSimpleVT().
18390 SimpleTy);
18391 SDValue InChain = DAG.getEntryNode();
18392 TargetLowering::ArgListTy Args = getDivRemArgList(N, DAG.getContext(),
18393 Subtarget);
18394 bool isSigned = N->getOpcode() == ISD::SREM;
18395 SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
18396 getPointerTy(DAG.getDataLayout()));
18397
18398 if (Subtarget->isTargetWindows())
18399 InChain = WinDBZCheckDenominator(DAG, N, InChain);
18400
18401 // Lower call
18402 CallLoweringInfo CLI(DAG);
18403 CLI.setChain(InChain)
18404 .setCallee(CallingConv::ARM_AAPCS, RetTy, Callee, std::move(Args))
18405 .setSExtResult(isSigned).setZExtResult(!isSigned).setDebugLoc(SDLoc(N));
18406 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
18407
18408 // Return second (rem) result operand (first contains div)
18409 SDNode *ResNode = CallResult.first.getNode();
18410  assert(ResNode->getNumOperands() == 2 && "divmod should return two operands");
18411 return ResNode->getOperand(1);
18412}
18413
18414SDValue
18415ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
18416  assert(Subtarget->isTargetWindows() && "unsupported target platform");
18417 SDLoc DL(Op);
18418
18419 // Get the inputs.
18420 SDValue Chain = Op.getOperand(0);
18421 SDValue Size = Op.getOperand(1);
18422
18423 if (DAG.getMachineFunction().getFunction().hasFnAttribute(
18424 "no-stack-arg-probe")) {
18425 MaybeAlign Align =
18426 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
18427 SDValue SP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
18428 Chain = SP.getValue(1);
18429 SP = DAG.getNode(ISD::SUB, DL, MVT::i32, SP, Size);
18430 if (Align)
18431 SP =
18432 DAG.getNode(ISD::AND, DL, MVT::i32, SP.getValue(0),
18433 DAG.getConstant(-(uint64_t)Align->value(), DL, MVT::i32));
18434 Chain = DAG.getCopyToReg(Chain, DL, ARM::SP, SP);
18435 SDValue Ops[2] = { SP, Chain };
18436 return DAG.getMergeValues(Ops, DL);
18437 }
18438
18439 SDValue Words = DAG.getNode(ISD::SRL, DL, MVT::i32, Size,
18440 DAG.getConstant(2, DL, MVT::i32));
18441
18442 SDValue Flag;
18443 Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Flag);
18444 Flag = Chain.getValue(1);
18445
18446 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
18447 Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Flag);
18448
18449 SDValue NewSP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
18450 Chain = NewSP.getValue(1);
18451
18452 SDValue Ops[2] = { NewSP, Chain };
18453 return DAG.getMergeValues(Ops, DL);
18454}
18455
18456SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
18457 bool IsStrict = Op->isStrictFPOpcode();
18458 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
18459 const unsigned DstSz = Op.getValueType().getSizeInBits();
18460 const unsigned SrcSz = SrcVal.getValueType().getSizeInBits();
18461  assert(DstSz > SrcSz && DstSz <= 64 && SrcSz >= 16 &&
18462         "Unexpected type for custom-lowering FP_EXTEND");
18463
18464  assert((!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) &&
18465         "With both FP DP and 16, any FP conversion is legal!");
18466
18467  assert(!(DstSz == 32 && Subtarget->hasFP16()) &&
18468         "With FP16, 16 to 32 conversion is legal!");
18469
18470 // Converting from 32 -> 64 is valid if we have FP64.
18471 if (SrcSz == 32 && DstSz == 64 && Subtarget->hasFP64()) {
18472 // FIXME: Remove this when we have strict fp instruction selection patterns
18473 if (IsStrict) {
18474 SDLoc Loc(Op);
18475 SDValue Result = DAG.getNode(ISD::FP_EXTEND,
18476 Loc, Op.getValueType(), SrcVal);
18477 return DAG.getMergeValues({Result, Op.getOperand(0)}, Loc);
18478 }
18479 return Op;
18480 }
18481
18482  // Either we are converting from 16 -> 64 without FP16 and/or without
18483  // double-precision FP (or without Armv8-fp), so we must do it in two
18484  // steps;
18485  // or we are converting from 32 -> 64 without double-precision FP, or from
18486  // 16 -> 32 without FP16, so we must make a libcall.
18487 SDLoc Loc(Op);
18488 RTLIB::Libcall LC;
18489 MakeLibCallOptions CallOptions;
18490 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
18491 for (unsigned Sz = SrcSz; Sz <= 32 && Sz < DstSz; Sz *= 2) {
18492 bool Supported = (Sz == 16 ? Subtarget->hasFP16() : Subtarget->hasFP64());
18493 MVT SrcVT = (Sz == 16 ? MVT::f16 : MVT::f32);
18494 MVT DstVT = (Sz == 16 ? MVT::f32 : MVT::f64);
18495 if (Supported) {
18496 if (IsStrict) {
18497 SrcVal = DAG.getNode(ISD::STRICT_FP_EXTEND, Loc,
18498 {DstVT, MVT::Other}, {Chain, SrcVal});
18499 Chain = SrcVal.getValue(1);
18500 } else {
18501 SrcVal = DAG.getNode(ISD::FP_EXTEND, Loc, DstVT, SrcVal);
18502 }
18503 } else {
18504 LC = RTLIB::getFPEXT(SrcVT, DstVT);
18505      assert(LC != RTLIB::UNKNOWN_LIBCALL &&
18506             "Unexpected type for custom-lowering FP_EXTEND");
18507 std::tie(SrcVal, Chain) = makeLibCall(DAG, LC, DstVT, SrcVal, CallOptions,
18508 Loc, Chain);
18509 }
18510 }
18511
18512 return IsStrict ? DAG.getMergeValues({SrcVal, Chain}, Loc) : SrcVal;
18513}
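// Illustrative sketch (not from the original source): an f16 -> f64 extend on
// a subtarget with FP16 but without FP64 runs the loop above twice, first as
// a legal f16 -> f32 extend and then as a float-to-double conversion libcall,
// i.e. conceptually
//   double D = (double)(float)H;   // H holds the f16 source value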
18514
18515SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
18516 bool IsStrict = Op->isStrictFPOpcode();
18517
18518 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
18519 EVT SrcVT = SrcVal.getValueType();
18520 EVT DstVT = Op.getValueType();
18521 const unsigned DstSz = Op.getValueType().getSizeInBits();
18522 const unsigned SrcSz = SrcVT.getSizeInBits();
18523 (void)DstSz;
18524  assert(DstSz < SrcSz && SrcSz <= 64 && DstSz >= 16 &&
18525         "Unexpected type for custom-lowering FP_ROUND");
18526
18527  assert((!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) &&
18528         "With both FP DP and 16, any FP conversion is legal!");
18529
18530 SDLoc Loc(Op);
18531
18532  // A single 32 -> 16 rounding instruction is available if the subtarget has FP16.
18533 if (SrcSz == 32 && Subtarget->hasFP16())
18534 return Op;
18535
18536 // Lib call from 32 -> 16 / 64 -> [32, 16]
18537 RTLIB::Libcall LC = RTLIB::getFPROUND(SrcVT, DstVT);
18538  assert(LC != RTLIB::UNKNOWN_LIBCALL &&
18539         "Unexpected type for custom-lowering FP_ROUND");
18540 MakeLibCallOptions CallOptions;
18541 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
18542 SDValue Result;
18543 std::tie(Result, Chain) = makeLibCall(DAG, LC, DstVT, SrcVal, CallOptions,
18544 Loc, Chain);
18545 return IsStrict ? DAG.getMergeValues({Result, Chain}, Loc) : Result;
18546}
18547
18548void ARMTargetLowering::lowerABS(SDNode *N, SmallVectorImpl<SDValue> &Results,
18549 SelectionDAG &DAG) const {
18550  assert(N->getValueType(0) == MVT::i64 && "Unexpected type (!= i64) on ABS.");
18551 MVT HalfT = MVT::i32;
18552 SDLoc dl(N);
18553 SDValue Hi, Lo, Tmp;
18554
18555 if (!isOperationLegalOrCustom(ISD::ADDCARRY, HalfT) ||
18556 !isOperationLegalOrCustom(ISD::UADDO, HalfT))
18557 return ;
18558
18559 unsigned OpTypeBits = HalfT.getScalarSizeInBits();
18560 SDVTList VTList = DAG.getVTList(HalfT, MVT::i1);
18561
18562 Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(0),
18563 DAG.getConstant(0, dl, HalfT));
18564 Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(0),
18565 DAG.getConstant(1, dl, HalfT));
18566
18567 Tmp = DAG.getNode(ISD::SRA, dl, HalfT, Hi,
18568 DAG.getConstant(OpTypeBits - 1, dl,
18569 getShiftAmountTy(HalfT, DAG.getDataLayout())));
18570 Lo = DAG.getNode(ISD::UADDO, dl, VTList, Tmp, Lo);
18571 Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, Tmp, Hi,
18572 SDValue(Lo.getNode(), 1));
18573 Hi = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Hi);
18574 Lo = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Lo);
18575
18576 Results.push_back(Lo);
18577 Results.push_back(Hi);
18578}
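// Illustrative sketch (not part of the original file): the expansion above is
// the branch-free absolute value, applied to an i64 split into i32 halves:
//   int64_t Sign = X >> 63;            // 0 or -1; Tmp = Hi >> 31 per half
//   int64_t Abs  = (X + Sign) ^ Sign;  // UADDO/ADDCARRY, then XOR each half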
18579
18580bool
18581ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
18582 // The ARM target isn't yet aware of offsets.
18583 return false;
18584}
18585
18586bool ARM::isBitFieldInvertedMask(unsigned v) {
18587 if (v == 0xffffffff)
18588 return false;
18589
18590 // there can be 1's on either or both "outsides", all the "inside"
18591 // bits must be 0's
18592 return isShiftedMask_32(~v);
18593}
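// Worked example (illustrative): v = 0xFF0000FF gives ~v = 0x00FFFF00, a
// single contiguous run of ones, so the function returns true; ones on the
// "outsides" with one zero field "inside" is exactly the shape that BFI/BFC
// can write in a single instruction.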
18594
18595/// isFPImmLegal - Returns true if the target can instruction select the
18596/// specified FP immediate natively. If false, the legalizer will
18597/// materialize the FP immediate as a load from a constant pool.
18598bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
18599 bool ForCodeSize) const {
18600 if (!Subtarget->hasVFP3Base())
18601 return false;
18602 if (VT == MVT::f16 && Subtarget->hasFullFP16())
18603 return ARM_AM::getFP16Imm(Imm) != -1;
18604 if (VT == MVT::f32 && Subtarget->hasFullFP16() &&
18605 ARM_AM::getFP32FP16Imm(Imm) != -1)
18606 return true;
18607 if (VT == MVT::f32)
18608 return ARM_AM::getFP32Imm(Imm) != -1;
18609 if (VT == MVT::f64 && Subtarget->hasFP64())
18610 return ARM_AM::getFP64Imm(Imm) != -1;
18611 return false;
18612}
18613
18614/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
18615/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
18616/// specified in the intrinsic calls.
18617bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
18618 const CallInst &I,
18619 MachineFunction &MF,
18620 unsigned Intrinsic) const {
18621 switch (Intrinsic) {
18622 case Intrinsic::arm_neon_vld1:
18623 case Intrinsic::arm_neon_vld2:
18624 case Intrinsic::arm_neon_vld3:
18625 case Intrinsic::arm_neon_vld4:
18626 case Intrinsic::arm_neon_vld2lane:
18627 case Intrinsic::arm_neon_vld3lane:
18628 case Intrinsic::arm_neon_vld4lane:
18629 case Intrinsic::arm_neon_vld2dup:
18630 case Intrinsic::arm_neon_vld3dup:
18631 case Intrinsic::arm_neon_vld4dup: {
18632 Info.opc = ISD::INTRINSIC_W_CHAIN;
18633 // Conservatively set memVT to the entire set of vectors loaded.
18634 auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
18635 uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
18636 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
18637 Info.ptrVal = I.getArgOperand(0);
18638 Info.offset = 0;
18639 Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
18640 Info.align = cast<ConstantInt>(AlignArg)->getMaybeAlignValue();
18641 // volatile loads with NEON intrinsics not supported
18642 Info.flags = MachineMemOperand::MOLoad;
18643 return true;
18644 }
18645 case Intrinsic::arm_neon_vld1x2:
18646 case Intrinsic::arm_neon_vld1x3:
18647 case Intrinsic::arm_neon_vld1x4: {
18648 Info.opc = ISD::INTRINSIC_W_CHAIN;
18649 // Conservatively set memVT to the entire set of vectors loaded.
18650 auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
18651 uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
18652 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
18653 Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
18654 Info.offset = 0;
18655 Info.align.reset();
18656 // volatile loads with NEON intrinsics not supported
18657 Info.flags = MachineMemOperand::MOLoad;
18658 return true;
18659 }
18660 case Intrinsic::arm_neon_vst1:
18661 case Intrinsic::arm_neon_vst2:
18662 case Intrinsic::arm_neon_vst3:
18663 case Intrinsic::arm_neon_vst4:
18664 case Intrinsic::arm_neon_vst2lane:
18665 case Intrinsic::arm_neon_vst3lane:
18666 case Intrinsic::arm_neon_vst4lane: {
18667 Info.opc = ISD::INTRINSIC_VOID;
18668 // Conservatively set memVT to the entire set of vectors stored.
18669 auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
18670 unsigned NumElts = 0;
18671 for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
18672 Type *ArgTy = I.getArgOperand(ArgI)->getType();
18673 if (!ArgTy->isVectorTy())
18674 break;
18675 NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
18676 }
18677 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
18678 Info.ptrVal = I.getArgOperand(0);
18679 Info.offset = 0;
18680 Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
18681 Info.align = cast<ConstantInt>(AlignArg)->getMaybeAlignValue();
18682 // volatile stores with NEON intrinsics not supported
18683 Info.flags = MachineMemOperand::MOStore;
18684 return true;
18685 }
18686 case Intrinsic::arm_neon_vst1x2:
18687 case Intrinsic::arm_neon_vst1x3:
18688 case Intrinsic::arm_neon_vst1x4: {
18689 Info.opc = ISD::INTRINSIC_VOID;
18690 // Conservatively set memVT to the entire set of vectors stored.
18691 auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
18692 unsigned NumElts = 0;
18693 for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
18694 Type *ArgTy = I.getArgOperand(ArgI)->getType();
18695 if (!ArgTy->isVectorTy())
18696 break;
18697 NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
18698 }
18699 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
18700 Info.ptrVal = I.getArgOperand(0);
18701 Info.offset = 0;
18702 Info.align.reset();
18703 // volatile stores with NEON intrinsics not supported
18704 Info.flags = MachineMemOperand::MOStore;
18705 return true;
18706 }
18707 case Intrinsic::arm_mve_vld2q:
18708 case Intrinsic::arm_mve_vld4q: {
18709 Info.opc = ISD::INTRINSIC_W_CHAIN;
18710 // Conservatively set memVT to the entire set of vectors loaded.
18711 Type *VecTy = cast<StructType>(I.getType())->getElementType(1);
18712 unsigned Factor = Intrinsic == Intrinsic::arm_mve_vld2q ? 2 : 4;
18713 Info.memVT = EVT::getVectorVT(VecTy->getContext(), MVT::i64, Factor * 2);
18714 Info.ptrVal = I.getArgOperand(0);
18715 Info.offset = 0;
18716 Info.align = Align(VecTy->getScalarSizeInBits() / 8);
18717 // volatile loads with MVE intrinsics not supported
18718 Info.flags = MachineMemOperand::MOLoad;
18719 return true;
18720 }
18721 case Intrinsic::arm_mve_vst2q:
18722 case Intrinsic::arm_mve_vst4q: {
18723 Info.opc = ISD::INTRINSIC_VOID;
18724 // Conservatively set memVT to the entire set of vectors stored.
18725 Type *VecTy = I.getArgOperand(1)->getType();
18726 unsigned Factor = Intrinsic == Intrinsic::arm_mve_vst2q ? 2 : 4;
18727 Info.memVT = EVT::getVectorVT(VecTy->getContext(), MVT::i64, Factor * 2);
18728 Info.ptrVal = I.getArgOperand(0);
18729 Info.offset = 0;
18730 Info.align = Align(VecTy->getScalarSizeInBits() / 8);
18731 // volatile stores with MVE intrinsics not supported
18732 Info.flags = MachineMemOperand::MOStore;
18733 return true;
18734 }
18735 case Intrinsic::arm_ldaex:
18736 case Intrinsic::arm_ldrex: {
18737 auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
18738 PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
18739 Info.opc = ISD::INTRINSIC_W_CHAIN;
18740 Info.memVT = MVT::getVT(PtrTy->getElementType());
18741 Info.ptrVal = I.getArgOperand(0);
18742 Info.offset = 0;
18743 Info.align = DL.getABITypeAlign(PtrTy->getElementType());
18744 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
18745 return true;
18746 }
18747 case Intrinsic::arm_stlex:
18748 case Intrinsic::arm_strex: {
18749 auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
18750 PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
18751 Info.opc = ISD::INTRINSIC_W_CHAIN;
18752 Info.memVT = MVT::getVT(PtrTy->getElementType());
18753 Info.ptrVal = I.getArgOperand(1);
18754 Info.offset = 0;
18755 Info.align = DL.getABITypeAlign(PtrTy->getElementType());
18756 Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
18757 return true;
18758 }
18759 case Intrinsic::arm_stlexd:
18760 case Intrinsic::arm_strexd:
18761 Info.opc = ISD::INTRINSIC_W_CHAIN;
18762 Info.memVT = MVT::i64;
18763 Info.ptrVal = I.getArgOperand(2);
18764 Info.offset = 0;
18765 Info.align = Align(8);
18766 Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
18767 return true;
18768
18769 case Intrinsic::arm_ldaexd:
18770 case Intrinsic::arm_ldrexd:
18771 Info.opc = ISD::INTRINSIC_W_CHAIN;
18772 Info.memVT = MVT::i64;
18773 Info.ptrVal = I.getArgOperand(0);
18774 Info.offset = 0;
18775 Info.align = Align(8);
18776 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
18777 return true;
18778
18779 default:
18780 break;
18781 }
18782
18783 return false;
18784}
18785
18786/// Returns true if it is beneficial to convert a load of a constant
18787/// to just the constant itself.
18788bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
18789 Type *Ty) const {
18790  assert(Ty->isIntegerTy());
18791
18792 unsigned Bits = Ty->getPrimitiveSizeInBits();
18793 if (Bits == 0 || Bits > 32)
18794 return false;
18795 return true;
18796}
18797
18798bool ARMTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
18799 unsigned Index) const {
18800 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
18801 return false;
18802
18803 return (Index == 0 || Index == ResVT.getVectorNumElements());
18804}
18805
18806Instruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder,
18807 ARM_MB::MemBOpt Domain) const {
18808 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
18809
18810 // First, if the target has no DMB, see what fallback we can use.
18811 if (!Subtarget->hasDataBarrier()) {
18812 // Some ARMv6 cpus can support data barriers with an mcr instruction.
18813 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
18814 // here.
18815 if (Subtarget->hasV6Ops() && !Subtarget->isThumb()) {
18816 Function *MCR = Intrinsic::getDeclaration(M, Intrinsic::arm_mcr);
18817 Value* args[6] = {Builder.getInt32(15), Builder.getInt32(0),
18818 Builder.getInt32(0), Builder.getInt32(7),
18819 Builder.getInt32(10), Builder.getInt32(5)};
18820 return Builder.CreateCall(MCR, args);
18821 } else {
18822 // Instead of using barriers, atomic accesses on these subtargets use
18823 // libcalls.
18824      llvm_unreachable("makeDMB on a target so old that it has no barriers");
18825 }
18826 } else {
18827 Function *DMB = Intrinsic::getDeclaration(M, Intrinsic::arm_dmb);
18828 // Only a full system barrier exists in the M-class architectures.
18829 Domain = Subtarget->isMClass() ? ARM_MB::SY : Domain;
18830 Constant *CDomain = Builder.getInt32(Domain);
18831 return Builder.CreateCall(DMB, CDomain);
18832 }
18833}
18834
18835// Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
18836Instruction *ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
18837 Instruction *Inst,
18838 AtomicOrdering Ord) const {
18839 switch (Ord) {
18840 case AtomicOrdering::NotAtomic:
18841 case AtomicOrdering::Unordered:
18842    llvm_unreachable("Invalid fence: unordered/non-atomic");
18843 case AtomicOrdering::Monotonic:
18844 case AtomicOrdering::Acquire:
18845 return nullptr; // Nothing to do
18846 case AtomicOrdering::SequentiallyConsistent:
18847 if (!Inst->hasAtomicStore())
18848 return nullptr; // Nothing to do
18849    LLVM_FALLTHROUGH;
18850 case AtomicOrdering::Release:
18851 case AtomicOrdering::AcquireRelease:
18852 if (Subtarget->preferISHSTBarriers())
18853 return makeDMB(Builder, ARM_MB::ISHST);
18854 // FIXME: add a comment with a link to documentation justifying this.
18855 else
18856 return makeDMB(Builder, ARM_MB::ISH);
18857 }
18858  llvm_unreachable("Unknown fence ordering in emitLeadingFence");
18859}
18860
18861Instruction *ARMTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
18862 Instruction *Inst,
18863 AtomicOrdering Ord) const {
18864 switch (Ord) {
18865 case AtomicOrdering::NotAtomic:
18866 case AtomicOrdering::Unordered:
18867    llvm_unreachable("Invalid fence: unordered/not-atomic");
18868 case AtomicOrdering::Monotonic:
18869 case AtomicOrdering::Release:
18870 return nullptr; // Nothing to do
18871 case AtomicOrdering::Acquire:
18872 case AtomicOrdering::AcquireRelease:
18873 case AtomicOrdering::SequentiallyConsistent:
18874 return makeDMB(Builder, ARM_MB::ISH);
18875 }
18876 llvm_unreachable("Unknown fence ordering in emitTrailingFence");
18877}
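Editor's note (a hedged sketch, not part of the analyzed source): the two hooks above bracket an atomic access with DMB barriers following the mapping cited before emitLeadingFence. The standalone helper below mirrors that placement for an instruction containing an atomic store; the enum and struct names are illustrative only, and a seq_cst load would skip the leading fence per the hasAtomicStore() check above.

#include <cassert>

enum class Ord { Monotonic, Acquire, Release, AcqRel, SeqCst };
struct Fences { bool Leading, Trailing; };

// DMB fences requested by the leading/trailing hooks around an atomic store.
static Fences fencesForAtomicStore(Ord O) {
  switch (O) {
  case Ord::Monotonic: return {false, false}; // no barriers
  case Ord::Acquire:   return {false, true};  // trailing dmb ish only
  case Ord::Release:   return {true,  false}; // leading dmb ish (or ishst)
  case Ord::AcqRel:
  case Ord::SeqCst:    return {true,  true};  // dmb; str; dmb
  }
  return {false, false};
}

int main() {
  assert(fencesForAtomicStore(Ord::SeqCst).Leading && fencesForAtomicStore(Ord::SeqCst).Trailing);
  assert(fencesForAtomicStore(Ord::Release).Leading && !fencesForAtomicStore(Ord::Release).Trailing);
  return 0;
}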
18878
18879// Loads and stores less than 64-bits are already atomic; ones above that
18880// are doomed anyway, so defer to the default libcall and blame the OS when
18881// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
18882// anything for those.
18883bool ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
18884 unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
18885 return (Size == 64) && !Subtarget->isMClass();
18886}
18887
18888// Loads and stores less than 64-bits are already atomic; ones above that
18889// are doomed anyway, so defer to the default libcall and blame the OS when
18890// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
18891// anything for those.
18892// FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that
18893// guarantee, see DDI0406C ARM architecture reference manual,
18894// sections A8.8.72-74 LDRD)
18895TargetLowering::AtomicExpansionKind
18896ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
18897 unsigned Size = LI->getType()->getPrimitiveSizeInBits();
18898 return ((Size == 64) && !Subtarget->isMClass()) ? AtomicExpansionKind::LLOnly
18899 : AtomicExpansionKind::None;
18900}
18901
18902// For the real atomic operations, we have ldrex/strex up to 32 bits,
18903// and up to 64 bits on the non-M profiles
18904TargetLowering::AtomicExpansionKind
18905ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
18906 if (AI->isFloatingPointOperation())
18907 return AtomicExpansionKind::CmpXChg;
18908
18909 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
18910 bool hasAtomicRMW = !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps();
18911 return (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW)
18912 ? AtomicExpansionKind::LLSC
18913 : AtomicExpansionKind::None;
18914}
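Editor's note (a hedged sketch, not the real Subtarget API): the predicate below restates the size/profile check above as a standalone function, with the Subtarget queries turned into parameters. Floating-point RMW operations bypass this check entirely and expand via cmpxchg, as the code above shows.

#include <cassert>

static bool expandRMWToLLSC(unsigned SizeInBits, bool IsMClass, bool IsThumb,
                            bool HasV8MBaseline) {
  // ldrex/strex are available outside Thumb1; M-class caps the width at 32 bits.
  bool HasAtomicRMW = !IsThumb || HasV8MBaseline;
  return SizeInBits <= (IsMClass ? 32U : 64U) && HasAtomicRMW;
}

int main() {
  assert(expandRMWToLLSC(32, /*MClass*/true,  /*Thumb*/true,  /*v8M*/true));   // i32 on v8-M baseline
  assert(!expandRMWToLLSC(64, /*MClass*/true, /*Thumb*/true,  /*v8M*/true));   // no ldrexd/strexd on M-class
  assert(expandRMWToLLSC(64, /*MClass*/false, /*Thumb*/false, /*v8M*/false));  // i64 on A/R profiles
  assert(!expandRMWToLLSC(32, /*MClass*/false, /*Thumb*/true, /*v8M*/false));  // Thumb1: no exclusives
  return 0;
}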
18915
18916// Similar to shouldExpandAtomicRMWInIR, ldrex/strex can be used up to 32
18917// bits, and up to 64 bits on the non-M profiles.
18918TargetLowering::AtomicExpansionKind
18919ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
18920 // At -O0, fast-regalloc cannot cope with the live vregs necessary to
18921 // implement cmpxchg without spilling. If the address being exchanged is also
18922 // on the stack and close enough to the spill slot, this can lead to a
18923 // situation where the monitor always gets cleared and the atomic operation
18924 // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
18925 unsigned Size = AI->getOperand(1)->getType()->getPrimitiveSizeInBits();
18926 bool HasAtomicCmpXchg =
18927 !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps();
18928 if (getTargetMachine().getOptLevel() != 0 && HasAtomicCmpXchg &&
18929 Size <= (Subtarget->isMClass() ? 32U : 64U))
18930 return AtomicExpansionKind::LLSC;
18931 return AtomicExpansionKind::None;
18932}
18933
18934bool ARMTargetLowering::shouldInsertFencesForAtomic(
18935 const Instruction *I) const {
18936 return InsertFencesForAtomic;
18937}
18938
18939// This has so far only been implemented for MachO.
18940bool ARMTargetLowering::useLoadStackGuardNode() const {
18941 return Subtarget->isTargetMachO();
18942}
18943
18944void ARMTargetLowering::insertSSPDeclarations(Module &M) const {
18945 if (!Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
18946 return TargetLowering::insertSSPDeclarations(M);
18947
18948 // MSVC CRT has a global variable holding security cookie.
18949 M.getOrInsertGlobal("__security_cookie",
18950 Type::getInt8PtrTy(M.getContext()));
18951
18952 // MSVC CRT has a function to validate security cookie.
18953 FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
18954 "__security_check_cookie", Type::getVoidTy(M.getContext()),
18955 Type::getInt8PtrTy(M.getContext()));
18956 if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee()))
18957 F->addAttribute(1, Attribute::AttrKind::InReg);
18958}
18959
18960Value *ARMTargetLowering::getSDagStackGuard(const Module &M) const {
18961 // MSVC CRT has a global variable holding security cookie.
18962 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
18963 return M.getGlobalVariable("__security_cookie");
18964 return TargetLowering::getSDagStackGuard(M);
18965}
18966
18967Function *ARMTargetLowering::getSSPStackGuardCheck(const Module &M) const {
18968 // MSVC CRT has a function to validate security cookie.
18969 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
18970 return M.getFunction("__security_check_cookie");
18971 return TargetLowering::getSSPStackGuardCheck(M);
18972}
18973
18974bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
18975 unsigned &Cost) const {
18976 // If we do not have NEON, vector types are not natively supported.
18977 if (!Subtarget->hasNEON())
18978 return false;
18979
18980 // Floating point values and vector values map to the same register file.
18981 // Therefore, although we could do a store extract of a vector type, it is
18982 // better to leave the value as a float, since we have more freedom in the
18983 // addressing modes for those.
18984 if (VectorTy->isFPOrFPVectorTy())
18985 return false;
18986
18987 // If the index is unknown at compile time, this is very expensive to lower
18988 // and it is not possible to combine the store with the extract.
18989 if (!isa<ConstantInt>(Idx))
18990 return false;
18991
18992 assert(VectorTy->isVectorTy() && "VectorTy is not a vector type");
18993 unsigned BitWidth = VectorTy->getPrimitiveSizeInBits().getFixedSize();
18994 // We can do a store + vector extract on any vector that fits perfectly in a D
18995 // or Q register.
18996 if (BitWidth == 64 || BitWidth == 128) {
18997 Cost = 0;
18998 return true;
18999 }
19000 return false;
19001}
19002
19003bool ARMTargetLowering::isCheapToSpeculateCttz() const {
19004 return Subtarget->hasV6T2Ops();
19005}
19006
19007bool ARMTargetLowering::isCheapToSpeculateCtlz() const {
19008 return Subtarget->hasV6T2Ops();
19009}
19010
19011bool ARMTargetLowering::shouldExpandShift(SelectionDAG &DAG, SDNode *N) const {
19012 return !Subtarget->hasMinSize() || Subtarget->isTargetWindows();
19013}
19014
19015Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
19016 AtomicOrdering Ord) const {
19017 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
19018 Type *ValTy = cast<PointerType>(Addr->getType())->getElementType();
19019 bool IsAcquire = isAcquireOrStronger(Ord);
19020
19021 // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd
19022 // intrinsic must return {i32, i32} and we have to recombine them into a
19023 // single i64 here.
19024 if (ValTy->getPrimitiveSizeInBits() == 64) {
19025 Intrinsic::ID Int =
19026 IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd;
19027 Function *Ldrex = Intrinsic::getDeclaration(M, Int);
19028
19029 Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
19030 Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi");
19031
19032 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
19033 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
19034 if (!Subtarget->isLittle())
19035 std::swap (Lo, Hi);
19036 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
19037 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
19038 return Builder.CreateOr(
19039 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 32)), "val64");
19040 }
19041
19042 Type *Tys[] = { Addr->getType() };
19043 Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex;
19044 Function *Ldrex = Intrinsic::getDeclaration(M, Int, Tys);
19045
19046 return Builder.CreateTruncOrBitCast(
19047 Builder.CreateCall(Ldrex, Addr),
19048 cast<PointerType>(Addr->getType())->getElementType());
19049}
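Editor's note (a hedged scalar sketch, not from the source): in the 64-bit path above, ldrexd/ldaexd returns the value as two i32 halves, which are reassembled with zext/shl/or; big-endian subtargets swap the halves first. The helper below is the scalar equivalent of that IR on a little-endian target.

#include <cassert>
#include <cstdint>

// Scalar form of: Or(ZExt(Lo, i64), Shl(ZExt(Hi, i64), 32)).
static uint64_t recombineLdrexd(uint32_t Lo, uint32_t Hi) {
  return static_cast<uint64_t>(Lo) | (static_cast<uint64_t>(Hi) << 32);
}

int main() {
  assert(recombineLdrexd(0xDDCCBBAAu, 0x44332211u) == 0x44332211DDCCBBAAull);
  return 0;
}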
19050
19051void ARMTargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
19052 IRBuilder<> &Builder) const {
19053 if (!Subtarget->hasV7Ops())
19054 return;
19055 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
19056 Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::arm_clrex));
19057}
19058
19059Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val,
19060 Value *Addr,
19061 AtomicOrdering Ord) const {
19062 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
19063 bool IsRelease = isReleaseOrStronger(Ord);
19064
19065 // Since the intrinsics must have legal type, the i64 intrinsics take two
19066 // parameters: "i32, i32". We must marshal Val into the appropriate form
19067 // before the call.
19068 if (Val->getType()->getPrimitiveSizeInBits() == 64) {
19069 Intrinsic::ID Int =
19070 IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd;
19071 Function *Strex = Intrinsic::getDeclaration(M, Int);
19072 Type *Int32Ty = Type::getInt32Ty(M->getContext());
19073
19074 Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo");
19075 Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi");
19076 if (!Subtarget->isLittle())
19077 std::swap(Lo, Hi);
19078 Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
19079 return Builder.CreateCall(Strex, {Lo, Hi, Addr});
19080 }
19081
19082 Intrinsic::ID Int = IsRelease ? Intrinsic::arm_stlex : Intrinsic::arm_strex;
19083 Type *Tys[] = { Addr->getType() };
19084 Function *Strex = Intrinsic::getDeclaration(M, Int, Tys);
19085
19086 return Builder.CreateCall(
19087 Strex, {Builder.CreateZExtOrBitCast(
19088 Val, Strex->getFunctionType()->getParamType(0)),
19089 Addr});
19090}
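Editor's note (a hedged scalar sketch, not from the source): emitStoreConditional performs the inverse marshalling for strexd/stlexd, splitting the i64 value into low and high i32 words before the call (again swapped on big-endian subtargets). A little-endian scalar equivalent:

#include <cassert>
#include <cstdint>
#include <utility>

// Scalar form of: {Trunc(Val, i32), Trunc(LShr(Val, 32), i32)}.
static std::pair<uint32_t, uint32_t> splitForStrexd(uint64_t Val) {
  return {static_cast<uint32_t>(Val), static_cast<uint32_t>(Val >> 32)};
}

int main() {
  auto LoHi = splitForStrexd(0x44332211DDCCBBAAull);
  assert(LoHi.first == 0xDDCCBBAAu && LoHi.second == 0x44332211u);
  return 0;
}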
19091
19092
19093bool ARMTargetLowering::alignLoopsWithOptSize() const {
19094 return Subtarget->isMClass();
19095}
19096
19097/// A helper function for determining the number of interleaved accesses we
19098/// will generate when lowering accesses of the given type.
19099unsigned
19100ARMTargetLowering::getNumInterleavedAccesses(VectorType *VecTy,
19101 const DataLayout &DL) const {
19102 return (DL.getTypeSizeInBits(VecTy) + 127) / 128;
19103}
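Editor's note (a hedged worked example, not from the source): the rounding-up division above counts how many 128-bit (Q-register-sized) accesses are needed to cover the vector. A few representative sizes:

#include <cassert>

static unsigned numInterleavedAccesses(unsigned VecSizeInBits) {
  return (VecSizeInBits + 127) / 128; // round up to the next 128-bit chunk
}

int main() {
  assert(numInterleavedAccesses(64) == 1);  // e.g. <8 x i8>: a single access
  assert(numInterleavedAccesses(128) == 1); // e.g. <4 x i32>: a single access
  assert(numInterleavedAccesses(256) == 2); // e.g. <8 x i32>: split into two
  assert(numInterleavedAccesses(512) == 4); // e.g. <16 x i32>: split into four
  return 0;
}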
19104
19105bool ARMTargetLowering::isLegalInterleavedAccessType(
19106 unsigned Factor, FixedVectorType *VecTy, Align Alignment,
19107 const DataLayout &DL) const {
19108
19109 unsigned VecSize = DL.getTypeSizeInBits(VecTy);
19110 unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
19111
19112 if (!Subtarget->hasNEON() && !Subtarget->hasMVEIntegerOps())
19113 return false;
19114
19115 // Ensure the vector doesn't have f16 elements. Even though we could do an
19116 // i16 vldN, we can't hold the f16 vectors and will end up converting via
19117 // f32.
19118 if (Subtarget->hasNEON() && VecTy->getElementType()->isHalfTy())
19119 return false;
19120 if (Subtarget->hasMVEIntegerOps() && Factor == 3)
19121 return false;
19122
19123 // Ensure the number of vector elements is greater than 1.
19124 if (VecTy->getNumElements() < 2)
19125 return false;
19126
19127 // Ensure the element type is legal.
19128 if (ElSize != 8 && ElSize != 16 && ElSize != 32)
19129 return false;
19130 // And that the alignment is high enough under MVE.
19131 if (Subtarget->hasMVEIntegerOps() && Alignment < ElSize / 8)
19132 return false;
19133
19134 // Ensure the total vector size is 64 or a multiple of 128. Types larger than
19135 // 128 will be split into multiple interleaved accesses.
19136 if (Subtarget->hasNEON() && VecSize == 64)
19137 return true;
19138 return VecSize % 128 == 0;
19139}
19140
19141unsigned ARMTargetLowering::getMaxSupportedInterleaveFactor() const {
19142 if (Subtarget->hasNEON())
19143 return 4;
19144 if (Subtarget->hasMVEIntegerOps())
19145 return MVEMaxSupportedInterleaveFactor;
19146 return TargetLoweringBase::getMaxSupportedInterleaveFactor();
19147}
19148
19149/// Lower an interleaved load into a vldN intrinsic.
19150///
19151/// E.g. Lower an interleaved load (Factor = 2):
19152/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr, align 4
19153/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
19154/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
19155///
19156/// Into:
19157/// %vld2 = { <4 x i32>, <4 x i32> } call llvm.arm.neon.vld2(%ptr, 4)
19158/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 0
19159/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 1
19160bool ARMTargetLowering::lowerInterleavedLoad(
19161 LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
19162 ArrayRef<unsigned> Indices, unsigned Factor) const {
19163 assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
19164 "Invalid interleave factor");
19165 assert(!Shuffles.empty() && "Empty shufflevector input");
19166 assert(Shuffles.size() == Indices.size() &&
19167 "Unmatched number of shufflevectors and indices");
19168
19169 auto *VecTy = cast<FixedVectorType>(Shuffles[0]->getType());
19170 Type *EltTy = VecTy->getElementType();
19171
19172 const DataLayout &DL = LI->getModule()->getDataLayout();
19173 Align Alignment = LI->getAlign();
19174
19175 // Skip if we do not have NEON and skip illegal vector types. We can
19176 // "legalize" wide vector types into multiple interleaved accesses as long as
19177 // the vector types are divisible by 128.
19178 if (!isLegalInterleavedAccessType(Factor, VecTy, Alignment, DL))
19179 return false;
19180
19181 unsigned NumLoads = getNumInterleavedAccesses(VecTy, DL);
19182
19183 // A pointer vector cannot be the return type of the ldN intrinsics. We need to
19184 // load integer vectors first and then convert to pointer vectors.
19185 if (EltTy->isPointerTy())
19186 VecTy = FixedVectorType::get(DL.getIntPtrType(EltTy), VecTy);
19187
19188 IRBuilder<> Builder(LI);
19189
19190 // The base address of the load.
19191 Value *BaseAddr = LI->getPointerOperand();
19192
19193 if (NumLoads > 1) {
19194 // If we're going to generate more than one load, reset the sub-vector type
19195 // to something legal.
19196 VecTy = FixedVectorType::get(VecTy->getElementType(),
19197 VecTy->getNumElements() / NumLoads);
19198
19199 // We will compute the pointer operand of each load from the original base
19200 // address using GEPs. Cast the base address to a pointer to the scalar
19201 // element type.
19202 BaseAddr = Builder.CreateBitCast(
19203 BaseAddr,
19204 VecTy->getElementType()->getPointerTo(LI->getPointerAddressSpace()));
19205 }
19206
19207 assert(isTypeLegal(EVT::getEVT(VecTy)) && "Illegal vldN vector type!");
19208
19209 auto createLoadIntrinsic = [&](Value *BaseAddr) {
19210 if (Subtarget->hasNEON()) {
19211 Type *Int8Ptr = Builder.getInt8PtrTy(LI->getPointerAddressSpace());
19212 Type *Tys[] = {VecTy, Int8Ptr};
19213 static const Intrinsic::ID LoadInts[3] = {Intrinsic::arm_neon_vld2,
19214 Intrinsic::arm_neon_vld3,
19215 Intrinsic::arm_neon_vld4};
19216 Function *VldnFunc =
19217 Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
19218
19219 SmallVector<Value *, 2> Ops;
19220 Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr));
19221 Ops.push_back(Builder.getInt32(LI->getAlignment()));
19222
19223 return Builder.CreateCall(VldnFunc, Ops, "vldN");
19224 } else {
19225 assert((Factor == 2 || Factor == 4) &&
19226 "expected interleave factor of 2 or 4 for MVE");
19227 Intrinsic::ID LoadInts =
19228 Factor == 2 ? Intrinsic::arm_mve_vld2q : Intrinsic::arm_mve_vld4q;
19229 Type *VecEltTy =
19230 VecTy->getElementType()->getPointerTo(LI->getPointerAddressSpace());
19231 Type *Tys[] = {VecTy, VecEltTy};
19232 Function *VldnFunc =
19233 Intrinsic::getDeclaration(LI->getModule(), LoadInts, Tys);
19234
19235 SmallVector<Value *, 2> Ops;
19236 Ops.push_back(Builder.CreateBitCast(BaseAddr, VecEltTy));
19237 return Builder.CreateCall(VldnFunc, Ops, "vldN");
19238 }
19239 };
19240
19241 // Holds sub-vectors extracted from the load intrinsic return values. The
19242 // sub-vectors are associated with the shufflevector instructions they will
19243 // replace.
19244 DenseMap<ShuffleVectorInst *, SmallVector<Value *, 4>> SubVecs;
19245
19246 for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
19247 // If we're generating more than one load, compute the base address of
19248 // subsequent loads as an offset from the previous.
19249 if (LoadCount > 0)
19250 BaseAddr = Builder.CreateConstGEP1_32(VecTy->getElementType(), BaseAddr,
19251 VecTy->getNumElements() * Factor);
19252
19253 CallInst *VldN = createLoadIntrinsic(BaseAddr);
19254
19255 // Replace uses of each shufflevector with the corresponding vector loaded
19256 // by ldN.
19257 for (unsigned i = 0; i < Shuffles.size(); i++) {
19258 ShuffleVectorInst *SV = Shuffles[i];
19259 unsigned Index = Indices[i];
19260
19261 Value *SubVec = Builder.CreateExtractValue(VldN, Index);
19262
19263 // Convert the integer vector to pointer vector if the element is pointer.
19264 if (EltTy->isPointerTy())
19265 SubVec = Builder.CreateIntToPtr(
19266 SubVec,
19267 FixedVectorType::get(SV->getType()->getElementType(), VecTy));
19268
19269 SubVecs[SV].push_back(SubVec);
19270 }
19271 }
19272
19273 // Replace uses of the shufflevector instructions with the sub-vectors
19274 // returned by the load intrinsic. If a shufflevector instruction is
19275 // associated with more than one sub-vector, those sub-vectors will be
19276 // concatenated into a single wide vector.
19277 for (ShuffleVectorInst *SVI : Shuffles) {
19278 auto &SubVec = SubVecs[SVI];
19279 auto *WideVec =
19280 SubVec.size() > 1 ? concatenateVectors(Builder, SubVec) : SubVec[0];
19281 SVI->replaceAllUsesWith(WideVec);
19282 }
19283
19284 return true;
19285}
19286
19287/// Lower an interleaved store into a vstN intrinsic.
19288///
19289/// E.g. Lower an interleaved store (Factor = 3):
19290/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
19291/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
19292/// store <12 x i32> %i.vec, <12 x i32>* %ptr, align 4
19293///
19294/// Into:
19295/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
19296/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
19297/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
19298/// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
19299///
19300/// Note that the new shufflevectors will be removed and we'll only generate one
19301/// vst3 instruction in CodeGen.
19302///
19303/// Example for a more general valid mask (Factor 3). Lower:
19304/// %i.vec = shuffle <32 x i32> %v0, <32 x i32> %v1,
19305/// <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
19306/// store <12 x i32> %i.vec, <12 x i32>* %ptr
19307///
19308/// Into:
19309/// %sub.v0 = shuffle <32 x i32> %v0, <32 x i32> v1, <4, 5, 6, 7>
19310/// %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> v1, <32, 33, 34, 35>
19311/// %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> v1, <16, 17, 18, 19>
19312/// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
19313bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
19314 ShuffleVectorInst *SVI,
19315 unsigned Factor) const {
19316 assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
19317 "Invalid interleave factor");
19318
19319 auto *VecTy = cast<FixedVectorType>(SVI->getType());
19320 assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
19321
19322 unsigned LaneLen = VecTy->getNumElements() / Factor;
19323 Type *EltTy = VecTy->getElementType();
19324 auto *SubVecTy = FixedVectorType::get(EltTy, LaneLen);
19325
19326 const DataLayout &DL = SI->getModule()->getDataLayout();
19327 Align Alignment = SI->getAlign();
19328
19329 // Skip if we do not have NEON and skip illegal vector types. We can
19330 // "legalize" wide vector types into multiple interleaved accesses as long as
19331 // the vector types are divisible by 128.
19332 if (!isLegalInterleavedAccessType(Factor, SubVecTy, Alignment, DL))
19333 return false;
19334
19335 unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL);
19336
19337 Value *Op0 = SVI->getOperand(0);
19338 Value *Op1 = SVI->getOperand(1);
19339 IRBuilder<> Builder(SI);
19340
19341 // StN intrinsics don't support pointer vectors as arguments. Convert pointer
19342 // vectors to integer vectors.
19343 if (EltTy->isPointerTy()) {
19344 Type *IntTy = DL.getIntPtrType(EltTy);
19345
19346 // Convert to the corresponding integer vector.
19347 auto *IntVecTy =
19348 FixedVectorType::get(IntTy, cast<FixedVectorType>(Op0->getType()));
19349 Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
19350 Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
19351
19352 SubVecTy = FixedVectorType::get(IntTy, LaneLen);
19353 }
19354
19355 // The base address of the store.
19356 Value *BaseAddr = SI->getPointerOperand();
19357
19358 if (NumStores > 1) {
19359 // If we're going to generate more than one store, reset the lane length
19360 // and sub-vector type to something legal.
19361 LaneLen /= NumStores;
19362 SubVecTy = FixedVectorType::get(SubVecTy->getElementType(), LaneLen);
19363
19364 // We will compute the pointer operand of each store from the original base
19365 // address using GEPs. Cast the base address to a pointer to the scalar
19366 // element type.
19367 BaseAddr = Builder.CreateBitCast(
19368 BaseAddr,
19369 SubVecTy->getElementType()->getPointerTo(SI->getPointerAddressSpace()));
19370 }
19371
19372 assert(isTypeLegal(EVT::getEVT(SubVecTy)) && "Illegal vstN vector type!");
19373
19374 auto Mask = SVI->getShuffleMask();
19375
19376 auto createStoreIntrinsic = [&](Value *BaseAddr,
19377 SmallVectorImpl<Value *> &Shuffles) {
19378 if (Subtarget->hasNEON()) {
19379 static const Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2,
19380 Intrinsic::arm_neon_vst3,
19381 Intrinsic::arm_neon_vst4};
19382 Type *Int8Ptr = Builder.getInt8PtrTy(SI->getPointerAddressSpace());
19383 Type *Tys[] = {Int8Ptr, SubVecTy};
19384
19385 Function *VstNFunc = Intrinsic::getDeclaration(
19386 SI->getModule(), StoreInts[Factor - 2], Tys);
19387
19388 SmallVector<Value *, 6> Ops;
19389 Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr));
19390 append_range(Ops, Shuffles);
19391 Ops.push_back(Builder.getInt32(SI->getAlignment()));
19392 Builder.CreateCall(VstNFunc, Ops);
19393 } else {
19394 assert((Factor == 2 || Factor == 4) &&
19395 "expected interleave factor of 2 or 4 for MVE");
19396 Intrinsic::ID StoreInts =
19397 Factor == 2 ? Intrinsic::arm_mve_vst2q : Intrinsic::arm_mve_vst4q;
19398 Type *EltPtrTy = SubVecTy->getElementType()->getPointerTo(
19399 SI->getPointerAddressSpace());
19400 Type *Tys[] = {EltPtrTy, SubVecTy};
19401 Function *VstNFunc =
19402 Intrinsic::getDeclaration(SI->getModule(), StoreInts, Tys);
19403
19404 SmallVector<Value *, 6> Ops;
19405 Ops.push_back(Builder.CreateBitCast(BaseAddr, EltPtrTy));
19406 append_range(Ops, Shuffles);
19407 for (unsigned F = 0; F < Factor; F++) {
19408 Ops.push_back(Builder.getInt32(F));
19409 Builder.CreateCall(VstNFunc, Ops);
19410 Ops.pop_back();
19411 }
19412 }
19413 };
19414
19415 for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
19416 // If we generating more than one store, we compute the base address of
19417 // subsequent stores as an offset from the previous.
19418 if (StoreCount > 0)
19419 BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getElementType(),
19420 BaseAddr, LaneLen * Factor);
19421
19422 SmallVector<Value *, 4> Shuffles;
19423
19424 // Split the shufflevector operands into sub vectors for the new vstN call.
19425 for (unsigned i = 0; i < Factor; i++) {
19426 unsigned IdxI = StoreCount * LaneLen * Factor + i;
19427 if (Mask[IdxI] >= 0) {
19428 Shuffles.push_back(Builder.CreateShuffleVector(
19429 Op0, Op1, createSequentialMask(Mask[IdxI], LaneLen, 0)));
19430 } else {
19431 unsigned StartMask = 0;
19432 for (unsigned j = 1; j < LaneLen; j++) {
19433 unsigned IdxJ = StoreCount * LaneLen * Factor + j;
19434 if (Mask[IdxJ * Factor + IdxI] >= 0) {
19435 StartMask = Mask[IdxJ * Factor + IdxI] - IdxJ;
19436 break;
19437 }
19438 }
19439 // Note: If all elements in a chunk are undefs, StartMask=0!
19440 // Note: Filling undef gaps with random elements is ok, since
19441 // those elements were being written anyway (with undefs).
19442 // In the case of all undefs we default to using elements from 0.
19443 // Note: StartMask cannot be negative, it's checked in
19444 // isReInterleaveMask
19445 Shuffles.push_back(Builder.CreateShuffleVector(
19446 Op0, Op1, createSequentialMask(StartMask, LaneLen, 0)));
19447 }
19448 }
19449
19450 createStoreIntrinsic(BaseAddr, Shuffles);
19451 }
19452 return true;
19453}
19454
19455enum HABaseType {
19456 HA_UNKNOWN = 0,
19457 HA_FLOAT,
19458 HA_DOUBLE,
19459 HA_VECT64,
19460 HA_VECT128
19461};
19462
19463static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base,
19464 uint64_t &Members) {
19465 if (auto *ST = dyn_cast<StructType>(Ty)) {
19466 for (unsigned i = 0; i < ST->getNumElements(); ++i) {
19467 uint64_t SubMembers = 0;
19468 if (!isHomogeneousAggregate(ST->getElementType(i), Base, SubMembers))
19469 return false;
19470 Members += SubMembers;
19471 }
19472 } else if (auto *AT = dyn_cast<ArrayType>(Ty)) {
19473 uint64_t SubMembers = 0;
19474 if (!isHomogeneousAggregate(AT->getElementType(), Base, SubMembers))
19475 return false;
19476 Members += SubMembers * AT->getNumElements();
19477 } else if (Ty->isFloatTy()) {
19478 if (Base != HA_UNKNOWN && Base != HA_FLOAT)
19479 return false;
19480 Members = 1;
19481 Base = HA_FLOAT;
19482 } else if (Ty->isDoubleTy()) {
19483 if (Base != HA_UNKNOWN && Base != HA_DOUBLE)
19484 return false;
19485 Members = 1;
19486 Base = HA_DOUBLE;
19487 } else if (auto *VT = dyn_cast<VectorType>(Ty)) {
19488 Members = 1;
19489 switch (Base) {
19490 case HA_FLOAT:
19491 case HA_DOUBLE:
19492 return false;
19493 case HA_VECT64:
19494 return VT->getPrimitiveSizeInBits().getFixedSize() == 64;
19495 case HA_VECT128:
19496 return VT->getPrimitiveSizeInBits().getFixedSize() == 128;
19497 case HA_UNKNOWN:
19498 switch (VT->getPrimitiveSizeInBits().getFixedSize()) {
19499 case 64:
19500 Base = HA_VECT64;
19501 return true;
19502 case 128:
19503 Base = HA_VECT128;
19504 return true;
19505 default:
19506 return false;
19507 }
19508 }
19509 }
19510
19511 return (Members > 0 && Members <= 4);
19512}
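Editor's note (hedged examples, not from the source): in C terms, the IR struct types produced for arguments like the following would be classified by isHomogeneousAggregate roughly as the comments indicate.

// Illustrative argument types and their expected AAPCS-VFP classification.
struct Vec3   { float x, y, z; };      // HA_FLOAT,  Members = 3  -> homogeneous aggregate
struct DPair  { double a, b; };        // HA_DOUBLE, Members = 2  -> homogeneous aggregate
struct Mixed  { float f; double d; };  // base types differ       -> not homogeneous
struct Floats { float f[5]; };         // Members = 5 exceeds 4   -> not homogeneous

int main() { return 0; }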
19513
19514/// Return the correct alignment for the current calling convention.
19515Align ARMTargetLowering::getABIAlignmentForCallingConv(Type *ArgTy,
19516 DataLayout DL) const {
19517 const Align ABITypeAlign = DL.getABITypeAlign(ArgTy);
19518 if (!ArgTy->isVectorTy())
19519 return ABITypeAlign;
19520
19521 // Avoid over-aligning vector parameters. It would require realigning the
19522 // stack and waste space for no real benefit.
19523 return std::min(ABITypeAlign, DL.getStackAlignment());
19524}
19525
19526/// Return true if a type is an AAPCS-VFP homogeneous aggregate or one of
19527/// [N x i32] or [N x i64]. This allows front-ends to skip emitting padding when
19528/// passing according to AAPCS rules.
19529bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters(
19530 Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
19531 if (getEffectiveCallingConv(CallConv, isVarArg) !=
19532 CallingConv::ARM_AAPCS_VFP)
19533 return false;
19534
19535 HABaseType Base = HA_UNKNOWN;
19536 uint64_t Members = 0;
19537 bool IsHA = isHomogeneousAggregate(Ty, Base, Members);
19538 LLVM_DEBUG(dbgs() << "isHA: " << IsHA << " "; Ty->dump());
19539
19540 bool IsIntArray = Ty->isArrayTy() && Ty->getArrayElementType()->isIntegerTy();
19541 return IsHA || IsIntArray;
19542}
19543
19544Register ARMTargetLowering::getExceptionPointerRegister(
19545 const Constant *PersonalityFn) const {
19546 // Platforms which do not use SjLj EH may return values in these registers
19547 // via the personality function.
19548 return Subtarget->useSjLjEH() ? Register() : ARM::R0;
19549}
19550
19551Register ARMTargetLowering::getExceptionSelectorRegister(
19552 const Constant *PersonalityFn) const {
19553 // Platforms which do not use SjLj EH may return values in these registers
19554 // via the personality function.
19555 return Subtarget->useSjLjEH() ? Register() : ARM::R1;
19556}
19557
19558void ARMTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
19559 // Update IsSplitCSR in ARMFunctionInfo.
19560 ARMFunctionInfo *AFI = Entry->getParent()->getInfo<ARMFunctionInfo>();
19561 AFI->setIsSplitCSR(true);
19562}
19563
19564void ARMTargetLowering::insertCopiesSplitCSR(
19565 MachineBasicBlock *Entry,
19566 const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
19567 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
19568 const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
19569 if (!IStart)
19570 return;
19571
19572 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
19573 MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
19574 MachineBasicBlock::iterator MBBI = Entry->begin();
19575 for (const MCPhysReg *I = IStart; *I; ++I) {
19576 const TargetRegisterClass *RC = nullptr;
19577 if (ARM::GPRRegClass.contains(*I))
19578 RC = &ARM::GPRRegClass;
19579 else if (ARM::DPRRegClass.contains(*I))
19580 RC = &ARM::DPRRegClass;
19581 else
19582 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
19583
19584 Register NewVR = MRI->createVirtualRegister(RC);
19585 // Create copy from CSR to a virtual register.
19586 // FIXME: this currently does not emit CFI pseudo-instructions, it works
19587 // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
19588 // nounwind. If we want to generalize this later, we may need to emit
19589 // CFI pseudo-instructions.
19590 assert(Entry->getParent()->getFunction().hasFnAttribute(
19591 Attribute::NoUnwind) &&
19592 "Function should be nounwind in insertCopiesSplitCSR!");
19593 Entry->addLiveIn(*I);
19594 BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
19595 .addReg(*I);
19596
19597 // Insert the copy-back instructions right before the terminator.
19598 for (auto *Exit : Exits)
19599 BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
19600 TII->get(TargetOpcode::COPY), *I)
19601 .addReg(NewVR);
19602 }
19603}
19604
19605void ARMTargetLowering::finalizeLowering(MachineFunction &MF) const {
19606 MF.getFrameInfo().computeMaxCallFrameSize(MF);
19607 TargetLoweringBase::finalizeLowering(MF);
19608}

/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/include/llvm/CodeGen/SelectionDAG.h

1//===- llvm/CodeGen/SelectionDAG.h - InstSelection DAG ----------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file declares the SelectionDAG class, and transitively defines the
10// SDNode class and subclasses.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_CODEGEN_SELECTIONDAG_H
15#define LLVM_CODEGEN_SELECTIONDAG_H
16
17#include "llvm/ADT/APFloat.h"
18#include "llvm/ADT/APInt.h"
19#include "llvm/ADT/ArrayRef.h"
20#include "llvm/ADT/DenseMap.h"
21#include "llvm/ADT/DenseSet.h"
22#include "llvm/ADT/FoldingSet.h"
23#include "llvm/ADT/SetVector.h"
24#include "llvm/ADT/SmallVector.h"
25#include "llvm/ADT/StringMap.h"
26#include "llvm/ADT/ilist.h"
27#include "llvm/ADT/iterator.h"
28#include "llvm/ADT/iterator_range.h"
29#include "llvm/CodeGen/DAGCombine.h"
30#include "llvm/CodeGen/ISDOpcodes.h"
31#include "llvm/CodeGen/MachineFunction.h"
32#include "llvm/CodeGen/MachineMemOperand.h"
33#include "llvm/CodeGen/SelectionDAGNodes.h"
34#include "llvm/CodeGen/ValueTypes.h"
35#include "llvm/IR/DebugLoc.h"
36#include "llvm/IR/Instructions.h"
37#include "llvm/IR/Metadata.h"
38#include "llvm/Support/Allocator.h"
39#include "llvm/Support/ArrayRecycler.h"
40#include "llvm/Support/AtomicOrdering.h"
41#include "llvm/Support/Casting.h"
42#include "llvm/Support/CodeGen.h"
43#include "llvm/Support/ErrorHandling.h"
44#include "llvm/Support/MachineValueType.h"
45#include "llvm/Support/RecyclingAllocator.h"
46#include <algorithm>
47#include <cassert>
48#include <cstdint>
49#include <functional>
50#include <map>
51#include <string>
52#include <tuple>
53#include <utility>
54#include <vector>
55
56namespace llvm {
57
58class AAResults;
59class BlockAddress;
60class BlockFrequencyInfo;
61class Constant;
62class ConstantFP;
63class ConstantInt;
64class DataLayout;
65struct fltSemantics;
66class FunctionLoweringInfo;
67class GlobalValue;
68struct KnownBits;
69class LegacyDivergenceAnalysis;
70class LLVMContext;
71class MachineBasicBlock;
72class MachineConstantPoolValue;
73class MCSymbol;
74class OptimizationRemarkEmitter;
75class ProfileSummaryInfo;
76class SDDbgValue;
77class SDDbgOperand;
78class SDDbgLabel;
79class SelectionDAG;
80class SelectionDAGTargetInfo;
81class TargetLibraryInfo;
82class TargetLowering;
83class TargetMachine;
84class TargetSubtargetInfo;
85class Value;
86
87class SDVTListNode : public FoldingSetNode {
88 friend struct FoldingSetTrait<SDVTListNode>;
89
90 /// A reference to an Interned FoldingSetNodeID for this node.
91 /// The Allocator in SelectionDAG holds the data.
92 /// SDVTList contains all types which are frequently accessed in SelectionDAG.
93 /// The size of this list is not expected to be big so it won't introduce
94 /// a memory penalty.
95 FoldingSetNodeIDRef FastID;
96 const EVT *VTs;
97 unsigned int NumVTs;
98 /// The hash value for SDVTList is fixed, so cache it to avoid
99 /// hash calculation.
100 unsigned HashValue;
101
102public:
103 SDVTListNode(const FoldingSetNodeIDRef ID, const EVT *VT, unsigned int Num) :
104 FastID(ID), VTs(VT), NumVTs(Num) {
105 HashValue = ID.ComputeHash();
106 }
107
108 SDVTList getSDVTList() {
109 SDVTList result = {VTs, NumVTs};
110 return result;
111 }
112};
113
114/// Specialize FoldingSetTrait for SDVTListNode
115/// to avoid computing temp FoldingSetNodeID and hash value.
116template<> struct FoldingSetTrait<SDVTListNode> : DefaultFoldingSetTrait<SDVTListNode> {
117 static void Profile(const SDVTListNode &X, FoldingSetNodeID& ID) {
118 ID = X.FastID;
119 }
120
121 static bool Equals(const SDVTListNode &X, const FoldingSetNodeID &ID,
122 unsigned IDHash, FoldingSetNodeID &TempID) {
123 if (X.HashValue != IDHash)
124 return false;
125 return ID == X.FastID;
126 }
127
128 static unsigned ComputeHash(const SDVTListNode &X, FoldingSetNodeID &TempID) {
129 return X.HashValue;
130 }
131};
132
133template <> struct ilist_alloc_traits<SDNode> {
134 static void deleteNode(SDNode *) {
135 llvm_unreachable("ilist_traits<SDNode> shouldn't see a deleteNode call!");
136 }
137};
138
139/// Keeps track of dbg_value information through SDISel. We do
140/// not build SDNodes for these so as not to perturb the generated code;
141/// instead the info is kept off to the side in this structure. Each SDNode may
142/// have one or more associated dbg_value entries. This information is kept in
143/// DbgValMap.
144/// Byval parameters are handled separately because they don't use alloca's,
145/// which busts the normal mechanism. There is good reason for handling all
146/// parameters separately: they may not have code generated for them, they
147/// should always go at the beginning of the function regardless of other code
148/// motion, and debug info for them is potentially useful even if the parameter
149/// is unused. Right now only byval parameters are handled separately.
150class SDDbgInfo {
151 BumpPtrAllocator Alloc;
152 SmallVector<SDDbgValue*, 32> DbgValues;
153 SmallVector<SDDbgValue*, 32> ByvalParmDbgValues;
154 SmallVector<SDDbgLabel*, 4> DbgLabels;
155 using DbgValMapType = DenseMap<const SDNode *, SmallVector<SDDbgValue *, 2>>;
156 DbgValMapType DbgValMap;
157
158public:
159 SDDbgInfo() = default;
160 SDDbgInfo(const SDDbgInfo &) = delete;
161 SDDbgInfo &operator=(const SDDbgInfo &) = delete;
162
163 void add(SDDbgValue *V, bool isParameter);
164
165 void add(SDDbgLabel *L) { DbgLabels.push_back(L); }
166
167 /// Invalidate all DbgValues attached to the node and remove
168 /// it from the Node-to-DbgValues map.
169 void erase(const SDNode *Node);
170
171 void clear() {
172 DbgValMap.clear();
173 DbgValues.clear();
174 ByvalParmDbgValues.clear();
175 DbgLabels.clear();
176 Alloc.Reset();
177 }
178
179 BumpPtrAllocator &getAlloc() { return Alloc; }
180
181 bool empty() const {
182 return DbgValues.empty() && ByvalParmDbgValues.empty() && DbgLabels.empty();
183 }
184
185 ArrayRef<SDDbgValue*> getSDDbgValues(const SDNode *Node) const {
186 auto I = DbgValMap.find(Node);
187 if (I != DbgValMap.end())
188 return I->second;
189 return ArrayRef<SDDbgValue*>();
190 }
191
192 using DbgIterator = SmallVectorImpl<SDDbgValue*>::iterator;
193 using DbgLabelIterator = SmallVectorImpl<SDDbgLabel*>::iterator;
194
195 DbgIterator DbgBegin() { return DbgValues.begin(); }
196 DbgIterator DbgEnd() { return DbgValues.end(); }
197 DbgIterator ByvalParmDbgBegin() { return ByvalParmDbgValues.begin(); }
198 DbgIterator ByvalParmDbgEnd() { return ByvalParmDbgValues.end(); }
199 DbgLabelIterator DbgLabelBegin() { return DbgLabels.begin(); }
200 DbgLabelIterator DbgLabelEnd() { return DbgLabels.end(); }
201};
202
203void checkForCycles(const SelectionDAG *DAG, bool force = false);
204
205/// This is used to represent a portion of an LLVM function in a low-level
206/// Data Dependence DAG representation suitable for instruction selection.
207/// This DAG is constructed as the first step of instruction selection in order
208/// to allow implementation of machine specific optimizations
209/// and code simplifications.
210///
211/// The representation used by the SelectionDAG is a target-independent
212/// representation, which has some similarities to the GCC RTL representation,
213/// but is significantly more simple, powerful, and is a graph form instead of a
214/// linear form.
215///
216class SelectionDAG {
217 const TargetMachine &TM;
218 const SelectionDAGTargetInfo *TSI = nullptr;
219 const TargetLowering *TLI = nullptr;
220 const TargetLibraryInfo *LibInfo = nullptr;
221 MachineFunction *MF;
222 Pass *SDAGISelPass = nullptr;
223 LLVMContext *Context;
224 CodeGenOpt::Level OptLevel;
225
226 LegacyDivergenceAnalysis * DA = nullptr;
227 FunctionLoweringInfo * FLI = nullptr;
228
229 /// The function-level optimization remark emitter. Used to emit remarks
230 /// whenever manipulating the DAG.
231 OptimizationRemarkEmitter *ORE;
232
233 ProfileSummaryInfo *PSI = nullptr;
234 BlockFrequencyInfo *BFI = nullptr;
235
236 /// The starting token.
237 SDNode EntryNode;
238
239 /// The root of the entire DAG.
240 SDValue Root;
241
242 /// A linked list of nodes in the current DAG.
243 ilist<SDNode> AllNodes;
244
245 /// The AllocatorType for allocating SDNodes. We use
246 /// pool allocation with recycling.
247 using NodeAllocatorType = RecyclingAllocator<BumpPtrAllocator, SDNode,
248 sizeof(LargestSDNode),
249 alignof(MostAlignedSDNode)>;
250
251 /// Pool allocation for nodes.
252 NodeAllocatorType NodeAllocator;
253
254 /// This structure is used to memoize nodes, automatically performing
255 /// CSE with existing nodes when a duplicate is requested.
256 FoldingSet<SDNode> CSEMap;
257
258 /// Pool allocation for machine-opcode SDNode operands.
259 BumpPtrAllocator OperandAllocator;
260 ArrayRecycler<SDUse> OperandRecycler;
261
262 /// Pool allocation for misc. objects that are created once per SelectionDAG.
263 BumpPtrAllocator Allocator;
264
265 /// Tracks dbg_value and dbg_label information through SDISel.
266 SDDbgInfo *DbgInfo;
267
268 using CallSiteInfo = MachineFunction::CallSiteInfo;
269 using CallSiteInfoImpl = MachineFunction::CallSiteInfoImpl;
270
271 struct CallSiteDbgInfo {
272 CallSiteInfo CSInfo;
273 MDNode *HeapAllocSite = nullptr;
274 bool NoMerge = false;
275 };
276
277 DenseMap<const SDNode *, CallSiteDbgInfo> SDCallSiteDbgInfo;
278
279 uint16_t NextPersistentId = 0;
280
281public:
282 /// Clients of various APIs that cause global effects on
283 /// the DAG can optionally implement this interface. This allows the clients
284 /// to handle the various sorts of updates that happen.
285 ///
286 /// A DAGUpdateListener automatically registers itself with DAG when it is
287 /// constructed, and removes itself when destroyed in RAII fashion.
288 struct DAGUpdateListener {
289 DAGUpdateListener *const Next;
290 SelectionDAG &DAG;
291
292 explicit DAGUpdateListener(SelectionDAG &D)
293 : Next(D.UpdateListeners), DAG(D) {
294 DAG.UpdateListeners = this;
295 }
296
297 virtual ~DAGUpdateListener() {
298 assert(DAG.UpdateListeners == this &&
299 "DAGUpdateListeners must be destroyed in LIFO order");
300 DAG.UpdateListeners = Next;
301 }
302
303 /// The node N that was deleted and, if E is not null, an
304 /// equivalent node E that replaced it.
305 virtual void NodeDeleted(SDNode *N, SDNode *E);
306
307 /// The node N that was updated.
308 virtual void NodeUpdated(SDNode *N);
309
310 /// The node N that was inserted.
311 virtual void NodeInserted(SDNode *N);
312 };
313
314 struct DAGNodeDeletedListener : public DAGUpdateListener {
315 std::function<void(SDNode *, SDNode *)> Callback;
316
317 DAGNodeDeletedListener(SelectionDAG &DAG,
318 std::function<void(SDNode *, SDNode *)> Callback)
319 : DAGUpdateListener(DAG), Callback(std::move(Callback)) {}
320
321 void NodeDeleted(SDNode *N, SDNode *E) override { Callback(N, E); }
322
323 private:
324 virtual void anchor();
325 };
326
327 /// Help to insert SDNodeFlags automatically in transforming. Use
328 /// RAII to save and resume flags in current scope.
329 class FlagInserter {
330 SelectionDAG &DAG;
331 SDNodeFlags Flags;
332 FlagInserter *LastInserter;
333
334 public:
335 FlagInserter(SelectionDAG &SDAG, SDNodeFlags Flags)
336 : DAG(SDAG), Flags(Flags),
337 LastInserter(SDAG.getFlagInserter()) {
338 SDAG.setFlagInserter(this);
339 }
340 FlagInserter(SelectionDAG &SDAG, SDNode *N)
341 : FlagInserter(SDAG, N->getFlags()) {}
342
343 FlagInserter(const FlagInserter &) = delete;
344 FlagInserter &operator=(const FlagInserter &) = delete;
345 ~FlagInserter() { DAG.setFlagInserter(LastInserter); }
346
347 SDNodeFlags getFlags() const { return Flags; }
348 };
349
350 /// When true, additional steps are taken to
351 /// ensure that getConstant() and similar functions return DAG nodes that
352 /// have legal types. This is important after type legalization since
353 /// any illegally typed nodes generated after this point will not experience
354 /// type legalization.
355 bool NewNodesMustHaveLegalTypes = false;
356
357private:
358 /// DAGUpdateListener is a friend so it can manipulate the listener stack.
359 friend struct DAGUpdateListener;
360
361 /// Linked list of registered DAGUpdateListener instances.
362 /// This stack is maintained by DAGUpdateListener RAII.
363 DAGUpdateListener *UpdateListeners = nullptr;
364
365 /// Implementation of setSubgraphColor.
366 /// Return whether we had to truncate the search.
367 bool setSubgraphColorHelper(SDNode *N, const char *Color,
368 DenseSet<SDNode *> &visited,
369 int level, bool &printed);
370
371 template <typename SDNodeT, typename... ArgTypes>
372 SDNodeT *newSDNode(ArgTypes &&... Args) {
373 return new (NodeAllocator.template Allocate<SDNodeT>())
374 SDNodeT(std::forward<ArgTypes>(Args)...);
375 }
376
377 /// Build a synthetic SDNodeT with the given args and extract its subclass
378 /// data as an integer (e.g. for use in a folding set).
379 ///
380 /// The args to this function are the same as the args to SDNodeT's
381 /// constructor, except the second arg (assumed to be a const DebugLoc&) is
382 /// omitted.
383 template <typename SDNodeT, typename... ArgTypes>
384 static uint16_t getSyntheticNodeSubclassData(unsigned IROrder,
385 ArgTypes &&... Args) {
386 // The compiler can reduce this expression to a constant iff we pass an
387 // empty DebugLoc. Thankfully, the debug location doesn't have any bearing
388 // on the subclass data.
389 return SDNodeT(IROrder, DebugLoc(), std::forward<ArgTypes>(Args)...)
390 .getRawSubclassData();
391 }
392
393 template <typename SDNodeTy>
394 static uint16_t getSyntheticNodeSubclassData(unsigned Opc, unsigned Order,
395 SDVTList VTs, EVT MemoryVT,
396 MachineMemOperand *MMO) {
397 return SDNodeTy(Opc, Order, DebugLoc(), VTs, MemoryVT, MMO)
398 .getRawSubclassData();
399 }
400
401 void createOperands(SDNode *Node, ArrayRef<SDValue> Vals);
402
403 void removeOperands(SDNode *Node) {
404 if (!Node->OperandList)
405 return;
406 OperandRecycler.deallocate(
407 ArrayRecycler<SDUse>::Capacity::get(Node->NumOperands),
408 Node->OperandList);
409 Node->NumOperands = 0;
410 Node->OperandList = nullptr;
411 }
412 void CreateTopologicalOrder(std::vector<SDNode*>& Order);
413
414public:
415 // Maximum depth for recursive analysis such as computeKnownBits, etc.
416 static constexpr unsigned MaxRecursionDepth = 6;
417
418 explicit SelectionDAG(const TargetMachine &TM, CodeGenOpt::Level);
419 SelectionDAG(const SelectionDAG &) = delete;
420 SelectionDAG &operator=(const SelectionDAG &) = delete;
421 ~SelectionDAG();
422
423 /// Prepare this SelectionDAG to process code in the given MachineFunction.
424 void init(MachineFunction &NewMF, OptimizationRemarkEmitter &NewORE,
425 Pass *PassPtr, const TargetLibraryInfo *LibraryInfo,
426 LegacyDivergenceAnalysis * Divergence,
427 ProfileSummaryInfo *PSIin, BlockFrequencyInfo *BFIin);
428
429 void setFunctionLoweringInfo(FunctionLoweringInfo * FuncInfo) {
430 FLI = FuncInfo;
431 }
432
433 /// Clear state and free memory necessary to make this
434 /// SelectionDAG ready to process a new block.
435 void clear();
436
437 MachineFunction &getMachineFunction() const { return *MF; }
438 const Pass *getPass() const { return SDAGISelPass; }
439
440 const DataLayout &getDataLayout() const { return MF->getDataLayout(); }
441 const TargetMachine &getTarget() const { return TM; }
442 const TargetSubtargetInfo &getSubtarget() const { return MF->getSubtarget(); }
443 const TargetLowering &getTargetLoweringInfo() const { return *TLI; }
444 const TargetLibraryInfo &getLibInfo() const { return *LibInfo; }
445 const SelectionDAGTargetInfo &getSelectionDAGInfo() const { return *TSI; }
446 const LegacyDivergenceAnalysis *getDivergenceAnalysis() const { return DA; }
447 LLVMContext *getContext() const { return Context; }
448 OptimizationRemarkEmitter &getORE() const { return *ORE; }
449 ProfileSummaryInfo *getPSI() const { return PSI; }
450 BlockFrequencyInfo *getBFI() const { return BFI; }
451
452 FlagInserter *getFlagInserter() { return Inserter; }
453 void setFlagInserter(FlagInserter *FI) { Inserter = FI; }
454
455 /// Dump the dot graph to a user-provided path with the given title.
456 /// This doesn't open the dot viewer program and
457 /// helps visualization when outside a debugging session.
458 /// FileName expects an absolute path. If provided
459 /// without any path separators then the file
460 /// will be created in the current directory.
461 /// An error will be emitted if the path is invalid.
462#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
463 LLVM_DUMP_METHOD void dumpDotGraph(const Twine &FileName, const Twine &Title);
464#endif
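Editor's note (hypothetical usage, not from the source): in an asserts-enabled or LLVM_ENABLE_DUMP build, the declaration above can be invoked from a debugger or ad-hoc instrumentation; CurDAG below is assumed to be a SelectionDAG pointer in scope.

CurDAG->dumpDotGraph("/tmp/isel-dag.dot", "DAG before legalization");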
465
466 /// Pop up a GraphViz/gv window with the DAG rendered using 'dot'.
467 void viewGraph(const std::string &Title);
468 void viewGraph();
469
470#ifndef NDEBUG
471 std::map<const SDNode *, std::string> NodeGraphAttrs;
472#endif
473
474 /// Clear all previously defined node graph attributes.
475 /// Intended to be used from a debugging tool (eg. gdb).
476 void clearGraphAttrs();
477
478 /// Set graph attributes for a node. (eg. "color=red".)
479 void setGraphAttrs(const SDNode *N, const char *Attrs);
480
481 /// Get graph attributes for a node. (eg. "color=red".)
482 /// Used from getNodeAttributes.
483 std::string getGraphAttrs(const SDNode *N) const;
484
485 /// Convenience for setting node color attribute.
486 void setGraphColor(const SDNode *N, const char *Color);
487
488 /// Convenience for setting subgraph color attribute.
489 void setSubgraphColor(SDNode *N, const char *Color);
490
491 using allnodes_const_iterator = ilist<SDNode>::const_iterator;
492
493 allnodes_const_iterator allnodes_begin() const { return AllNodes.begin(); }
494 allnodes_const_iterator allnodes_end() const { return AllNodes.end(); }
495
496 using allnodes_iterator = ilist<SDNode>::iterator;
497
498 allnodes_iterator allnodes_begin() { return AllNodes.begin(); }
499 allnodes_iterator allnodes_end() { return AllNodes.end(); }
500
501 ilist<SDNode>::size_type allnodes_size() const {
502 return AllNodes.size();
503 }
504
505 iterator_range<allnodes_iterator> allnodes() {
506 return make_range(allnodes_begin(), allnodes_end());
507 }
508 iterator_range<allnodes_const_iterator> allnodes() const {
509 return make_range(allnodes_begin(), allnodes_end());
510 }
511
512 /// Return the root tag of the SelectionDAG.
513 const SDValue &getRoot() const { return Root; }
514
515 /// Return the token chain corresponding to the entry of the function.
516 SDValue getEntryNode() const {
517 return SDValue(const_cast<SDNode *>(&EntryNode), 0);
518 }
519
520 /// Set the current root tag of the SelectionDAG.
521 ///
522 const SDValue &setRoot(SDValue N) {
523    assert((!N.getNode() || N.getValueType() == MVT::Other) &&
524           "DAG root value is not a chain!");
525 if (N.getNode())
526 checkForCycles(N.getNode(), this);
527 Root = N;
528 if (N.getNode())
529 checkForCycles(this);
530 return Root;
531 }
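// [Editorial example, not part of SelectionDAG.h] A minimal sketch of how the
// root is typically updated during lowering, assuming a SelectionDAG &DAG and
// an SDLoc DL are in scope and "llvm/CodeGen/SelectionDAG.h" is included.
static void appendChainToRoot(llvm::SelectionDAG &DAG, const llvm::SDLoc &DL,
                              llvm::SDValue NewChain) {
  using namespace llvm;
  // Merge the current root with a freshly produced token chain, then re-root.
  // setRoot asserts that the new root has type MVT::Other (a chain).
  SDValue Merged =
      DAG.getNode(ISD::TokenFactor, DL, MVT::Other, DAG.getRoot(), NewChain);
  DAG.setRoot(Merged);
}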
532
533#ifndef NDEBUG
534 void VerifyDAGDiverence();
535#endif
536
537 /// This iterates over the nodes in the SelectionDAG, folding
538 /// certain types of nodes together, or eliminating superfluous nodes. The
539 /// Level argument controls whether Combine is allowed to produce nodes and
540 /// types that are illegal on the target.
541 void Combine(CombineLevel Level, AAResults *AA,
542 CodeGenOpt::Level OptLevel);
543
544 /// This transforms the SelectionDAG into a SelectionDAG that
545 /// only uses types natively supported by the target.
546 /// Returns "true" if it made any changes.
547 ///
548 /// Note that this is an involved process that may invalidate pointers into
549 /// the graph.
550 bool LegalizeTypes();
551
552 /// This transforms the SelectionDAG into a SelectionDAG that is
553 /// compatible with the target instruction selector, as indicated by the
554 /// TargetLowering object.
555 ///
556 /// Note that this is an involved process that may invalidate pointers into
557 /// the graph.
558 void Legalize();
559
560 /// Transforms a SelectionDAG node and any operands to it into a node
561 /// that is compatible with the target instruction selector, as indicated by
562 /// the TargetLowering object.
563 ///
564 /// \returns true if \c N is a valid, legal node after calling this.
565 ///
566 /// This essentially runs a single recursive walk of the \c Legalize process
567 /// over the given node (and its operands). This can be used to incrementally
568 /// legalize the DAG. All of the nodes which are directly replaced,
569 /// potentially including N, are added to the output parameter \c
570 /// UpdatedNodes so that the delta to the DAG can be understood by the
571 /// caller.
572 ///
573 /// When this returns false, N has been legalized in a way that make the
574 /// pointer passed in no longer valid. It may have even been deleted from the
575 /// DAG, and so it shouldn't be used further. When this returns true, the
576 /// N passed in is a legal node, and can be immediately processed as such.
577 /// This may still have done some work on the DAG, and will still populate
578 /// UpdatedNodes with any new nodes replacing those originally in the DAG.
579 bool LegalizeOp(SDNode *N, SmallSetVector<SDNode *, 16> &UpdatedNodes);
580
581 /// This transforms the SelectionDAG into a SelectionDAG
582 /// that only uses vector math operations supported by the target. This is
583 /// necessary as a separate step from Legalize because unrolling a vector
584 /// operation can introduce illegal types, which requires running
585 /// LegalizeTypes again.
586 ///
587 /// This returns true if it made any changes; in that case, LegalizeTypes
588 /// is called again before Legalize.
589 ///
590 /// Note that this is an involved process that may invalidate pointers into
591 /// the graph.
592 bool LegalizeVectors();
593
594 /// This method deletes all unreachable nodes in the SelectionDAG.
595 void RemoveDeadNodes();
596
597 /// Remove the specified node from the system. This node must
598 /// have no referrers.
599 void DeleteNode(SDNode *N);
600
601 /// Return an SDVTList that represents the list of values specified.
602 SDVTList getVTList(EVT VT);
603 SDVTList getVTList(EVT VT1, EVT VT2);
604 SDVTList getVTList(EVT VT1, EVT VT2, EVT VT3);
605 SDVTList getVTList(EVT VT1, EVT VT2, EVT VT3, EVT VT4);
606 SDVTList getVTList(ArrayRef<EVT> VTs);
607
608 //===--------------------------------------------------------------------===//
609 // Node creation methods.
610
611 /// Create a ConstantSDNode wrapping a constant value.
612 /// If VT is a vector type, the constant is splatted into a BUILD_VECTOR.
613 ///
614 /// If only legal types can be produced, this does the necessary
615 /// transformations (e.g., if the vector element type is illegal).
616 /// @{
617 SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT,
618 bool isTarget = false, bool isOpaque = false);
619 SDValue getConstant(const APInt &Val, const SDLoc &DL, EVT VT,
620 bool isTarget = false, bool isOpaque = false);
621
622 SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget = false,
623 bool IsOpaque = false) {
624 return getConstant(APInt::getAllOnesValue(VT.getScalarSizeInBits()), DL,
625 VT, IsTarget, IsOpaque);
626 }
627
628 SDValue getConstant(const ConstantInt &Val, const SDLoc &DL, EVT VT,
629 bool isTarget = false, bool isOpaque = false);
630 SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL,
631 bool isTarget = false);
632 SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL,
633 bool LegalTypes = true);
634 SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL,
635 bool isTarget = false);
636
637 SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT,
638 bool isOpaque = false) {
639 return getConstant(Val, DL, VT, true, isOpaque);
640 }
641 SDValue getTargetConstant(const APInt &Val, const SDLoc &DL, EVT VT,
642 bool isOpaque = false) {
643 return getConstant(Val, DL, VT, true, isOpaque);
644 }
645 SDValue getTargetConstant(const ConstantInt &Val, const SDLoc &DL, EVT VT,
646 bool isOpaque = false) {
647 return getConstant(Val, DL, VT, true, isOpaque);
648 }
649
650 /// Create a true or false constant of type \p VT using the target's
651 /// BooleanContent for type \p OpVT.
652 SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT);
653 /// @}
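// [Editorial example, not part of SelectionDAG.h] A hedged sketch of the
// constant-creation helpers declared above; the value types are arbitrary.
static llvm::SDValue buildSomeConstants(llvm::SelectionDAG &DAG,
                                        const llvm::SDLoc &DL) {
  using namespace llvm;
  SDValue C = DAG.getConstant(42, DL, MVT::i32);        // scalar i32 42
  SDValue TC = DAG.getTargetConstant(7, DL, MVT::i32);  // isTarget form
  SDValue Ones = DAG.getAllOnesConstant(DL, MVT::i16);  // i16 0xFFFF
  // With a vector VT the value is splatted into a BUILD_VECTOR.
  SDValue Splat = DAG.getConstant(1, DL, MVT::v4i32);   // <1, 1, 1, 1>
  (void)C; (void)TC; (void)Ones;
  return Splat;
}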
654
655 /// Create a ConstantFPSDNode wrapping a constant value.
656 /// If VT is a vector type, the constant is splatted into a BUILD_VECTOR.
657 ///
658 /// If only legal types can be produced, this does the necessary
659 /// transformations (e.g., if the vector element type is illegal).
660 /// The forms that take a double should only be used for simple constants
661 /// that can be exactly represented in VT. No checks are made.
662 /// @{
663 SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT,
664 bool isTarget = false);
665 SDValue getConstantFP(const APFloat &Val, const SDLoc &DL, EVT VT,
666 bool isTarget = false);
667 SDValue getConstantFP(const ConstantFP &V, const SDLoc &DL, EVT VT,
668 bool isTarget = false);
669 SDValue getTargetConstantFP(double Val, const SDLoc &DL, EVT VT) {
670 return getConstantFP(Val, DL, VT, true);
671 }
672 SDValue getTargetConstantFP(const APFloat &Val, const SDLoc &DL, EVT VT) {
673 return getConstantFP(Val, DL, VT, true);
674 }
675 SDValue getTargetConstantFP(const ConstantFP &Val, const SDLoc &DL, EVT VT) {
676 return getConstantFP(Val, DL, VT, true);
677 }
678 /// @}
679
680 SDValue getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT,
681 int64_t offset = 0, bool isTargetGA = false,
682 unsigned TargetFlags = 0);
683 SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT,
684 int64_t offset = 0, unsigned TargetFlags = 0) {
685 return getGlobalAddress(GV, DL, VT, offset, true, TargetFlags);
686 }
687 SDValue getFrameIndex(int FI, EVT VT, bool isTarget = false);
688 SDValue getTargetFrameIndex(int FI, EVT VT) {
689 return getFrameIndex(FI, VT, true);
690 }
691 SDValue getJumpTable(int JTI, EVT VT, bool isTarget = false,
692 unsigned TargetFlags = 0);
693 SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags = 0) {
694 return getJumpTable(JTI, VT, true, TargetFlags);
695 }
696 SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align = None,
697 int Offs = 0, bool isT = false,
698 unsigned TargetFlags = 0);
699 SDValue getTargetConstantPool(const Constant *C, EVT VT,
700 MaybeAlign Align = None, int Offset = 0,
701 unsigned TargetFlags = 0) {
702 return getConstantPool(C, VT, Align, Offset, true, TargetFlags);
703 }
704 SDValue getConstantPool(MachineConstantPoolValue *C, EVT VT,
705 MaybeAlign Align = None, int Offs = 0,
706 bool isT = false, unsigned TargetFlags = 0);
707 SDValue getTargetConstantPool(MachineConstantPoolValue *C, EVT VT,
708 MaybeAlign Align = None, int Offset = 0,
709 unsigned TargetFlags = 0) {
710 return getConstantPool(C, VT, Align, Offset, true, TargetFlags);
711 }
712 SDValue getTargetIndex(int Index, EVT VT, int64_t Offset = 0,
713 unsigned TargetFlags = 0);
714 // When generating a branch to a BB, we don't in general know enough
715 // to provide debug info for the BB at that time, so keep this one around.
716 SDValue getBasicBlock(MachineBasicBlock *MBB);
717 SDValue getExternalSymbol(const char *Sym, EVT VT);
718 SDValue getTargetExternalSymbol(const char *Sym, EVT VT,
719 unsigned TargetFlags = 0);
720 SDValue getMCSymbol(MCSymbol *Sym, EVT VT);
721
722 SDValue getValueType(EVT);
723 SDValue getRegister(unsigned Reg, EVT VT);
724 SDValue getRegisterMask(const uint32_t *RegMask);
725 SDValue getEHLabel(const SDLoc &dl, SDValue Root, MCSymbol *Label);
726 SDValue getLabelNode(unsigned Opcode, const SDLoc &dl, SDValue Root,
727 MCSymbol *Label);
728 SDValue getBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset = 0,
729 bool isTarget = false, unsigned TargetFlags = 0);
730 SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT,
731 int64_t Offset = 0, unsigned TargetFlags = 0) {
732 return getBlockAddress(BA, VT, Offset, true, TargetFlags);
733 }
734
735 SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg,
736 SDValue N) {
737 return getNode(ISD::CopyToReg, dl, MVT::Other, Chain,
738 getRegister(Reg, N.getValueType()), N);
739 }
740
741 // This version of the getCopyToReg method takes an extra operand, which
742 // indicates that there is potentially an incoming glue value (if Glue is not
743 // null) and that there should be a glue result.
744 SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N,
745 SDValue Glue) {
746 SDVTList VTs = getVTList(MVT::Other, MVT::Glue);
747 SDValue Ops[] = { Chain, getRegister(Reg, N.getValueType()), N, Glue };
748 return getNode(ISD::CopyToReg, dl, VTs,
749 makeArrayRef(Ops, Glue.getNode() ? 4 : 3));
750 }
751
752 // Similar to last getCopyToReg() except parameter Reg is a SDValue
753 SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, SDValue Reg, SDValue N,
754 SDValue Glue) {
755 SDVTList VTs = getVTList(MVT::Other, MVT::Glue);
756 SDValue Ops[] = { Chain, Reg, N, Glue };
757 return getNode(ISD::CopyToReg, dl, VTs,
758 makeArrayRef(Ops, Glue.getNode() ? 4 : 3));
759 }
760
761 SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT) {
762 SDVTList VTs = getVTList(VT, MVT::Other);
763 SDValue Ops[] = { Chain, getRegister(Reg, VT) };
764 return getNode(ISD::CopyFromReg, dl, VTs, Ops);
765 }
766
767 // This version of the getCopyFromReg method takes an extra operand, which
768 // indicates that there is potentially an incoming glue value (if Glue is not
769 // null) and that there should be a glue result.
770 SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT,
771 SDValue Glue) {
772 SDVTList VTs = getVTList(VT, MVT::Other, MVT::Glue);
773 SDValue Ops[] = { Chain, getRegister(Reg, VT), Glue };
774 return getNode(ISD::CopyFromReg, dl, VTs,
775 makeArrayRef(Ops, Glue.getNode() ? 3 : 2));
776 }
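// [Editorial example, not part of SelectionDAG.h] Sketch of routing a value
// through a register with an explicit glue link; Reg is assumed to be a valid
// register number supplied by the caller.
static llvm::SDValue copyThroughRegister(llvm::SelectionDAG &DAG,
                                         const llvm::SDLoc &DL,
                                         llvm::SDValue Chain, unsigned Reg,
                                         llvm::SDValue Val) {
  using namespace llvm;
  // CopyToReg with a null glue operand still produces (chain, glue) results.
  SDValue ToReg = DAG.getCopyToReg(Chain, DL, Reg, Val, SDValue());
  // Thread the chain and glue into the matching CopyFromReg.
  return DAG.getCopyFromReg(ToReg.getValue(0), DL, Reg, Val.getValueType(),
                            ToReg.getValue(1));
}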
777
778 SDValue getCondCode(ISD::CondCode Cond);
779
780 /// Return an ISD::VECTOR_SHUFFLE node. The number of elements in VT,
781 /// which must be a vector type, must match the number of mask elements
782 /// NumElts. An integer mask element equal to -1 is treated as undefined.
783 SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2,
784 ArrayRef<int> Mask);
785
786 /// Return an ISD::BUILD_VECTOR node. The number of elements in VT,
787 /// which must be a vector type, must match the number of operands in Ops.
788 /// The operands must have the same type as (or, for integers, a type wider
789 /// than) VT's element type.
790 SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef<SDValue> Ops) {
791 // VerifySDNode (via InsertNode) checks BUILD_VECTOR later.
792 return getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
793 }
794
795 /// Return an ISD::BUILD_VECTOR node. The number of elements in VT,
796 /// which must be a vector type, must match the number of operands in Ops.
797 /// The operands must have the same type as (or, for integers, a type wider
798 /// than) VT's element type.
799 SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef<SDUse> Ops) {
800 // VerifySDNode (via InsertNode) checks BUILD_VECTOR later.
801 return getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
802 }
803
804 /// Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all
805 /// elements. VT must be a vector type. Op's type must be the same as (or,
806 /// for integers, a type wider than) VT's element type.
807 SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op) {
808 // VerifySDNode (via InsertNode) checks BUILD_VECTOR later.
809 if (Op.getOpcode() == ISD::UNDEF) {
810      assert((VT.getVectorElementType() == Op.getValueType() ||
811              (VT.isInteger() &&
812               VT.getVectorElementType().bitsLE(Op.getValueType()))) &&
813             "A splatted value must have a width equal or (for integers) "
814             "greater than the vector element type!");
815 return getNode(ISD::UNDEF, SDLoc(), VT);
816 }
817
818 SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Op);
819 return getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
820 }
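// [Editorial example, not part of SelectionDAG.h] Splatting a scalar constant
// into a fixed-width vector with the helper above.
static llvm::SDValue splatFourI32(llvm::SelectionDAG &DAG,
                                  const llvm::SDLoc &DL) {
  using namespace llvm;
  SDValue Scalar = DAG.getConstant(4, DL, MVT::i32);
  return DAG.getSplatBuildVector(MVT::v4i32, DL, Scalar); // <4, 4, 4, 4>
}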
821
822 // Return a splat ISD::SPLAT_VECTOR node, consisting of Op splatted to all
823 // elements.
824 SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op) {
825 if (Op.getOpcode() == ISD::UNDEF) {
826      assert((VT.getVectorElementType() == Op.getValueType() ||
827              (VT.isInteger() &&
828               VT.getVectorElementType().bitsLE(Op.getValueType()))) &&
829             "A splatted value must have a width equal or (for integers) "
830             "greater than the vector element type!");
831 return getNode(ISD::UNDEF, SDLoc(), VT);
832 }
833 return getNode(ISD::SPLAT_VECTOR, DL, VT, Op);
834 }
835
836 /// Returns a vector of type ResVT whose elements contain the linear sequence
837 /// <0, Step, Step * 2, Step * 3, ...>
838 SDValue getStepVector(const SDLoc &DL, EVT ResVT, SDValue Step);
839
840 /// Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to
841 /// the shuffle node in input but with swapped operands.
842 ///
843 /// Example: shuffle A, B, <0,5,2,7> -> shuffle B, A, <4,1,6,3>
844 SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV);
845
846 /// Convert Op, which must be of float type, to the
847 /// float type VT, by either extending or rounding (by truncation).
848 SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT);
849
850 /// Convert Op, which must be a STRICT operation of float type, to the
851 /// float type VT, by either extending or rounding (by truncation).
852 std::pair<SDValue, SDValue>
853 getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT);
854
855 /// Convert Op, which must be of integer type, to the
856 /// integer type VT, by either any-extending or truncating it.
857 SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT);
858
859 /// Convert Op, which must be of integer type, to the
860 /// integer type VT, by either sign-extending or truncating it.
861 SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT);
862
863 /// Convert Op, which must be of integer type, to the
864 /// integer type VT, by either zero-extending or truncating it.
865 SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT);
866
867 /// Return the expression required to zero extend the Op
868 /// value assuming it was the smaller SrcTy value.
869 SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT);
870
871 /// Convert Op, which must be of integer type, to the integer type VT, by
872 /// either truncating it or performing either zero or sign extension as
873 /// appropriate extension for the pointer's semantics.
874 SDValue getPtrExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT);
875
876 /// Return the expression required to extend the Op as a pointer value
877 /// assuming it was the smaller SrcTy value. This may be either a zero extend
878 /// or a sign extend.
879 SDValue getPtrExtendInReg(SDValue Op, const SDLoc &DL, EVT VT);
880
881 /// Convert Op, which must be of integer type, to the integer type VT,
882 /// by using an extension appropriate for the target's
883 /// BooleanContent for type OpVT or truncating it.
884 SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT);
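// [Editorial example, not part of SelectionDAG.h] The extend-or-truncate
// helpers pick the right opcode from the relative bit widths, so callers can
// normalise a value to a target width without comparing sizes themselves.
static llvm::SDValue normaliseToI32(llvm::SelectionDAG &DAG,
                                    const llvm::SDLoc &DL, llvm::SDValue V) {
  using namespace llvm;
  // Zero-extends if V is narrower than i32, truncates if wider, else no-op.
  return DAG.getZExtOrTrunc(V, DL, MVT::i32);
}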
885
886 /// Create a bitwise NOT operation as (XOR Val, -1).
887 SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT);
888
889 /// Create a logical NOT operation as (XOR Val, BooleanOne).
890 SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT);
891
892 /// Returns sum of the base pointer and offset.
893 /// Unlike getObjectPtrOffset this does not set NoUnsignedWrap by default.
894 SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL,
895 const SDNodeFlags Flags = SDNodeFlags());
896 SDValue getMemBasePlusOffset(SDValue Base, SDValue Offset, const SDLoc &DL,
897 const SDNodeFlags Flags = SDNodeFlags());
898
899 /// Create an add instruction with appropriate flags when used for
900 /// addressing some offset of an object. i.e. if a load is split into multiple
901 /// components, create an add nuw from the base pointer to the offset.
902 SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset) {
903 SDNodeFlags Flags;
904 Flags.setNoUnsignedWrap(true);
905 return getMemBasePlusOffset(Ptr, Offset, SL, Flags);
906 }
907
908 SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, SDValue Offset) {
909 // The object itself can't wrap around the address space, so it shouldn't be
910 // possible for the adds of the offsets to the split parts to overflow.
911 SDNodeFlags Flags;
912 Flags.setNoUnsignedWrap(true);
913 return getMemBasePlusOffset(Ptr, Offset, SL, Flags);
914 }
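// [Editorial example, not part of SelectionDAG.h] Addressing the high half of
// a split 64-bit object; getObjectPtrOffset tags the add with nuw because the
// offset cannot wrap within the object. TypeSize::Fixed is assumed to be
// available from "llvm/Support/TypeSize.h".
static llvm::SDValue highHalfAddress(llvm::SelectionDAG &DAG,
                                     const llvm::SDLoc &DL, llvm::SDValue Ptr) {
  using namespace llvm;
  return DAG.getObjectPtrOffset(DL, Ptr, TypeSize::Fixed(4));
}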
915
916 /// Return a new CALLSEQ_START node, that starts new call frame, in which
917 /// InSize bytes are set up inside CALLSEQ_START..CALLSEQ_END sequence and
918 /// OutSize specifies part of the frame set up prior to the sequence.
919 SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize,
920 const SDLoc &DL) {
921 SDVTList VTs = getVTList(MVT::Other, MVT::Glue);
922 SDValue Ops[] = { Chain,
923 getIntPtrConstant(InSize, DL, true),
924 getIntPtrConstant(OutSize, DL, true) };
925 return getNode(ISD::CALLSEQ_START, DL, VTs, Ops);
926 }
927
928 /// Return a new CALLSEQ_END node, which always must have a
929 /// glue result (to ensure it's not CSE'd).
930 /// CALLSEQ_END does not have a useful SDLoc.
931 SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2,
932 SDValue InGlue, const SDLoc &DL) {
933 SDVTList NodeTys = getVTList(MVT::Other, MVT::Glue);
934 SmallVector<SDValue, 4> Ops;
935 Ops.push_back(Chain);
936 Ops.push_back(Op1);
937 Ops.push_back(Op2);
938 if (InGlue.getNode())
939 Ops.push_back(InGlue);
940 return getNode(ISD::CALLSEQ_END, DL, NodeTys, Ops);
941 }
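// [Editorial example, not part of SelectionDAG.h] Bracketing a lowered call
// with CALLSEQ_START / CALLSEQ_END; NumBytes is the size of the outgoing
// argument area and InGlue may be a null SDValue.
static llvm::SDValue bracketCallSequence(llvm::SelectionDAG &DAG,
                                         const llvm::SDLoc &DL,
                                         llvm::SDValue Chain, uint64_t NumBytes,
                                         llvm::SDValue InGlue) {
  using namespace llvm;
  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, /*OutSize=*/0, DL);
  // ... argument copies and the call node itself would be emitted here ...
  return DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
                            DAG.getIntPtrConstant(0, DL, true), InGlue, DL);
}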
942
943 /// Return true if the result of this operation is always undefined.
944 bool isUndef(unsigned Opcode, ArrayRef<SDValue> Ops);
945
946 /// Return an UNDEF node. UNDEF does not have a useful SDLoc.
947 SDValue getUNDEF(EVT VT) {
948 return getNode(ISD::UNDEF, SDLoc(), VT);
949 }
950
951 /// Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
952 SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm) {
953    assert(MulImm.getMinSignedBits() <= VT.getSizeInBits() &&
954           "Immediate does not fit VT");
955 return getNode(ISD::VSCALE, DL, VT,
956 getConstant(MulImm.sextOrTrunc(VT.getSizeInBits()), DL, VT));
957 }
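// [Editorial example, not part of SelectionDAG.h] Materialising '4 * vscale'
// as an i64, e.g. to step a pointer by one scalable register's worth of
// 4-byte elements.
static llvm::SDValue fourTimesVScale(llvm::SelectionDAG &DAG,
                                     const llvm::SDLoc &DL) {
  using namespace llvm;
  return DAG.getVScale(DL, MVT::i64, APInt(64, 4));
}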
958
959 /// Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
960 SDValue getGLOBAL_OFFSET_TABLE(EVT VT) {
961 return getNode(ISD::GLOBAL_OFFSET_TABLE, SDLoc(), VT);
962 }
963
964 /// Gets or creates the specified node.
965 ///
966 SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
967 ArrayRef<SDUse> Ops);
968 SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
969 ArrayRef<SDValue> Ops, const SDNodeFlags Flags);
970 SDValue getNode(unsigned Opcode, const SDLoc &DL, ArrayRef<EVT> ResultTys,
971 ArrayRef<SDValue> Ops);
972 SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
973 ArrayRef<SDValue> Ops, const SDNodeFlags Flags);
974
975 // Use flags from current flag inserter.
976 SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
977 ArrayRef<SDValue> Ops);
978 SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
979 ArrayRef<SDValue> Ops);
980 SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue Operand);
981 SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
982 SDValue N2);
983 SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
984 SDValue N2, SDValue N3);
985
986 // Specialize based on number of operands.
987 SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT);
988 SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue Operand,
989 const SDNodeFlags Flags);
990 SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
991 SDValue N2, const SDNodeFlags Flags);
992 SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
993 SDValue N2, SDValue N3, const SDNodeFlags Flags);
994 SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
995 SDValue N2, SDValue N3, SDValue N4);
996 SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
997 SDValue N2, SDValue N3, SDValue N4, SDValue N5);
998
999 // Specialize again based on number of operands for nodes with a VTList
1000 // rather than a single VT.
1001 SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList);
1002 SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, SDValue N);
1003 SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, SDValue N1,
1004 SDValue N2);
1005 SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, SDValue N1,
1006 SDValue N2, SDValue N3);
1007 SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, SDValue N1,
1008 SDValue N2, SDValue N3, SDValue N4);
1009 SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, SDValue N1,
1010 SDValue N2, SDValue N3, SDValue N4, SDValue N5);
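// [Editorial example, not part of SelectionDAG.h] getNode is the generic
// factory behind most helpers in this class; simple arithmetic only needs an
// opcode, a location, a result type and the operands.
static llvm::SDValue addThenMul(llvm::SelectionDAG &DAG, const llvm::SDLoc &DL,
                                llvm::SDValue A, llvm::SDValue B) {
  using namespace llvm;
  SDValue Sum = DAG.getNode(ISD::ADD, DL, MVT::i32, A, B);
  // Multi-result nodes would instead take an SDVTList built with getVTList.
  return DAG.getNode(ISD::MUL, DL, MVT::i32, Sum, B);
}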
1011
1012 /// Compute a TokenFactor to force all the incoming stack arguments to be
1013 /// loaded from the stack. This is used in tail call lowering to protect
1014 /// stack arguments from being clobbered.
1015 SDValue getStackArgumentTokenFactor(SDValue Chain);
1016
1017  LLVM_ATTRIBUTE_DEPRECATED(SDValue getMemcpy(SDValue Chain, const SDLoc &dl,
1018                                              SDValue Dst, SDValue Src,
1019                                              SDValue Size, unsigned Align,
1020                                              bool isVol, bool AlwaysInline,
1021                                              bool isTailCall,
1022                                              MachinePointerInfo DstPtrInfo,
1023                                              MachinePointerInfo SrcPtrInfo),
1024                            "Use the version that takes Align instead") {
1025 return getMemcpy(Chain, dl, Dst, Src, Size, llvm::Align(Align), isVol,
1026 AlwaysInline, isTailCall, DstPtrInfo, SrcPtrInfo);
1027 }
1028
1029 SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src,
1030 SDValue Size, Align Alignment, bool isVol,
1031 bool AlwaysInline, bool isTailCall,
1032 MachinePointerInfo DstPtrInfo,
1033 MachinePointerInfo SrcPtrInfo);
1034
1035  LLVM_ATTRIBUTE_DEPRECATED(SDValue getMemmove(SDValue Chain, const SDLoc &dl,
1036                                               SDValue Dst, SDValue Src,
1037                                               SDValue Size, unsigned Align,
1038                                               bool isVol, bool isTailCall,
1039                                               MachinePointerInfo DstPtrInfo,
1040                                               MachinePointerInfo SrcPtrInfo),
1041                            "Use the version that takes Align instead") {
1042 return getMemmove(Chain, dl, Dst, Src, Size, llvm::Align(Align), isVol,
1043 isTailCall, DstPtrInfo, SrcPtrInfo);
1044 }
1045 SDValue getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src,
1046 SDValue Size, Align Alignment, bool isVol, bool isTailCall,
1047 MachinePointerInfo DstPtrInfo,
1048 MachinePointerInfo SrcPtrInfo);
1049
1050  LLVM_ATTRIBUTE_DEPRECATED(SDValue getMemset(SDValue Chain, const SDLoc &dl,
1051                                              SDValue Dst, SDValue Src,
1052                                              SDValue Size, unsigned Align,
1053                                              bool isVol, bool isTailCall,
1054                                              MachinePointerInfo DstPtrInfo),
1055                            "Use the version that takes Align instead") {
1056 return getMemset(Chain, dl, Dst, Src, Size, llvm::Align(Align), isVol,
1057 isTailCall, DstPtrInfo);
1058 }
1059 SDValue getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src,
1060 SDValue Size, Align Alignment, bool isVol, bool isTailCall,
1061 MachinePointerInfo DstPtrInfo);
1062
1063 SDValue getAtomicMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
1064 unsigned DstAlign, SDValue Src, unsigned SrcAlign,
1065 SDValue Size, Type *SizeTy, unsigned ElemSz,
1066 bool isTailCall, MachinePointerInfo DstPtrInfo,
1067 MachinePointerInfo SrcPtrInfo);
1068
1069 SDValue getAtomicMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,
1070 unsigned DstAlign, SDValue Src, unsigned SrcAlign,
1071 SDValue Size, Type *SizeTy, unsigned ElemSz,
1072 bool isTailCall, MachinePointerInfo DstPtrInfo,
1073 MachinePointerInfo SrcPtrInfo);
1074
1075 SDValue getAtomicMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
1076 unsigned DstAlign, SDValue Value, SDValue Size,
1077 Type *SizeTy, unsigned ElemSz, bool isTailCall,
1078 MachinePointerInfo DstPtrInfo);
1079
1080 /// Helper function to make it easier to build SetCC's if you just have an
1081 /// ISD::CondCode instead of an SDValue.
1082 SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS,
1083 ISD::CondCode Cond, SDValue Chain = SDValue(),
1084 bool IsSignaling = false) {
1085    assert(LHS.getValueType().isVector() == RHS.getValueType().isVector() &&
1086           "Cannot compare scalars to vectors");
1087    assert(LHS.getValueType().isVector() == VT.isVector() &&
1088           "Cannot compare scalars to vectors");
1089    assert(Cond != ISD::SETCC_INVALID &&
1090           "Cannot create a setCC of an invalid node.");
1091 if (Chain)
1092 return getNode(IsSignaling ? ISD::STRICT_FSETCCS : ISD::STRICT_FSETCC, DL,
1093 {VT, MVT::Other}, {Chain, LHS, RHS, getCondCode(Cond)});
1094 return getNode(ISD::SETCC, DL, VT, LHS, RHS, getCondCode(Cond));
1095 }
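// [Editorial example, not part of SelectionDAG.h] An unsigned "less than"
// comparison; the boolean result type is queried from the target so the
// sketch stays target-neutral. getSetCCResultType is assumed to be the usual
// TargetLowering query.
static llvm::SDValue isUnsignedLess(llvm::SelectionDAG &DAG,
                                    const llvm::SDLoc &DL, llvm::SDValue A,
                                    llvm::SDValue B) {
  using namespace llvm;
  EVT BoolVT = DAG.getTargetLoweringInfo().getSetCCResultType(
      DAG.getDataLayout(), *DAG.getContext(), A.getValueType());
  return DAG.getSetCC(DL, BoolVT, A, B, ISD::SETULT);
}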
1096
1097 /// Helper function to make it easier to build Select's if you just have
1098 /// operands and don't want to check for vector.
1099 SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS,
1100 SDValue RHS) {
1101    assert(LHS.getValueType() == RHS.getValueType() &&
1102           "Cannot use select on differing types");
1103    assert(VT.isVector() == LHS.getValueType().isVector() &&
1104           "Cannot mix vectors and scalars");
1105 auto Opcode = Cond.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT;
1106 return getNode(Opcode, DL, VT, Cond, LHS, RHS);
1107 }
1108
1109 /// Helper function to make it easier to build SelectCC's if you just have an
1110 /// ISD::CondCode instead of an SDValue.
1111 SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True,
1112 SDValue False, ISD::CondCode Cond) {
1113 return getNode(ISD::SELECT_CC, DL, True.getValueType(), LHS, RHS, True,
1114 False, getCondCode(Cond));
1115 }
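// [Editorial example, not part of SelectionDAG.h] A signed max expressed as a
// single SELECT_CC node via the helper above: (A > B) ? A : B.
static llvm::SDValue signedMax(llvm::SelectionDAG &DAG, const llvm::SDLoc &DL,
                               llvm::SDValue A, llvm::SDValue B) {
  using namespace llvm;
  return DAG.getSelectCC(DL, A, B, A, B, ISD::SETGT);
}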
1116
1117 /// Try to simplify a select/vselect into 1 of its operands or a constant.
1118 SDValue simplifySelect(SDValue Cond, SDValue TVal, SDValue FVal);
1119
1120 /// Try to simplify a shift into 1 of its operands or a constant.
1121 SDValue simplifyShift(SDValue X, SDValue Y);
1122
1123 /// Try to simplify a floating-point binary operation into 1 of its operands
1124 /// or a constant.
1125 SDValue simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y,
1126 SDNodeFlags Flags);
1127
1128 /// VAArg produces a result and token chain, and takes a pointer
1129 /// and a source value as input.
1130 SDValue getVAArg(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr,
1131 SDValue SV, unsigned Align);
1132
1133 /// Gets a node for an atomic cmpxchg op. There are two
1134 /// valid Opcodes. ISD::ATOMIC_CMP_SWAP produces the value loaded and a
1135 /// chain result. ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS produces the value loaded,
1136 /// a success flag (initially i1), and a chain.
1137 SDValue getAtomicCmpSwap(unsigned Opcode, const SDLoc &dl, EVT MemVT,
1138 SDVTList VTs, SDValue Chain, SDValue Ptr,
1139 SDValue Cmp, SDValue Swp, MachineMemOperand *MMO);
1140
1141 /// Gets a node for an atomic op, produces result (if relevant)
1142 /// and chain and takes 2 operands.
1143 SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain,
1144 SDValue Ptr, SDValue Val, MachineMemOperand *MMO);
1145
1146 /// Gets a node for an atomic op, produces result and chain and
1147 /// takes 1 operand.
1148 SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, EVT VT,
1149 SDValue Chain, SDValue Ptr, MachineMemOperand *MMO);
1150
1151 /// Gets a node for an atomic op, produces result and chain and takes N
1152 /// operands.
1153 SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
1154 SDVTList VTList, ArrayRef<SDValue> Ops,
1155 MachineMemOperand *MMO);
1156
1157 /// Creates a MemIntrinsicNode that may produce a
1158 /// result and takes a list of operands. Opcode may be INTRINSIC_VOID,
1159 /// INTRINSIC_W_CHAIN, or a target-specific opcode with a value not
1160 /// less than FIRST_TARGET_MEMORY_OPCODE.
1161 SDValue getMemIntrinsicNode(
1162 unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef<SDValue> Ops,
1163 EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment,
1164 MachineMemOperand::Flags Flags = MachineMemOperand::MOLoad |
1165 MachineMemOperand::MOStore,
1166 uint64_t Size = 0, const AAMDNodes &AAInfo = AAMDNodes());
1167
1168 inline SDValue getMemIntrinsicNode(
1169 unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef<SDValue> Ops,
1170 EVT MemVT, MachinePointerInfo PtrInfo, MaybeAlign Alignment = None,
1171 MachineMemOperand::Flags Flags = MachineMemOperand::MOLoad |
1172 MachineMemOperand::MOStore,
1173 uint64_t Size = 0, const AAMDNodes &AAInfo = AAMDNodes()) {
1174 // Ensure that codegen never sees alignment 0
1175 return getMemIntrinsicNode(Opcode, dl, VTList, Ops, MemVT, PtrInfo,
1176 Alignment.getValueOr(getEVTAlign(MemVT)), Flags,
1177 Size, AAInfo);
1178 }
1179
1180  LLVM_ATTRIBUTE_DEPRECATED(
1181      inline SDValue getMemIntrinsicNode(
1182          unsigned Opcode, const SDLoc &dl, SDVTList VTList,
1183          ArrayRef<SDValue> Ops, EVT MemVT, MachinePointerInfo PtrInfo,
1184          unsigned Alignment,
1185          MachineMemOperand::Flags Flags = MachineMemOperand::MOLoad |
1186                                            MachineMemOperand::MOStore,
1187          uint64_t Size = 0, const AAMDNodes &AAInfo = AAMDNodes()),
1188      "") {
1189 return getMemIntrinsicNode(Opcode, dl, VTList, Ops, MemVT, PtrInfo,
1190 MaybeAlign(Alignment), Flags, Size, AAInfo);
1191 }
1192
1193 SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList,
1194 ArrayRef<SDValue> Ops, EVT MemVT,
1195 MachineMemOperand *MMO);
1196
1197 /// Creates a LifetimeSDNode that starts (`IsStart==true`) or ends
1198 /// (`IsStart==false`) the lifetime of the portion of `FrameIndex` between
1199 /// offsets `Offset` and `Offset + Size`.
1200 SDValue getLifetimeNode(bool IsStart, const SDLoc &dl, SDValue Chain,
1201 int FrameIndex, int64_t Size, int64_t Offset = -1);
1202
1203 /// Creates a PseudoProbeSDNode with function GUID `Guid` and
1204 /// the index of the block `Index` it is probing, as well as the attributes
1205 /// `attr` of the probe.
1206 SDValue getPseudoProbeNode(const SDLoc &Dl, SDValue Chain, uint64_t Guid,
1207 uint64_t Index, uint32_t Attr);
1208
1209 /// Create a MERGE_VALUES node from the given operands.
1210 SDValue getMergeValues(ArrayRef<SDValue> Ops, const SDLoc &dl);
1211
1212 /// Loads are not normal binary operators: their result type is not
1213 /// determined by their operands, and they produce a value AND a token chain.
1214 ///
1215 /// This function will set the MOLoad flag on MMOFlags, but you can set it if
1216 /// you want. The MOStore flag must not be set.
1217 SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr,
1218 MachinePointerInfo PtrInfo,
1219 MaybeAlign Alignment = MaybeAlign(),
1220 MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone,
1221 const AAMDNodes &AAInfo = AAMDNodes(),
1222 const MDNode *Ranges = nullptr);
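// [Editorial example, not part of SelectionDAG.h] A simple aligned i32 load;
// the default-constructed MachinePointerInfo is only a placeholder for real
// pointer information.
static llvm::SDValue loadI32(llvm::SelectionDAG &DAG, const llvm::SDLoc &DL,
                             llvm::SDValue Chain, llvm::SDValue Ptr) {
  using namespace llvm;
  // Result 0 is the loaded value; result 1 is the output token chain.
  return DAG.getLoad(MVT::i32, DL, Chain, Ptr, MachinePointerInfo(), Align(4));
}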
1223 /// FIXME: Remove once transition to Align is over.
1224 inline SDValue
1225 getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr,
1226 MachinePointerInfo PtrInfo, unsigned Alignment,
1227 MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone,
1228 const AAMDNodes &AAInfo = AAMDNodes(),
1229 const MDNode *Ranges = nullptr) {
1230 return getLoad(VT, dl, Chain, Ptr, PtrInfo, MaybeAlign(Alignment), MMOFlags,
1231 AAInfo, Ranges);
1232 }
1233 SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr,
1234 MachineMemOperand *MMO);
1235 SDValue
1236 getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain,
1237 SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT,
1238 MaybeAlign Alignment = MaybeAlign(),
1239 MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone,
1240 const AAMDNodes &AAInfo = AAMDNodes());
1241 /// FIXME: Remove once transition to Align is over.
1242 inline SDValue
1243 getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain,
1244 SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT,
1245 unsigned Alignment,
1246 MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone,
1247 const AAMDNodes &AAInfo = AAMDNodes()) {
1248 return getExtLoad(ExtType, dl, VT, Chain, Ptr, PtrInfo, MemVT,
1249 MaybeAlign(Alignment), MMOFlags, AAInfo);
1250 }
1251 SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT,
1252 SDValue Chain, SDValue Ptr, EVT MemVT,
1253 MachineMemOperand *MMO);
1254 SDValue getIndexedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base,
1255 SDValue Offset, ISD::MemIndexedMode AM);
1256 SDValue getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT,
1257 const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset,
1258 MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment,
1259 MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone,
1260 const AAMDNodes &AAInfo = AAMDNodes(),
1261 const MDNode *Ranges = nullptr);
1262 inline SDValue getLoad(
1263 ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &dl,
1264 SDValue Chain, SDValue Ptr, SDValue Offset, MachinePointerInfo PtrInfo,
1265 EVT MemVT, MaybeAlign Alignment = MaybeAlign(),
1266 MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone,
1267 const AAMDNodes &AAInfo = AAMDNodes(), const MDNode *Ranges = nullptr) {
1268 // Ensures that codegen never sees a None Alignment.
1269 return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, PtrInfo, MemVT,
1270 Alignment.getValueOr(getEVTAlign(MemVT)), MMOFlags, AAInfo,
1271 Ranges);
1272 }
1273 /// FIXME: Remove once transition to Align is over.
1274 inline SDValue
1275 getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT,
1276 const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset,
1277 MachinePointerInfo PtrInfo, EVT MemVT, unsigned Alignment,
1278 MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone,
1279 const AAMDNodes &AAInfo = AAMDNodes(),
1280 const MDNode *Ranges = nullptr) {
1281 return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, PtrInfo, MemVT,
1282 MaybeAlign(Alignment), MMOFlags, AAInfo, Ranges);
1283 }
1284 SDValue getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT,
1285 const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset,
1286 EVT MemVT, MachineMemOperand *MMO);
1287
1288 /// Helper function to build ISD::STORE nodes.
1289 ///
1290 /// This function will set the MOStore flag on MMOFlags, but you can set it if
1291 /// you want. The MOLoad and MOInvariant flags must not be set.
1292
1293 SDValue
1294 getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr,
1295 MachinePointerInfo PtrInfo, Align Alignment,
1296 MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone,
1297 const AAMDNodes &AAInfo = AAMDNodes());
1298 inline SDValue
1299 getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr,
1300 MachinePointerInfo PtrInfo, MaybeAlign Alignment = MaybeAlign(),
1301 MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone,
1302 const AAMDNodes &AAInfo = AAMDNodes()) {
1303 return getStore(Chain, dl, Val, Ptr, PtrInfo,
1304 Alignment.getValueOr(getEVTAlign(Val.getValueType())),
                     33. Calling 'SDValue::getValueType'
1305 MMOFlags, AAInfo);
1306 }
1307 /// FIXME: Remove once transition to Align is over.
1308 inline SDValue
1309 getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr,
1310 MachinePointerInfo PtrInfo, unsigned Alignment,
1311 MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone,
1312 const AAMDNodes &AAInfo = AAMDNodes()) {
1313 return getStore(Chain, dl, Val, Ptr, PtrInfo, MaybeAlign(Alignment),
1314 MMOFlags, AAInfo);
1315 }
1316 SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr,
1317 MachineMemOperand *MMO);
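A hedged usage sketch of the store helpers (Chain, dl, Val, Ptr and PtrInfo are illustrative names assumed to exist in the surrounding lowering code, not taken from this report). Omitting the alignment selects the ABI alignment of Val's type via the MaybeAlign overload above:

  SDValue St = DAG.getStore(Chain, dl, Val, Ptr, PtrInfo);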
1318 SDValue
1319 getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr,
1320 MachinePointerInfo PtrInfo, EVT SVT, Align Alignment,
1321 MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone,
1322 const AAMDNodes &AAInfo = AAMDNodes());
1323 inline SDValue
1324 getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr,
1325 MachinePointerInfo PtrInfo, EVT SVT,
1326 MaybeAlign Alignment = MaybeAlign(),
1327 MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone,
1328 const AAMDNodes &AAInfo = AAMDNodes()) {
1329 return getTruncStore(Chain, dl, Val, Ptr, PtrInfo, SVT,
1330 Alignment.getValueOr(getEVTAlign(SVT)), MMOFlags,
1331 AAInfo);
1332 }
1333 /// FIXME: Remove once transition to Align is over.
1334 inline SDValue
1335 getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr,
1336 MachinePointerInfo PtrInfo, EVT SVT, unsigned Alignment,
1337 MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone,
1338 const AAMDNodes &AAInfo = AAMDNodes()) {
1339 return getTruncStore(Chain, dl, Val, Ptr, PtrInfo, SVT,
1340 MaybeAlign(Alignment), MMOFlags, AAInfo);
1341 }
1342 SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val,
1343 SDValue Ptr, EVT SVT, MachineMemOperand *MMO);
1344 SDValue getIndexedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base,
1345 SDValue Offset, ISD::MemIndexedMode AM);
1346
1347 SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base,
1348 SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT,
1349 MachineMemOperand *MMO, ISD::MemIndexedMode AM,
1350 ISD::LoadExtType, bool IsExpanding = false);
1351 SDValue getIndexedMaskedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base,
1352 SDValue Offset, ISD::MemIndexedMode AM);
1353 SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val,
1354 SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT,
1355 MachineMemOperand *MMO, ISD::MemIndexedMode AM,
1356 bool IsTruncating = false, bool IsCompressing = false);
1357 SDValue getIndexedMaskedStore(SDValue OrigStore, const SDLoc &dl,
1358 SDValue Base, SDValue Offset,
1359 ISD::MemIndexedMode AM);
1360 SDValue getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
1361 ArrayRef<SDValue> Ops, MachineMemOperand *MMO,
1362 ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy);
1363 SDValue getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,
1364 ArrayRef<SDValue> Ops, MachineMemOperand *MMO,
1365 ISD::MemIndexType IndexType,
1366 bool IsTruncating = false);
1367
1368 /// Construct a node to track a Value* through the backend.
1369 SDValue getSrcValue(const Value *v);
1370
1371 /// Return an MDNodeSDNode which holds an MDNode.
1372 SDValue getMDNode(const MDNode *MD);
1373
1374 /// Return a bitcast using the SDLoc of the value operand, and casting to the
1375 /// provided type. Use getNode to set a custom SDLoc.
1376 SDValue getBitcast(EVT VT, SDValue V);
1377
1378 /// Return an AddrSpaceCastSDNode.
1379 SDValue getAddrSpaceCast(const SDLoc &dl, EVT VT, SDValue Ptr, unsigned SrcAS,
1380 unsigned DestAS);
1381
1382 /// Return a freeze using the SDLoc of the value operand.
1383 SDValue getFreeze(SDValue V);
1384
1385 /// Return an AssertAlignSDNode.
1386 SDValue getAssertAlign(const SDLoc &DL, SDValue V, Align A);
1387
1388 /// Return the specified value casted to
1389 /// the target's desired shift amount type.
1390 SDValue getShiftAmountOperand(EVT LHSTy, SDValue Op);
1391
1392 /// Expand the specified \c ISD::VAARG node as the Legalize pass would.
1393 SDValue expandVAArg(SDNode *Node);
1394
1395 /// Expand the specified \c ISD::VACOPY node as the Legalize pass would.
1396 SDValue expandVACopy(SDNode *Node);
1397
1398  /// Returns a GlobalAddress of the function from the current module with
1399 /// name matching the given ExternalSymbol. Additionally can provide the
1400 /// matched function.
1401  /// Panics if the function doesn't exist.
1402 SDValue getSymbolFunctionGlobalAddress(SDValue Op,
1403 Function **TargetFunction = nullptr);
1404
1405 /// *Mutate* the specified node in-place to have the
1406 /// specified operands. If the resultant node already exists in the DAG,
1407 /// this does not modify the specified node, instead it returns the node that
1408 /// already exists. If the resultant node does not exist in the DAG, the
1409 /// input node is returned. As a degenerate case, if you specify the same
1410 /// input operands as the node already has, the input node is returned.
1411 SDNode *UpdateNodeOperands(SDNode *N, SDValue Op);
1412 SDNode *UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2);
1413 SDNode *UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2,
1414 SDValue Op3);
1415 SDNode *UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2,
1416 SDValue Op3, SDValue Op4);
1417 SDNode *UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2,
1418 SDValue Op3, SDValue Op4, SDValue Op5);
1419 SDNode *UpdateNodeOperands(SDNode *N, ArrayRef<SDValue> Ops);
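A brief sketch of the CSE behaviour documented above (N, NewOp0 and NewOp1 are hypothetical names): always continue with the returned node, since it may be a pre-existing equivalent rather than N itself.

  SDNode *Result = DAG.UpdateNodeOperands(N, NewOp0, NewOp1);
  // If Result != N, an identical node already existed and N was left unchanged.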
1420
1421 /// Creates a new TokenFactor containing \p Vals. If \p Vals contains 64k
1422 /// values or more, move values into new TokenFactors in 64k-1 blocks, until
1423 /// the final TokenFactor has less than 64k operands.
1424 SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl<SDValue> &Vals);
1425
1426 /// *Mutate* the specified machine node's memory references to the provided
1427 /// list.
1428 void setNodeMemRefs(MachineSDNode *N,
1429 ArrayRef<MachineMemOperand *> NewMemRefs);
1430
1431 // Calculate divergence of node \p N based on its operands.
1432 bool calculateDivergence(SDNode *N);
1433
1434 // Propagates the change in divergence to users
1435 void updateDivergence(SDNode * N);
1436
1437 /// These are used for target selectors to *mutate* the
1438 /// specified node to have the specified return type, Target opcode, and
1439 /// operands. Note that target opcodes are stored as
1440 /// ~TargetOpcode in the node opcode field. The resultant node is returned.
1441 SDNode *SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT);
1442 SDNode *SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT, SDValue Op1);
1443 SDNode *SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT,
1444 SDValue Op1, SDValue Op2);
1445 SDNode *SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT,
1446 SDValue Op1, SDValue Op2, SDValue Op3);
1447 SDNode *SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT,
1448 ArrayRef<SDValue> Ops);
1449 SDNode *SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT1, EVT VT2);
1450 SDNode *SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT1,
1451 EVT VT2, ArrayRef<SDValue> Ops);
1452 SDNode *SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT1,
1453 EVT VT2, EVT VT3, ArrayRef<SDValue> Ops);
1454 SDNode *SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT1,
1455 EVT VT2, SDValue Op1, SDValue Op2);
1456 SDNode *SelectNodeTo(SDNode *N, unsigned MachineOpc, SDVTList VTs,
1457 ArrayRef<SDValue> Ops);
1458
1459 /// This *mutates* the specified node to have the specified
1460 /// return type, opcode, and operands.
1461 SDNode *MorphNodeTo(SDNode *N, unsigned Opc, SDVTList VTs,
1462 ArrayRef<SDValue> Ops);
1463
1464 /// Mutate the specified strict FP node to its non-strict equivalent,
1465 /// unlinking the node from its chain and dropping the metadata arguments.
1466 /// The node must be a strict FP node.
1467 SDNode *mutateStrictFPToFP(SDNode *Node);
1468
1469 /// These are used for target selectors to create a new node
1470 /// with specified return type(s), MachineInstr opcode, and operands.
1471 ///
1472 /// Note that getMachineNode returns the resultant node. If there is already
1473 /// a node of the specified opcode and operands, it returns that node instead
1474 /// of the current one.
1475 MachineSDNode *getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT);
1476 MachineSDNode *getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT,
1477 SDValue Op1);
1478 MachineSDNode *getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT,
1479 SDValue Op1, SDValue Op2);
1480 MachineSDNode *getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT,
1481 SDValue Op1, SDValue Op2, SDValue Op3);
1482 MachineSDNode *getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT,
1483 ArrayRef<SDValue> Ops);
1484 MachineSDNode *getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT1,
1485 EVT VT2, SDValue Op1, SDValue Op2);
1486 MachineSDNode *getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT1,
1487 EVT VT2, SDValue Op1, SDValue Op2, SDValue Op3);
1488 MachineSDNode *getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT1,
1489 EVT VT2, ArrayRef<SDValue> Ops);
1490 MachineSDNode *getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT1,
1491 EVT VT2, EVT VT3, SDValue Op1, SDValue Op2);
1492 MachineSDNode *getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT1,
1493 EVT VT2, EVT VT3, SDValue Op1, SDValue Op2,
1494 SDValue Op3);
1495 MachineSDNode *getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT1,
1496 EVT VT2, EVT VT3, ArrayRef<SDValue> Ops);
1497 MachineSDNode *getMachineNode(unsigned Opcode, const SDLoc &dl,
1498 ArrayRef<EVT> ResultTys, ArrayRef<SDValue> Ops);
1499 MachineSDNode *getMachineNode(unsigned Opcode, const SDLoc &dl, SDVTList VTs,
1500 ArrayRef<SDValue> Ops);
1501
1502 /// A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
1503 SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT,
1504 SDValue Operand);
1505
1506 /// A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
1507 SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT,
1508 SDValue Operand, SDValue Subreg);
1509
1510 /// Get the specified node if it's already available, or else return NULL.
1511 SDNode *getNodeIfExists(unsigned Opcode, SDVTList VTList,
1512 ArrayRef<SDValue> Ops, const SDNodeFlags Flags);
1513 SDNode *getNodeIfExists(unsigned Opcode, SDVTList VTList,
1514 ArrayRef<SDValue> Ops);
1515
1516 /// Check if a node exists without modifying its flags.
1517 bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef<SDValue> Ops);
1518
1519 /// Creates a SDDbgValue node.
1520 SDDbgValue *getDbgValue(DIVariable *Var, DIExpression *Expr, SDNode *N,
1521 unsigned R, bool IsIndirect, const DebugLoc &DL,
1522 unsigned O);
1523
1524 /// Creates a constant SDDbgValue node.
1525 SDDbgValue *getConstantDbgValue(DIVariable *Var, DIExpression *Expr,
1526 const Value *C, const DebugLoc &DL,
1527 unsigned O);
1528
1529 /// Creates a FrameIndex SDDbgValue node.
1530 SDDbgValue *getFrameIndexDbgValue(DIVariable *Var, DIExpression *Expr,
1531 unsigned FI, bool IsIndirect,
1532 const DebugLoc &DL, unsigned O);
1533
1534 /// Creates a FrameIndex SDDbgValue node.
1535 SDDbgValue *getFrameIndexDbgValue(DIVariable *Var, DIExpression *Expr,
1536 unsigned FI,
1537 ArrayRef<SDNode *> Dependencies,
1538 bool IsIndirect, const DebugLoc &DL,
1539 unsigned O);
1540
1541 /// Creates a VReg SDDbgValue node.
1542 SDDbgValue *getVRegDbgValue(DIVariable *Var, DIExpression *Expr,
1543 unsigned VReg, bool IsIndirect,
1544 const DebugLoc &DL, unsigned O);
1545
1546 /// Creates a SDDbgValue node from a list of locations.
1547 SDDbgValue *getDbgValueList(DIVariable *Var, DIExpression *Expr,
1548 ArrayRef<SDDbgOperand> Locs,
1549 ArrayRef<SDNode *> Dependencies, bool IsIndirect,
1550 const DebugLoc &DL, unsigned O, bool IsVariadic);
1551
1552 /// Creates a SDDbgLabel node.
1553 SDDbgLabel *getDbgLabel(DILabel *Label, const DebugLoc &DL, unsigned O);
1554
1555 /// Transfer debug values from one node to another, while optionally
1556 /// generating fragment expressions for split-up values. If \p InvalidateDbg
1557 /// is set, debug values are invalidated after they are transferred.
1558 void transferDbgValues(SDValue From, SDValue To, unsigned OffsetInBits = 0,
1559 unsigned SizeInBits = 0, bool InvalidateDbg = true);
1560
1561 /// Remove the specified node from the system. If any of its
1562  /// operands then become dead, remove them as well. Inform UpdateListener
1563 /// for each node deleted.
1564 void RemoveDeadNode(SDNode *N);
1565
1566 /// This method deletes the unreachable nodes in the
1567 /// given list, and any nodes that become unreachable as a result.
1568 void RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes);
1569
1570 /// Modify anything using 'From' to use 'To' instead.
1571 /// This can cause recursive merging of nodes in the DAG. Use the first
1572 /// version if 'From' is known to have a single result, use the second
1573 /// if you have two nodes with identical results (or if 'To' has a superset
1574 /// of the results of 'From'), use the third otherwise.
1575 ///
1576 /// These methods all take an optional UpdateListener, which (if not null) is
1577 /// informed about nodes that are deleted and modified due to recursive
1578 /// changes in the dag.
1579 ///
1580 /// These functions only replace all existing uses. It's possible that as
1581 /// these replacements are being performed, CSE may cause the From node
1582 /// to be given new uses. These new uses of From are left in place, and
1583 /// not automatically transferred to To.
1584 ///
1585 void ReplaceAllUsesWith(SDValue From, SDValue To);
1586 void ReplaceAllUsesWith(SDNode *From, SDNode *To);
1587 void ReplaceAllUsesWith(SDNode *From, const SDValue *To);
1588
1589 /// Replace any uses of From with To, leaving
1590 /// uses of other values produced by From.getNode() alone.
1591 void ReplaceAllUsesOfValueWith(SDValue From, SDValue To);
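For example, a hedged sketch (assuming a multi-result node N and a replacement chain NewChain, both hypothetical): only users of result 1 are rewritten, while users of result 0 are left untouched.

  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewChain);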
1592
1593 /// Like ReplaceAllUsesOfValueWith, but for multiple values at once.
1594 /// This correctly handles the case where
1595 /// there is an overlap between the From values and the To values.
1596 void ReplaceAllUsesOfValuesWith(const SDValue *From, const SDValue *To,
1597 unsigned Num);
1598
1599 /// If an existing load has uses of its chain, create a token factor node with
1600 /// that chain and the new memory node's chain and update users of the old
1601 /// chain to the token factor. This ensures that the new memory node will have
1602 /// the same relative memory dependency position as the old load. Returns the
1603 /// new merged load chain.
1604 SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain);
1605
1606 /// If an existing load has uses of its chain, create a token factor node with
1607 /// that chain and the new memory node's chain and update users of the old
1608 /// chain to the token factor. This ensures that the new memory node will have
1609 /// the same relative memory dependency position as the old load. Returns the
1610 /// new merged load chain.
1611 SDValue makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, SDValue NewMemOp);
1612
1613  /// Topological-sort the AllNodes list and assign a unique node id to each
1614  /// node in the DAG based on its topological order. Returns the number of
1615  /// nodes.
1616 unsigned AssignTopologicalOrder();
1617
1618 /// Move node N in the AllNodes list to be immediately
1619 /// before the given iterator Position. This may be used to update the
1620 /// topological ordering when the list of nodes is modified.
1621 void RepositionNode(allnodes_iterator Position, SDNode *N) {
1622 AllNodes.insert(Position, AllNodes.remove(N));
1623 }
1624
1625 /// Returns an APFloat semantics tag appropriate for the given type. If VT is
1626 /// a vector type, the element semantics are returned.
1627 static const fltSemantics &EVTToAPFloatSemantics(EVT VT) {
1628 switch (VT.getScalarType().getSimpleVT().SimpleTy) {
1629    default: llvm_unreachable("Unknown FP format");
1630 case MVT::f16: return APFloat::IEEEhalf();
1631 case MVT::bf16: return APFloat::BFloat();
1632 case MVT::f32: return APFloat::IEEEsingle();
1633 case MVT::f64: return APFloat::IEEEdouble();
1634 case MVT::f80: return APFloat::x87DoubleExtended();
1635 case MVT::f128: return APFloat::IEEEquad();
1636 case MVT::ppcf128: return APFloat::PPCDoubleDouble();
1637 }
1638 }
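As a small usage sketch (not from the report), the returned semantics can seed an APFloat that matches an EVT:

  const fltSemantics &Sem = SelectionDAG::EVTToAPFloatSemantics(MVT::f64);
  APFloat Two(Sem, "2.0");   // constructed with IEEEdouble semantics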
1639
1640 /// Add a dbg_value SDNode. If SD is non-null that means the
1641 /// value is produced by SD.
1642 void AddDbgValue(SDDbgValue *DB, bool isParameter);
1643
1644 /// Add a dbg_label SDNode.
1645 void AddDbgLabel(SDDbgLabel *DB);
1646
1647 /// Get the debug values which reference the given SDNode.
1648 ArrayRef<SDDbgValue*> GetDbgValues(const SDNode* SD) const {
1649 return DbgInfo->getSDDbgValues(SD);
1650 }
1651
1652public:
1653 /// Return true if there are any SDDbgValue nodes associated
1654 /// with this SelectionDAG.
1655 bool hasDebugValues() const { return !DbgInfo->empty(); }
1656
1657 SDDbgInfo::DbgIterator DbgBegin() const { return DbgInfo->DbgBegin(); }
1658 SDDbgInfo::DbgIterator DbgEnd() const { return DbgInfo->DbgEnd(); }
1659
1660 SDDbgInfo::DbgIterator ByvalParmDbgBegin() const {
1661 return DbgInfo->ByvalParmDbgBegin();
1662 }
1663 SDDbgInfo::DbgIterator ByvalParmDbgEnd() const {
1664 return DbgInfo->ByvalParmDbgEnd();
1665 }
1666
1667 SDDbgInfo::DbgLabelIterator DbgLabelBegin() const {
1668 return DbgInfo->DbgLabelBegin();
1669 }
1670 SDDbgInfo::DbgLabelIterator DbgLabelEnd() const {
1671 return DbgInfo->DbgLabelEnd();
1672 }
1673
1674 /// To be invoked on an SDNode that is slated to be erased. This
1675 /// function mirrors \c llvm::salvageDebugInfo.
1676 void salvageDebugInfo(SDNode &N);
1677
1678 void dump() const;
1679
1680 /// In most cases this function returns the ABI alignment for a given type,
1681 /// except for illegal vector types where the alignment exceeds that of the
1682 /// stack. In such cases we attempt to break the vector down to a legal type
1683 /// and return the ABI alignment for that instead.
1684 Align getReducedAlign(EVT VT, bool UseABI);
1685
1686 /// Create a stack temporary based on the size in bytes and the alignment
1687 SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment);
1688
1689 /// Create a stack temporary, suitable for holding the specified value type.
1690 /// If minAlign is specified, the slot size will have at least that alignment.
1691 SDValue CreateStackTemporary(EVT VT, unsigned minAlign = 1);
1692
1693 /// Create a stack temporary suitable for holding either of the specified
1694 /// value types.
1695 SDValue CreateStackTemporary(EVT VT1, EVT VT2);
1696
1697 SDValue FoldSymbolOffset(unsigned Opcode, EVT VT,
1698 const GlobalAddressSDNode *GA,
1699 const SDNode *N2);
1700
1701 SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT,
1702 ArrayRef<SDValue> Ops);
1703
1704 SDValue FoldConstantVectorArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT,
1705 ArrayRef<SDValue> Ops,
1706 const SDNodeFlags Flags = SDNodeFlags());
1707
1708 /// Fold floating-point operations with 2 operands when both operands are
1709 /// constants and/or undefined.
1710 SDValue foldConstantFPMath(unsigned Opcode, const SDLoc &DL, EVT VT,
1711 SDValue N1, SDValue N2);
1712
1713 /// Constant fold a setcc to true or false.
1714 SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond,
1715 const SDLoc &dl);
1716
1717 /// See if the specified operand can be simplified with the knowledge that
1718 /// only the bits specified by DemandedBits are used. If so, return the
1719 /// simpler operand, otherwise return a null SDValue.
1720 ///
1721 /// (This exists alongside SimplifyDemandedBits because GetDemandedBits can
1722 /// simplify nodes with multiple uses more aggressively.)
1723 SDValue GetDemandedBits(SDValue V, const APInt &DemandedBits);
1724
1725 /// See if the specified operand can be simplified with the knowledge that
1726 /// only the bits specified by DemandedBits are used in the elements specified
1727 /// by DemandedElts. If so, return the simpler operand, otherwise return a
1728 /// null SDValue.
1729 ///
1730 /// (This exists alongside SimplifyDemandedBits because GetDemandedBits can
1731 /// simplify nodes with multiple uses more aggressively.)
1732 SDValue GetDemandedBits(SDValue V, const APInt &DemandedBits,
1733 const APInt &DemandedElts);
1734
1735 /// Return true if the sign bit of Op is known to be zero.
1736 /// We use this predicate to simplify operations downstream.
1737 bool SignBitIsZero(SDValue Op, unsigned Depth = 0) const;
1738
1739 /// Return true if 'Op & Mask' is known to be zero. We
1740 /// use this predicate to simplify operations downstream. Op and Mask are
1741 /// known to be the same type.
1742 bool MaskedValueIsZero(SDValue Op, const APInt &Mask,
1743 unsigned Depth = 0) const;
1744
1745 /// Return true if 'Op & Mask' is known to be zero in DemandedElts. We
1746 /// use this predicate to simplify operations downstream. Op and Mask are
1747 /// known to be the same type.
1748 bool MaskedValueIsZero(SDValue Op, const APInt &Mask,
1749 const APInt &DemandedElts, unsigned Depth = 0) const;
1750
1751 /// Return true if '(Op & Mask) == Mask'.
1752 /// Op and Mask are known to be the same type.
1753 bool MaskedValueIsAllOnes(SDValue Op, const APInt &Mask,
1754 unsigned Depth = 0) const;
1755
1756 /// Determine which bits of Op are known to be either zero or one and return
1757 /// them in Known. For vectors, the known bits are those that are shared by
1758 /// every vector element.
1759 /// Targets can implement the computeKnownBitsForTargetNode method in the
1760 /// TargetLowering class to allow target nodes to be understood.
1761 KnownBits computeKnownBits(SDValue Op, unsigned Depth = 0) const;
1762
1763 /// Determine which bits of Op are known to be either zero or one and return
1764 /// them in Known. The DemandedElts argument allows us to only collect the
1765 /// known bits that are shared by the requested vector elements.
1766 /// Targets can implement the computeKnownBitsForTargetNode method in the
1767 /// TargetLowering class to allow target nodes to be understood.
1768 KnownBits computeKnownBits(SDValue Op, const APInt &DemandedElts,
1769 unsigned Depth = 0) const;
1770
1771 /// Used to represent the possible overflow behavior of an operation.
1772 /// Never: the operation cannot overflow.
1773 /// Always: the operation will always overflow.
1774 /// Sometime: the operation may or may not overflow.
1775 enum OverflowKind {
1776 OFK_Never,
1777 OFK_Sometime,
1778 OFK_Always,
1779 };
1780
1781  /// Determine if the result of the addition of 2 nodes can overflow.
1782 OverflowKind computeOverflowKind(SDValue N0, SDValue N1) const;
1783
1784 /// Test if the given value is known to have exactly one bit set. This differs
1785 /// from computeKnownBits in that it doesn't necessarily determine which bit
1786 /// is set.
1787 bool isKnownToBeAPowerOfTwo(SDValue Val) const;
1788
1789 /// Return the number of times the sign bit of the register is replicated into
1790 /// the other bits. We know that at least 1 bit is always equal to the sign
1791 /// bit (itself), but other cases can give us information. For example,
1792 /// immediately after an "SRA X, 2", we know that the top 3 bits are all equal
1793 /// to each other, so we return 3. Targets can implement the
1794 /// ComputeNumSignBitsForTarget method in the TargetLowering class to allow
1795 /// target nodes to be understood.
1796 unsigned ComputeNumSignBits(SDValue Op, unsigned Depth = 0) const;
1797
1798 /// Return the number of times the sign bit of the register is replicated into
1799 /// the other bits. We know that at least 1 bit is always equal to the sign
1800 /// bit (itself), but other cases can give us information. For example,
1801 /// immediately after an "SRA X, 2", we know that the top 3 bits are all equal
1802 /// to each other, so we return 3. The DemandedElts argument allows
1803 /// us to only collect the minimum sign bits of the requested vector elements.
1804 /// Targets can implement the ComputeNumSignBitsForTarget method in the
1805 /// TargetLowering class to allow target nodes to be understood.
1806 unsigned ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
1807 unsigned Depth = 0) const;
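A minimal sketch of the SRA example in the comment above (DAG, dl and an i32 value X are assumed to be in scope; the names are illustrative):

  SDValue Sra = DAG.getNode(ISD::SRA, dl, MVT::i32, X,
                            DAG.getConstant(2, dl, MVT::i32));
  unsigned SignBits = DAG.ComputeNumSignBits(Sra);   // at least 3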
1808
1809 /// Return true if the specified operand is an ISD::ADD with a ConstantSDNode
1810 /// on the right-hand side, or if it is an ISD::OR with a ConstantSDNode that
1811 /// is guaranteed to have the same semantics as an ADD. This handles the
1812 /// equivalence:
1813 /// X|Cst == X+Cst iff X&Cst = 0.
1814 bool isBaseWithConstantOffset(SDValue Op) const;
1815
1816 /// Test whether the given SDValue is known to never be NaN. If \p SNaN is
1817 /// true, returns if \p Op is known to never be a signaling NaN (it may still
1818 /// be a qNaN).
1819 bool isKnownNeverNaN(SDValue Op, bool SNaN = false, unsigned Depth = 0) const;
1820
1821 /// \returns true if \p Op is known to never be a signaling NaN.
1822 bool isKnownNeverSNaN(SDValue Op, unsigned Depth = 0) const {
1823 return isKnownNeverNaN(Op, true, Depth);
1824 }
1825
1826 /// Test whether the given floating point SDValue is known to never be
1827 /// positive or negative zero.
1828 bool isKnownNeverZeroFloat(SDValue Op) const;
1829
1830 /// Test whether the given SDValue is known to contain non-zero value(s).
1831 bool isKnownNeverZero(SDValue Op) const;
1832
1833 /// Test whether two SDValues are known to compare equal. This
1834 /// is true if they are the same value, or if one is negative zero and the
1835 /// other positive zero.
1836 bool isEqualTo(SDValue A, SDValue B) const;
1837
1838 /// Return true if A and B have no common bits set. As an example, this can
1839 /// allow an 'add' to be transformed into an 'or'.
1840 bool haveNoCommonBitsSet(SDValue A, SDValue B) const;
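A hedged sketch of the add-to-or rewrite mentioned above (assumes DAG, dl, VT, N0 and N1 from a surrounding combine; illustrative only):

  if (DAG.haveNoCommonBitsSet(N0, N1))
    return DAG.getNode(ISD::OR, dl, VT, N0, N1);   // produces the same value as the ADD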
1841
1842 /// Test whether \p V has a splatted value for all the demanded elements.
1843 ///
1844 /// On success \p UndefElts will indicate the elements that have UNDEF
1845  /// values instead of the splat value; this is only guaranteed to be correct
1846 /// for \p DemandedElts.
1847 ///
1848 /// NOTE: The function will return true for a demanded splat of UNDEF values.
1849 bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts,
1850 unsigned Depth = 0);
1851
1852 /// Test whether \p V has a splatted value.
1853 bool isSplatValue(SDValue V, bool AllowUndefs = false);
1854
1855 /// If V is a splatted value, return the source vector and its splat index.
1856 SDValue getSplatSourceVector(SDValue V, int &SplatIndex);
1857
1858 /// If V is a splat vector, return its scalar source operand by extracting
1859 /// that element from the source vector.
1860 SDValue getSplatValue(SDValue V);
1861
1862 /// If a SHL/SRA/SRL node \p V has a constant or splat constant shift amount
1863 /// that is less than the element bit-width of the shift node, return it.
1864 const APInt *getValidShiftAmountConstant(SDValue V,
1865 const APInt &DemandedElts) const;
1866
1867 /// If a SHL/SRA/SRL node \p V has constant shift amounts that are all less
1868 /// than the element bit-width of the shift node, return the minimum value.
1869 const APInt *
1870 getValidMinimumShiftAmountConstant(SDValue V,
1871 const APInt &DemandedElts) const;
1872
1873 /// If a SHL/SRA/SRL node \p V has constant shift amounts that are all less
1874 /// than the element bit-width of the shift node, return the maximum value.
1875 const APInt *
1876 getValidMaximumShiftAmountConstant(SDValue V,
1877 const APInt &DemandedElts) const;
1878
1879 /// Match a binop + shuffle pyramid that represents a horizontal reduction
1880  /// over the elements of a vector starting from the EXTRACT_VECTOR_ELT node \p
1881  /// Extract. The reduction must use one of the opcodes listed in \p
1882  /// CandidateBinOps and on success \p BinOp will contain the matching opcode.
1883 /// Returns the vector that is being reduced on, or SDValue() if a reduction
1884 /// was not matched. If \p AllowPartials is set then in the case of a
1885 /// reduction pattern that only matches the first few stages, the extracted
1886 /// subvector of the start of the reduction is returned.
1887 SDValue matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp,
1888 ArrayRef<ISD::NodeType> CandidateBinOps,
1889 bool AllowPartials = false);
1890
1891 /// Utility function used by legalize and lowering to
1892 /// "unroll" a vector operation by splitting out the scalars and operating
1893 /// on each element individually. If the ResNE is 0, fully unroll the vector
1894 /// op. If ResNE is less than the width of the vector op, unroll up to ResNE.
1895 /// If the ResNE is greater than the width of the vector op, unroll the
1896 /// vector op and fill the end of the resulting vector with UNDEFS.
1897 SDValue UnrollVectorOp(SDNode *N, unsigned ResNE = 0);
1898
1899 /// Like UnrollVectorOp(), but for the [US](ADD|SUB|MUL)O family of opcodes.
1900 /// This is a separate function because those opcodes have two results.
1901 std::pair<SDValue, SDValue> UnrollVectorOverflowOp(SDNode *N,
1902 unsigned ResNE = 0);
1903
1904 /// Return true if loads are next to each other and can be
1905 /// merged. Check that both are nonvolatile and if LD is loading
1906 /// 'Bytes' bytes from a location that is 'Dist' units away from the
1907 /// location that the 'Base' load is loading from.
1908 bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base,
1909 unsigned Bytes, int Dist) const;
1910
1911 /// Infer alignment of a load / store address. Return None if it cannot be
1912 /// inferred.
1913 MaybeAlign InferPtrAlign(SDValue Ptr) const;
1914
1915  LLVM_ATTRIBUTE_DEPRECATED(inline unsigned InferPtrAlignment(SDValue Ptr)
1916                                const,
1917                            "Use InferPtrAlign instead") {
1918 if (auto A = InferPtrAlign(Ptr))
1919 return A->value();
1920 return 0;
1921 }
1922
1923 /// Compute the VTs needed for the low/hi parts of a type
1924 /// which is split (or expanded) into two not necessarily identical pieces.
1925 std::pair<EVT, EVT> GetSplitDestVTs(const EVT &VT) const;
1926
1927 /// Compute the VTs needed for the low/hi parts of a type, dependent on an
1928 /// enveloping VT that has been split into two identical pieces. Sets the
1929  /// HiIsEmpty flag when the hi type has zero storage size.
1930 std::pair<EVT, EVT> GetDependentSplitDestVTs(const EVT &VT, const EVT &EnvVT,
1931 bool *HiIsEmpty) const;
1932
1933  /// Split the vector with EXTRACT_SUBVECTOR using the provided
1934 /// VTs and return the low/high part.
1935 std::pair<SDValue, SDValue> SplitVector(const SDValue &N, const SDLoc &DL,
1936 const EVT &LoVT, const EVT &HiVT);
1937
1938 /// Split the vector with EXTRACT_SUBVECTOR and return the low/high part.
1939 std::pair<SDValue, SDValue> SplitVector(const SDValue &N, const SDLoc &DL) {
1940 EVT LoVT, HiVT;
1941 std::tie(LoVT, HiVT) = GetSplitDestVTs(N.getValueType());
1942 return SplitVector(N, DL, LoVT, HiVT);
1943 }
1944
1945 /// Split the node's operand with EXTRACT_SUBVECTOR and
1946 /// return the low/high part.
1947 std::pair<SDValue, SDValue> SplitVectorOperand(const SDNode *N, unsigned OpNo)
1948 {
1949 return SplitVector(N->getOperand(OpNo), SDLoc(N));
1950 }
1951
1952 /// Widen the vector up to the next power of two using INSERT_SUBVECTOR.
1953 SDValue WidenVector(const SDValue &N, const SDLoc &DL);
1954
1955 /// Append the extracted elements from Start to Count out of the vector Op in
1956 /// Args. If Count is 0, all of the elements will be extracted. The extracted
1957 /// elements will have type EVT if it is provided, and otherwise their type
1958 /// will be Op's element type.
1959 void ExtractVectorElements(SDValue Op, SmallVectorImpl<SDValue> &Args,
1960 unsigned Start = 0, unsigned Count = 0,
1961 EVT EltVT = EVT());
1962
1963 /// Compute the default alignment value for the given type.
1964 Align getEVTAlign(EVT MemoryVT) const;
1965 /// Compute the default alignment value for the given type.
1966 /// FIXME: Remove once transition to Align is over.
1967 inline unsigned getEVTAlignment(EVT MemoryVT) const {
1968 return getEVTAlign(MemoryVT).value();
1969 }
1970
1971 /// Test whether the given value is a constant int or similar node.
1972 SDNode *isConstantIntBuildVectorOrConstantInt(SDValue N) const;
1973
1974 /// Test whether the given value is a constant FP or similar node.
1975  SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) const;
1976
1977 /// \returns true if \p N is any kind of constant or build_vector of
1978 /// constants, int or float. If a vector, it may not necessarily be a splat.
1979 inline bool isConstantValueOfAnyType(SDValue N) const {
1980 return isConstantIntBuildVectorOrConstantInt(N) ||
1981 isConstantFPBuildVectorOrConstantFP(N);
1982 }
1983
1984 void addCallSiteInfo(const SDNode *CallNode, CallSiteInfoImpl &&CallInfo) {
1985 SDCallSiteDbgInfo[CallNode].CSInfo = std::move(CallInfo);
1986 }
1987
1988 CallSiteInfo getSDCallSiteInfo(const SDNode *CallNode) {
1989 auto I = SDCallSiteDbgInfo.find(CallNode);
1990 if (I != SDCallSiteDbgInfo.end())
1991 return std::move(I->second).CSInfo;
1992 return CallSiteInfo();
1993 }
1994
1995 void addHeapAllocSite(const SDNode *Node, MDNode *MD) {
1996 SDCallSiteDbgInfo[Node].HeapAllocSite = MD;
1997 }
1998
1999 /// Return the HeapAllocSite type associated with the SDNode, if it exists.
2000 MDNode *getHeapAllocSite(const SDNode *Node) {
2001 auto It = SDCallSiteDbgInfo.find(Node);
2002 if (It == SDCallSiteDbgInfo.end())
2003 return nullptr;
2004 return It->second.HeapAllocSite;
2005 }
2006
2007 void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge) {
2008 if (NoMerge)
2009 SDCallSiteDbgInfo[Node].NoMerge = NoMerge;
2010 }
2011
2012 bool getNoMergeSiteInfo(const SDNode *Node) {
2013 auto I = SDCallSiteDbgInfo.find(Node);
2014 if (I == SDCallSiteDbgInfo.end())
2015 return false;
2016 return I->second.NoMerge;
2017 }
2018
2019 /// Return the current function's default denormal handling kind for the given
2020 /// floating point type.
2021 DenormalMode getDenormalMode(EVT VT) const {
2022 return MF->getDenormalMode(EVTToAPFloatSemantics(VT));
2023 }
2024
2025 bool shouldOptForSize() const;
2026
2027 /// Get the (commutative) neutral element for the given opcode, if it exists.
2028 SDValue getNeutralElement(unsigned Opcode, const SDLoc &DL, EVT VT,
2029 SDNodeFlags Flags);
2030
2031private:
2032 void InsertNode(SDNode *N);
2033 bool RemoveNodeFromCSEMaps(SDNode *N);
2034 void AddModifiedNodeToCSEMaps(SDNode *N);
2035 SDNode *FindModifiedNodeSlot(SDNode *N, SDValue Op, void *&InsertPos);
2036 SDNode *FindModifiedNodeSlot(SDNode *N, SDValue Op1, SDValue Op2,
2037 void *&InsertPos);
2038 SDNode *FindModifiedNodeSlot(SDNode *N, ArrayRef<SDValue> Ops,
2039 void *&InsertPos);
2040 SDNode *UpdateSDLocOnMergeSDNode(SDNode *N, const SDLoc &loc);
2041
2042 void DeleteNodeNotInCSEMaps(SDNode *N);
2043 void DeallocateNode(SDNode *N);
2044
2045 void allnodes_clear();
2046
2047 /// Look up the node specified by ID in CSEMap. If it exists, return it. If
2048 /// not, return the insertion token that will make insertion faster. This
2049 /// overload is for nodes other than Constant or ConstantFP, use the other one
2050 /// for those.
2051 SDNode *FindNodeOrInsertPos(const FoldingSetNodeID &ID, void *&InsertPos);
2052
2053 /// Look up the node specified by ID in CSEMap. If it exists, return it. If
2054 /// not, return the insertion token that will make insertion faster. Performs
2055 /// additional processing for constant nodes.
2056 SDNode *FindNodeOrInsertPos(const FoldingSetNodeID &ID, const SDLoc &DL,
2057 void *&InsertPos);
2058
2059 /// List of non-single value types.
2060 FoldingSet<SDVTListNode> VTListMap;
2061
2062 /// Maps to auto-CSE operations.
2063 std::vector<CondCodeSDNode*> CondCodeNodes;
2064
2065 std::vector<SDNode*> ValueTypeNodes;
2066 std::map<EVT, SDNode*, EVT::compareRawBits> ExtendedValueTypeNodes;
2067 StringMap<SDNode*> ExternalSymbols;
2068
2069 std::map<std::pair<std::string, unsigned>, SDNode *> TargetExternalSymbols;
2070 DenseMap<MCSymbol *, SDNode *> MCSymbols;
2071
2072 FlagInserter *Inserter = nullptr;
2073};
2074
2075template <> struct GraphTraits<SelectionDAG*> : public GraphTraits<SDNode*> {
2076 using nodes_iterator = pointer_iterator<SelectionDAG::allnodes_iterator>;
2077
2078 static nodes_iterator nodes_begin(SelectionDAG *G) {
2079 return nodes_iterator(G->allnodes_begin());
2080 }
2081
2082 static nodes_iterator nodes_end(SelectionDAG *G) {
2083 return nodes_iterator(G->allnodes_end());
2084 }
2085};
2086
2087} // end namespace llvm
2088
2089#endif // LLVM_CODEGEN_SELECTIONDAG_H

/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/include/llvm/CodeGen/SelectionDAGNodes.h

1//===- llvm/CodeGen/SelectionDAGNodes.h - SelectionDAG Nodes ----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file declares the SDNode class and derived classes, which are used to
10// represent the nodes and operations present in a SelectionDAG. These nodes
11// and operations are machine code level operations, with some similarities to
12// the GCC RTL representation.
13//
14// Clients should include the SelectionDAG.h file instead of this file directly.
15//
16//===----------------------------------------------------------------------===//
17
18#ifndef LLVM_CODEGEN_SELECTIONDAGNODES_H
19#define LLVM_CODEGEN_SELECTIONDAGNODES_H
20
21#include "llvm/ADT/APFloat.h"
22#include "llvm/ADT/ArrayRef.h"
23#include "llvm/ADT/BitVector.h"
24#include "llvm/ADT/FoldingSet.h"
25#include "llvm/ADT/GraphTraits.h"
26#include "llvm/ADT/SmallPtrSet.h"
27#include "llvm/ADT/SmallVector.h"
28#include "llvm/ADT/ilist_node.h"
29#include "llvm/ADT/iterator.h"
30#include "llvm/ADT/iterator_range.h"
31#include "llvm/CodeGen/ISDOpcodes.h"
32#include "llvm/CodeGen/MachineMemOperand.h"
33#include "llvm/CodeGen/Register.h"
34#include "llvm/CodeGen/ValueTypes.h"
35#include "llvm/IR/Constants.h"
36#include "llvm/IR/DebugLoc.h"
37#include "llvm/IR/Instruction.h"
38#include "llvm/IR/Instructions.h"
39#include "llvm/IR/Metadata.h"
40#include "llvm/IR/Operator.h"
41#include "llvm/Support/AlignOf.h"
42#include "llvm/Support/AtomicOrdering.h"
43#include "llvm/Support/Casting.h"
44#include "llvm/Support/ErrorHandling.h"
45#include "llvm/Support/MachineValueType.h"
46#include "llvm/Support/TypeSize.h"
47#include <algorithm>
48#include <cassert>
49#include <climits>
50#include <cstddef>
51#include <cstdint>
52#include <cstring>
53#include <iterator>
54#include <string>
55#include <tuple>
56
57namespace llvm {
58
59class APInt;
60class Constant;
61template <typename T> struct DenseMapInfo;
62class GlobalValue;
63class MachineBasicBlock;
64class MachineConstantPoolValue;
65class MCSymbol;
66class raw_ostream;
67class SDNode;
68class SelectionDAG;
69class Type;
70class Value;
71
72void checkForCycles(const SDNode *N, const SelectionDAG *DAG = nullptr,
73 bool force = false);
74
75/// This represents a list of ValueType's that has been intern'd by
76/// a SelectionDAG. Instances of this simple value class are returned by
77/// SelectionDAG::getVTList(...).
78///
79struct SDVTList {
80 const EVT *VTs;
81 unsigned int NumVTs;
82};
83
84namespace ISD {
85
86 /// Node predicates
87
88/// If N is a BUILD_VECTOR or SPLAT_VECTOR node whose elements are all the
89/// same constant or undefined, return true and return the constant value in
90/// \p SplatValue.
91bool isConstantSplatVector(const SDNode *N, APInt &SplatValue);
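A small sketch of how this predicate is typically queried (the node pointer N is assumed to be available; not taken from this report):

  APInt SplatValue;
  if (ISD::isConstantSplatVector(N, SplatValue)) {
    // Every defined element of the BUILD_VECTOR/SPLAT_VECTOR equals SplatValue.
  }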
92
93/// Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where
94/// all of the elements are ~0 or undef. If \p BuildVectorOnly is set to
95/// true, it only checks BUILD_VECTOR.
96bool isConstantSplatVectorAllOnes(const SDNode *N,
97 bool BuildVectorOnly = false);
98
99/// Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where
100/// all of the elements are 0 or undef. If \p BuildVectorOnly is set to true, it
101/// only checks BUILD_VECTOR.
102bool isConstantSplatVectorAllZeros(const SDNode *N,
103 bool BuildVectorOnly = false);
104
105/// Return true if the specified node is a BUILD_VECTOR where all of the
106/// elements are ~0 or undef.
107bool isBuildVectorAllOnes(const SDNode *N);
108
109/// Return true if the specified node is a BUILD_VECTOR where all of the
110/// elements are 0 or undef.
111bool isBuildVectorAllZeros(const SDNode *N);
112
113/// Return true if the specified node is a BUILD_VECTOR node of all
114/// ConstantSDNode or undef.
115bool isBuildVectorOfConstantSDNodes(const SDNode *N);
116
117/// Return true if the specified node is a BUILD_VECTOR node of all
118/// ConstantFPSDNode or undef.
119bool isBuildVectorOfConstantFPSDNodes(const SDNode *N);
120
121/// Return true if the node has at least one operand and all operands of the
122/// specified node are ISD::UNDEF.
123bool allOperandsUndef(const SDNode *N);
124
125} // end namespace ISD
126
127//===----------------------------------------------------------------------===//
128/// Unlike LLVM values, Selection DAG nodes may return multiple
129/// values as the result of a computation. Many nodes return multiple values,
130/// from loads (which define a token and a return value) to ADDC (which returns
131/// a result and a carry value), to calls (which may return an arbitrary number
132/// of values).
133///
134/// As such, each use of a SelectionDAG computation must indicate the node that
135/// computes it as well as which return value to use from that node. This pair
136/// of information is represented with the SDValue value type.
137///
138class SDValue {
139 friend struct DenseMapInfo<SDValue>;
140
141 SDNode *Node = nullptr; // The node defining the value we are using.
142 unsigned ResNo = 0; // Which return value of the node we are using.
143
144public:
145 SDValue() = default;
146 SDValue(SDNode *node, unsigned resno);
147
148 /// get the index which selects a specific result in the SDNode
149 unsigned getResNo() const { return ResNo; }
150
151 /// get the SDNode which holds the desired result
152 SDNode *getNode() const { return Node; }
153
154 /// set the SDNode
155 void setNode(SDNode *N) { Node = N; }
156
157 inline SDNode *operator->() const { return Node; }
158
159 bool operator==(const SDValue &O) const {
160 return Node == O.Node && ResNo == O.ResNo;
161 }
162 bool operator!=(const SDValue &O) const {
163 return !operator==(O);
164 }
165 bool operator<(const SDValue &O) const {
166 return std::tie(Node, ResNo) < std::tie(O.Node, O.ResNo);
167 }
168 explicit operator bool() const {
169 return Node != nullptr;
170 }
171
172 SDValue getValue(unsigned R) const {
173 return SDValue(Node, R);
174 }
175
176 /// Return true if this node is an operand of N.
177 bool isOperandOf(const SDNode *N) const;
178
179 /// Return the ValueType of the referenced return value.
180 inline EVT getValueType() const;
181
182 /// Return the simple ValueType of the referenced return value.
183 MVT getSimpleValueType() const {
184 return getValueType().getSimpleVT();
185 }
186
187 /// Returns the size of the value in bits.
188 ///
189 /// If the value type is a scalable vector type, the scalable property will
190 /// be set and the runtime size will be a positive integer multiple of the
191 /// base size.
192 TypeSize getValueSizeInBits() const {
193 return getValueType().getSizeInBits();
194 }
195
196 uint64_t getScalarValueSizeInBits() const {
197 return getValueType().getScalarType().getFixedSizeInBits();
198 }
199
200 // Forwarding methods - These forward to the corresponding methods in SDNode.
201 inline unsigned getOpcode() const;
202 inline unsigned getNumOperands() const;
203 inline const SDValue &getOperand(unsigned i) const;
204 inline uint64_t getConstantOperandVal(unsigned i) const;
205 inline const APInt &getConstantOperandAPInt(unsigned i) const;
206 inline bool isTargetMemoryOpcode() const;
207 inline bool isTargetOpcode() const;
208 inline bool isMachineOpcode() const;
209 inline bool isUndef() const;
210 inline unsigned getMachineOpcode() const;
211 inline const DebugLoc &getDebugLoc() const;
212 inline void dump() const;
213 inline void dump(const SelectionDAG *G) const;
214 inline void dumpr() const;
215 inline void dumpr(const SelectionDAG *G) const;
216
217 /// Return true if this operand (which must be a chain) reaches the
218 /// specified operand without crossing any side-effecting instructions.
219 /// In practice, this looks through token factors and non-volatile loads.
220 /// In order to remain efficient, this only
221/// looks a couple of nodes in; it does not do an exhaustive search.
222 bool reachesChainWithoutSideEffects(SDValue Dest,
223 unsigned Depth = 2) const;
224
225 /// Return true if there are no nodes using value ResNo of Node.
226 inline bool use_empty() const;
227
228 /// Return true if there is exactly one node using value ResNo of Node.
229 inline bool hasOneUse() const;
230};
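Because the accessors above forward through Node, calling them on a default-constructed (null) SDValue is exactly the kind of null dereference the analyzer path in this report steps into via getValueType. A minimal hedged sketch of the guard, for an arbitrary SDValue V:

  if (V) {                        // operator bool: V.getNode() != nullptr
    EVT VT = V.getValueType();    // safe: Node is known non-null here
    (void)VT;
  }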
231
232template<> struct DenseMapInfo<SDValue> {
233 static inline SDValue getEmptyKey() {
234 SDValue V;
235 V.ResNo = -1U;
236 return V;
237 }
238
239 static inline SDValue getTombstoneKey() {
240 SDValue V;
241 V.ResNo = -2U;
242 return V;
243 }
244
245 static unsigned getHashValue(const SDValue &Val) {
246 return ((unsigned)((uintptr_t)Val.getNode() >> 4) ^
247 (unsigned)((uintptr_t)Val.getNode() >> 9)) + Val.getResNo();
248 }
249
250 static bool isEqual(const SDValue &LHS, const SDValue &RHS) {
251 return LHS == RHS;
252 }
253};
254
255/// Allow casting operators to work directly on
256/// SDValues as if they were SDNode*'s.
257template<> struct simplify_type<SDValue> {
258 using SimpleType = SDNode *;
259
260 static SimpleType getSimplifiedValue(SDValue &Val) {
261 return Val.getNode();
262 }
263};
264template<> struct simplify_type<const SDValue> {
265 using SimpleType = /*const*/ SDNode *;
266
267 static SimpleType getSimplifiedValue(const SDValue &Val) {
268 return Val.getNode();
269 }
270};
271
272/// Represents a use of a SDNode. This class holds an SDValue,
273/// which records the SDNode being used and the result number, a
274/// pointer to the SDNode using the value, and Next and Prev pointers,
275/// which link together all the uses of an SDNode.
276///
277class SDUse {
278 /// Val - The value being used.
279 SDValue Val;
280 /// User - The user of this value.
281 SDNode *User = nullptr;
282 /// Prev, Next - Pointers to the uses list of the SDNode referred by
283 /// this operand.
284 SDUse **Prev = nullptr;
285 SDUse *Next = nullptr;
286
287public:
288 SDUse() = default;
289 SDUse(const SDUse &U) = delete;
290 SDUse &operator=(const SDUse &) = delete;
291
292 /// Normally SDUse will just implicitly convert to an SDValue that it holds.
293 operator const SDValue&() const { return Val; }
294
295 /// If implicit conversion to SDValue doesn't work, the get() method returns
296 /// the SDValue.
297 const SDValue &get() const { return Val; }
298
299 /// This returns the SDNode that contains this Use.
300 SDNode *getUser() { return User; }
301
302 /// Get the next SDUse in the use list.
303 SDUse *getNext() const { return Next; }
304
305 /// Convenience function for get().getNode().
306 SDNode *getNode() const { return Val.getNode(); }
307 /// Convenience function for get().getResNo().
308 unsigned getResNo() const { return Val.getResNo(); }
309 /// Convenience function for get().getValueType().
310 EVT getValueType() const { return Val.getValueType(); }
311
312 /// Convenience function for get().operator==
313 bool operator==(const SDValue &V) const {
314 return Val == V;
315 }
316
317 /// Convenience function for get().operator!=
318 bool operator!=(const SDValue &V) const {
319 return Val != V;
320 }
321
322 /// Convenience function for get().operator<
323 bool operator<(const SDValue &V) const {
324 return Val < V;
325 }
326
327private:
328 friend class SelectionDAG;
329 friend class SDNode;
330 // TODO: unfriend HandleSDNode once we fix its operand handling.
331 friend class HandleSDNode;
332
333 void setUser(SDNode *p) { User = p; }
334
335 /// Remove this use from its existing use list, assign it the
336 /// given value, and add it to the new value's node's use list.
337 inline void set(const SDValue &V);
338 /// Like set, but only supports initializing a newly-allocated
339 /// SDUse with a non-null value.
340 inline void setInitial(const SDValue &V);
341 /// Like set, but only sets the Node portion of the value,
342 /// leaving the ResNo portion unmodified.
343 inline void setNode(SDNode *N);
344
345 void addToList(SDUse **List) {
346 Next = *List;
347 if (Next) Next->Prev = &Next;
348 Prev = List;
349 *List = this;
350 }
351
352 void removeFromList() {
353 *Prev = Next;
354 if (Next) Next->Prev = Prev;
355 }
356};
357
358/// simplify_type specializations - Allow casting operators to work directly on
359/// SDValues as if they were SDNode*'s.
360template<> struct simplify_type<SDUse> {
361 using SimpleType = SDNode *;
362
363 static SimpleType getSimplifiedValue(SDUse &Val) {
364 return Val.getNode();
365 }
366};
367
368/// These are IR-level optimization flags that may be propagated to SDNodes.
369/// TODO: This data structure should be shared by the IR optimizer and the
370/// backend.
371struct SDNodeFlags {
372private:
373 bool NoUnsignedWrap : 1;
374 bool NoSignedWrap : 1;
375 bool Exact : 1;
376 bool NoNaNs : 1;
377 bool NoInfs : 1;
378 bool NoSignedZeros : 1;
379 bool AllowReciprocal : 1;
380 bool AllowContract : 1;
381 bool ApproximateFuncs : 1;
382 bool AllowReassociation : 1;
383
384 // We assume instructions do not raise floating-point exceptions by default,
385 // and only those marked explicitly may do so. We could choose to represent
386 // this via a positive "FPExcept" flags like on the MI level, but having a
387 // negative "NoFPExcept" flag here (that defaults to true) makes the flag
388 // intersection logic more straightforward.
389 bool NoFPExcept : 1;
390
391public:
392 /// Default constructor turns off all optimization flags.
393 SDNodeFlags()
394 : NoUnsignedWrap(false), NoSignedWrap(false), Exact(false), NoNaNs(false),
395 NoInfs(false), NoSignedZeros(false), AllowReciprocal(false),
396 AllowContract(false), ApproximateFuncs(false),
397 AllowReassociation(false), NoFPExcept(false) {}
398
399 /// Propagate the fast-math-flags from an IR FPMathOperator.
400 void copyFMF(const FPMathOperator &FPMO) {
401 setNoNaNs(FPMO.hasNoNaNs());
402 setNoInfs(FPMO.hasNoInfs());
403 setNoSignedZeros(FPMO.hasNoSignedZeros());
404 setAllowReciprocal(FPMO.hasAllowReciprocal());
405 setAllowContract(FPMO.hasAllowContract());
406 setApproximateFuncs(FPMO.hasApproxFunc());
407 setAllowReassociation(FPMO.hasAllowReassoc());
408 }
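A hedged sketch of importing flags from IR (assumes an llvm::Instruction *I in scope, as during instruction selection; illustrative only):

  SDNodeFlags Flags;
  if (const auto *FPOp = dyn_cast<FPMathOperator>(I))
    Flags.copyFMF(*FPOp);   // carries nnan/ninf/nsz/arcp/contract/afn/reassoc over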
409
410 // These are mutators for each flag.
411 void setNoUnsignedWrap(bool b) { NoUnsignedWrap = b; }
412 void setNoSignedWrap(bool b) { NoSignedWrap = b; }
413 void setExact(bool b) { Exact = b; }
414 void setNoNaNs(bool b) { NoNaNs = b; }
415 void setNoInfs(bool b) { NoInfs = b; }
416 void setNoSignedZeros(bool b) { NoSignedZeros = b; }
417 void setAllowReciprocal(bool b) { AllowReciprocal = b; }
418 void setAllowContract(bool b) { AllowContract = b; }
419 void setApproximateFuncs(bool b) { ApproximateFuncs = b; }
420 void setAllowReassociation(bool b) { AllowReassociation = b; }
421 void setNoFPExcept(bool b) { NoFPExcept = b; }
422
423 // These are accessors for each flag.
424 bool hasNoUnsignedWrap() const { return NoUnsignedWrap; }
425 bool hasNoSignedWrap() const { return NoSignedWrap; }
426 bool hasExact() const { return Exact; }
427 bool hasNoNaNs() const { return NoNaNs; }
428 bool hasNoInfs() const { return NoInfs; }
429 bool hasNoSignedZeros() const { return NoSignedZeros; }
430 bool hasAllowReciprocal() const { return AllowReciprocal; }
431 bool hasAllowContract() const { return AllowContract; }
432 bool hasApproximateFuncs() const { return ApproximateFuncs; }
433 bool hasAllowReassociation() const { return AllowReassociation; }
434 bool hasNoFPExcept() const { return NoFPExcept; }
435
436 /// Clear any flags in this flag set that aren't also set in Flags. All
437 /// flags will be cleared if Flags are undefined.
438 void intersectWith(const SDNodeFlags Flags) {
439 NoUnsignedWrap &= Flags.NoUnsignedWrap;
440 NoSignedWrap &= Flags.NoSignedWrap;
441 Exact &= Flags.Exact;
442 NoNaNs &= Flags.NoNaNs;
443 NoInfs &= Flags.NoInfs;
444 NoSignedZeros &= Flags.NoSignedZeros;
445 AllowReciprocal &= Flags.AllowReciprocal;
446 AllowContract &= Flags.AllowContract;
447 ApproximateFuncs &= Flags.ApproximateFuncs;
448 AllowReassociation &= Flags.AllowReassociation;
449 NoFPExcept &= Flags.NoFPExcept;
450 }
451};
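A short, self-contained sketch of the intersection semantics (values chosen purely for illustration): when two equivalent nodes are merged, only flags set on both survive.

  SDNodeFlags A, B;
  A.setExact(true);     // first node carried 'exact'
  A.intersectWith(B);   // B.Exact is false, so A.hasExact() becomes false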
452
453/// Represents one node in the SelectionDAG.
454///
455class SDNode : public FoldingSetNode, public ilist_node<SDNode> {
456private:
457 /// The operation that this node performs.
458 int16_t NodeType;
459
460protected:
461 // We define a set of mini-helper classes to help us interpret the bits in our
462 // SubclassData. These are designed to fit within a uint16_t so they pack
463 // with NodeType.
464
465#if defined(_AIX) && (!defined(__GNUC__) || defined(__ibmxl__))
466// Except for GCC; by default, AIX compilers store bit-fields in 4-byte words
467// and give the `pack` pragma push semantics.
468#define BEGIN_TWO_BYTE_PACK() _Pragma("pack(2)")
469#define END_TWO_BYTE_PACK() _Pragma("pack(pop)")
470#else
471#define BEGIN_TWO_BYTE_PACK()
472#define END_TWO_BYTE_PACK()
473#endif
474
475BEGIN_TWO_BYTE_PACK()
476 class SDNodeBitfields {
477 friend class SDNode;
478 friend class MemIntrinsicSDNode;
479 friend class MemSDNode;
480 friend class SelectionDAG;
481
482 uint16_t HasDebugValue : 1;
483 uint16_t IsMemIntrinsic : 1;
484 uint16_t IsDivergent : 1;
485 };
486 enum { NumSDNodeBits = 3 };
487
488 class ConstantSDNodeBitfields {
489 friend class ConstantSDNode;
490
491 uint16_t : NumSDNodeBits;
492
493 uint16_t IsOpaque : 1;
494 };
495
496 class MemSDNodeBitfields {
497 friend class MemSDNode;
498 friend class MemIntrinsicSDNode;
499 friend class AtomicSDNode;
500
501 uint16_t : NumSDNodeBits;
502
503 uint16_t IsVolatile : 1;
504 uint16_t IsNonTemporal : 1;
505 uint16_t IsDereferenceable : 1;
506 uint16_t IsInvariant : 1;
507 };
508 enum { NumMemSDNodeBits = NumSDNodeBits + 4 };
509
510 class LSBaseSDNodeBitfields {
511 friend class LSBaseSDNode;
512 friend class MaskedLoadStoreSDNode;
513 friend class MaskedGatherScatterSDNode;
514
515 uint16_t : NumMemSDNodeBits;
516
517 // This storage is shared between disparate class hierarchies to hold an
518 // enumeration specific to the class hierarchy in use.
519 // LSBaseSDNode => enum ISD::MemIndexedMode
520 // MaskedLoadStoreBaseSDNode => enum ISD::MemIndexedMode
521 // MaskedGatherScatterSDNode => enum ISD::MemIndexType
522 uint16_t AddressingMode : 3;
523 };
524 enum { NumLSBaseSDNodeBits = NumMemSDNodeBits + 3 };
525
526 class LoadSDNodeBitfields {
527 friend class LoadSDNode;
528 friend class MaskedLoadSDNode;
529 friend class MaskedGatherSDNode;
530
531 uint16_t : NumLSBaseSDNodeBits;
532
533 uint16_t ExtTy : 2; // enum ISD::LoadExtType
534 uint16_t IsExpanding : 1;
535 };
536
537 class StoreSDNodeBitfields {
538 friend class StoreSDNode;
539 friend class MaskedStoreSDNode;
540 friend class MaskedScatterSDNode;
541
542 uint16_t : NumLSBaseSDNodeBits;
543
544 uint16_t IsTruncating : 1;
545 uint16_t IsCompressing : 1;
546 };
547
548 union {
549 char RawSDNodeBits[sizeof(uint16_t)];
550 SDNodeBitfields SDNodeBits;
551 ConstantSDNodeBitfields ConstantSDNodeBits;
552 MemSDNodeBitfields MemSDNodeBits;
553 LSBaseSDNodeBitfields LSBaseSDNodeBits;
554 LoadSDNodeBitfields LoadSDNodeBits;
555 StoreSDNodeBitfields StoreSDNodeBits;
556 };
557END_TWO_BYTE_PACK()
558#undef BEGIN_TWO_BYTE_PACK
559#undef END_TWO_BYTE_PACK
560
561 // RawSDNodeBits must cover the entirety of the union. This means that all of
562 // the union's members must have size <= RawSDNodeBits. We write the RHS as
563 // "2" instead of sizeof(RawSDNodeBits) because MSVC can't handle the latter.
564 static_assert(sizeof(SDNodeBitfields) <= 2, "field too wide");
565 static_assert(sizeof(ConstantSDNodeBitfields) <= 2, "field too wide");
566 static_assert(sizeof(MemSDNodeBitfields) <= 2, "field too wide");
567 static_assert(sizeof(LSBaseSDNodeBitfields) <= 2, "field too wide");
568 static_assert(sizeof(LoadSDNodeBitfields) <= 2, "field too wide");
569 static_assert(sizeof(StoreSDNodeBitfields) <= 2, "field too wide");
570
571private:
572 friend class SelectionDAG;
573 // TODO: unfriend HandleSDNode once we fix its operand handling.
574 friend class HandleSDNode;
575
576 /// Unique id per SDNode in the DAG.
577 int NodeId = -1;
578
579 /// The values that are used by this operation.
580 SDUse *OperandList = nullptr;
581
582 /// The types of the values this node defines. SDNodes may
583 /// define multiple values simultaneously.
584 const EVT *ValueList;
585
586 /// List of uses for this SDNode.
587 SDUse *UseList = nullptr;
588
589 /// The number of entries in the Operand/Value list.
590 unsigned short NumOperands = 0;
591 unsigned short NumValues;
592
593 // The ordering of the SDNodes. It roughly corresponds to the ordering of the
594 // original LLVM instructions.
595 // This is used for turning off scheduling, because we'll forgo
596 // the normal scheduling algorithms and output the instructions according to
597 // this ordering.
598 unsigned IROrder;
599
600 /// Source line information.
601 DebugLoc debugLoc;
602
603 /// Return a pointer to the specified value type.
604 static const EVT *getValueTypeList(EVT VT);
605
606 SDNodeFlags Flags;
607
608public:
609 /// Unique and persistent id per SDNode in the DAG.
610 /// Used for debug printing.
611 uint16_t PersistentId;
612
613 //===--------------------------------------------------------------------===//
614 // Accessors
615 //
616
617 /// Return the SelectionDAG opcode value for this node. For
618 /// pre-isel nodes (those for which isMachineOpcode returns false), these
619 /// are the opcode values in the ISD and <target>ISD namespaces. For
620 /// post-isel opcodes, see getMachineOpcode.
621 unsigned getOpcode() const { return (unsigned short)NodeType; }
622
623 /// Test if this node has a target-specific opcode (in the
624 /// \<target\>ISD namespace).
625 bool isTargetOpcode() const { return NodeType >= ISD::BUILTIN_OP_END; }
626
627 /// Test if this node has a target-specific opcode that may raise
628 /// FP exceptions (in the \<target\>ISD namespace and greater than
629 /// FIRST_TARGET_STRICTFP_OPCODE). Note that all target memory
630 /// opcodes are currently automatically considered to possibly raise
631 /// FP exceptions as well.
632 bool isTargetStrictFPOpcode() const {
633 return NodeType >= ISD::FIRST_TARGET_STRICTFP_OPCODE;
634 }
635
636 /// Test if this node has a target-specific
637 /// memory-referencing opcode (in the \<target\>ISD namespace and
638 /// greater than FIRST_TARGET_MEMORY_OPCODE).
639 bool isTargetMemoryOpcode() const {
640 return NodeType >= ISD::FIRST_TARGET_MEMORY_OPCODE;
641 }
642
643 /// Return true if this node's opcode is ISD::UNDEF.
644 bool isUndef() const { return NodeType == ISD::UNDEF; }
645
646 /// Test if this node is a memory intrinsic (with valid pointer information).
647 /// INTRINSIC_W_CHAIN and INTRINSIC_VOID nodes are sometimes created for
648 /// non-memory intrinsics (with chains) that are not really instances of
649 /// MemSDNode. For such nodes, we need some extra state to determine the
650 /// proper classof relationship.
651 bool isMemIntrinsic() const {
652 return (NodeType == ISD::INTRINSIC_W_CHAIN ||
653 NodeType == ISD::INTRINSIC_VOID) &&
654 SDNodeBits.IsMemIntrinsic;
655 }
656
657 /// Test if this node is a strict floating point pseudo-op.
658 bool isStrictFPOpcode() {
659 switch (NodeType) {
660 default:
661 return false;
662 case ISD::STRICT_FP16_TO_FP:
663 case ISD::STRICT_FP_TO_FP16:
664#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
665 case ISD::STRICT_##DAGN:
666#include "llvm/IR/ConstrainedOps.def"
667 return true;
668 }
669 }
670
671 /// Test if this node has a post-isel opcode, directly
672 /// corresponding to a MachineInstr opcode.
673 bool isMachineOpcode() const { return NodeType < 0; }
674
675 /// This may only be called if isMachineOpcode returns
676 /// true. It returns the MachineInstr opcode value that the node's opcode
677 /// corresponds to.
678 unsigned getMachineOpcode() const {
679 assert(isMachineOpcode() && "Not a MachineInstr opcode!");
680 return ~NodeType;
681 }
682
683 bool getHasDebugValue() const { return SDNodeBits.HasDebugValue; }
684 void setHasDebugValue(bool b) { SDNodeBits.HasDebugValue = b; }
685
686 bool isDivergent() const { return SDNodeBits.IsDivergent; }
687
688 /// Return true if there are no uses of this node.
689 bool use_empty() const { return UseList == nullptr; }
690
691 /// Return true if there is exactly one use of this node.
692 bool hasOneUse() const { return hasSingleElement(uses()); }
693
694 /// Return the number of uses of this node. This method takes
695 /// time proportional to the number of uses.
696 size_t use_size() const { return std::distance(use_begin(), use_end()); }
697
698 /// Return the unique node id.
699 int getNodeId() const { return NodeId; }
700
701 /// Set unique node id.
702 void setNodeId(int Id) { NodeId = Id; }
703
704 /// Return the node ordering.
705 unsigned getIROrder() const { return IROrder; }
706
707 /// Set the node ordering.
708 void setIROrder(unsigned Order) { IROrder = Order; }
709
710 /// Return the source location info.
711 const DebugLoc &getDebugLoc() const { return debugLoc; }
712
713 /// Set source location info. Try to avoid this, putting
714 /// it in the constructor is preferable.
715 void setDebugLoc(DebugLoc dl) { debugLoc = std::move(dl); }
716
717 /// This class provides iterator support for SDUse
718 /// operands that use a specific SDNode.
719 class use_iterator {
720 friend class SDNode;
721
722 SDUse *Op = nullptr;
723
724 explicit use_iterator(SDUse *op) : Op(op) {}
725
726 public:
727 using iterator_category = std::forward_iterator_tag;
728 using value_type = SDUse;
729 using difference_type = std::ptrdiff_t;
730 using pointer = value_type *;
731 using reference = value_type &;
732
733 use_iterator() = default;
734 use_iterator(const use_iterator &I) : Op(I.Op) {}
735
736 bool operator==(const use_iterator &x) const {
737 return Op == x.Op;
738 }
739 bool operator!=(const use_iterator &x) const {
740 return !operator==(x);
741 }
742
743 /// Return true if this iterator is at the end of uses list.
744 bool atEnd() const { return Op == nullptr; }
745
746 // Iterator traversal: forward iteration only.
747 use_iterator &operator++() { // Preincrement
748 assert(Op && "Cannot increment end iterator!");
749 Op = Op->getNext();
750 return *this;
751 }
752
753 use_iterator operator++(int) { // Postincrement
754 use_iterator tmp = *this; ++*this; return tmp;
755 }
756
757 /// Retrieve a pointer to the current user node.
758 SDNode *operator*() const {
759 assert(Op && "Cannot dereference end iterator!");
760 return Op->getUser();
761 }
762
763 SDNode *operator->() const { return operator*(); }
764
765 SDUse &getUse() const { return *Op; }
766
767 /// Retrieve the operand # of this use in its user.
768 unsigned getOperandNo() const {
769 assert(Op && "Cannot dereference end iterator!");
770 return (unsigned)(Op - Op->getUser()->OperandList);
771 }
772 };
773
774 /// Provide iteration support to walk over all uses of an SDNode.
775 use_iterator use_begin() const {
776 return use_iterator(UseList);
777 }
778
779 static use_iterator use_end() { return use_iterator(nullptr); }
780
781 inline iterator_range<use_iterator> uses() {
782 return make_range(use_begin(), use_end());
783 }
784 inline iterator_range<use_iterator> uses() const {
785 return make_range(use_begin(), use_end());
786 }
787
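// --- Editorial sketch (not part of SelectionDAGNodes.h): a minimal example of
// walking a node's users with uses(), assuming the LLVM headers above. The
// function name and the check against ISD::CopyToReg are illustrative only.
#include "llvm/CodeGen/SelectionDAGNodes.h"

static bool allUsersAreCopyToReg(const llvm::SDNode *N) {
  // use_iterator dereferences to the *user* node; use getUse()/getOperandNo()
  // on the iterator itself when the specific operand is needed.
  for (llvm::SDNode *User : N->uses())
    if (User->getOpcode() != llvm::ISD::CopyToReg)
      return false;
  return true;
}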
788 /// Return true if there are exactly NUSES uses of the indicated value.
789 /// This method ignores uses of other values defined by this operation.
790 bool hasNUsesOfValue(unsigned NUses, unsigned Value) const;
791
792 /// Return true if there are any use of the indicated value.
793 /// This method ignores uses of other values defined by this operation.
794 bool hasAnyUseOfValue(unsigned Value) const;
795
796 /// Return true if this node is the only use of N.
797 bool isOnlyUserOf(const SDNode *N) const;
798
799 /// Return true if this node is an operand of N.
800 bool isOperandOf(const SDNode *N) const;
801
802 /// Return true if this node is a predecessor of N.
803 /// NOTE: Implemented on top of hasPredecessor and every bit as
804 /// expensive. Use carefully.
805 bool isPredecessorOf(const SDNode *N) const {
806 return N->hasPredecessor(this);
807 }
808
809 /// Return true if N is a predecessor of this node.
810 /// N is either an operand of this node, or can be reached by recursively
811 /// traversing up the operands.
812 /// NOTE: This is an expensive method. Use it carefully.
813 bool hasPredecessor(const SDNode *N) const;
814
815 /// Returns true if N is a predecessor of any node in Worklist. This
816 /// helper keeps Visited and Worklist externally so that searches can be
817 /// unioned, results can be cached across queries, and the Worklist can be
818 /// extended incrementally. Stops early once N is found, but a later call
819 /// will resume the search. Remember to clear Visited and Worklist if the
820 /// DAG changes. MaxSteps gives a maximum number of nodes to visit before
821 /// giving up. The TopologicalPrune flag signals that positive NodeIds are
822 /// topologically ordered (operands have strictly smaller node ids) and the
823 /// search can be pruned by exploiting this.
824 static bool hasPredecessorHelper(const SDNode *N,
825 SmallPtrSetImpl<const SDNode *> &Visited,
826 SmallVectorImpl<const SDNode *> &Worklist,
827 unsigned int MaxSteps = 0,
828 bool TopologicalPrune = false) {
829 SmallVector<const SDNode *, 8> DeferredNodes;
830 if (Visited.count(N))
831 return true;
832
833 // Node Ids are assigned in three places: as a topological
834 // ordering (> 0), during legalization (values set to 0), and for
835 // new nodes (set to -1). If N has a topological id then we know
836 // that all nodes with smaller ids cannot be successors and we need
837 // not check them. Filter out all nodes that cannot match, adding
838 // them back to the worklist before exit in case of multiple calls.
839 // Note that during selection the topological ordering may be
840 // violated if a node's predecessor is selected before it. We mark
841 // this at selection by negating the id of unselected successors and
842 // restricting topological pruning to positive ids.
843
844 int NId = N->getNodeId();
845 // If we invalidated the Id, reconstruct the original NId.
846 if (NId < -1)
847 NId = -(NId + 1);
848
849 bool Found = false;
850 while (!Worklist.empty()) {
851 const SDNode *M = Worklist.pop_back_val();
852 int MId = M->getNodeId();
853 if (TopologicalPrune && M->getOpcode() != ISD::TokenFactor && (NId > 0) &&
854 (MId > 0) && (MId < NId)) {
855 DeferredNodes.push_back(M);
856 continue;
857 }
858 for (const SDValue &OpV : M->op_values()) {
859 SDNode *Op = OpV.getNode();
860 if (Visited.insert(Op).second)
861 Worklist.push_back(Op);
862 if (Op == N)
863 Found = true;
864 }
865 if (Found)
866 break;
867 if (MaxSteps != 0 && Visited.size() >= MaxSteps)
868 break;
869 }
870 // Push deferred nodes back on worklist.
871 Worklist.append(DeferredNodes.begin(), DeferredNodes.end());
872 // If we bailed early, conservatively return found.
873 if (MaxSteps != 0 && Visited.size() >= MaxSteps)
874 return true;
875 return Found;
876 }
877
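// --- Editorial sketch (not part of SelectionDAGNodes.h): how a DAG combine
// typically uses hasPredecessorHelper to avoid creating a cycle. `N` and
// `Candidate` are hypothetical placeholders; the MaxSteps value is arbitrary.
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"

static bool wouldCreateCycle(llvm::SDNode *N, llvm::SDNode *Candidate) {
  // Seed the worklist with Candidate; the helper walks Candidate's operand
  // graph and reports whether N is reachable. Visited/Worklist live in the
  // caller so repeated queries can reuse the cached state.
  llvm::SmallPtrSet<const llvm::SDNode *, 16> Visited;
  llvm::SmallVector<const llvm::SDNode *, 8> Worklist;
  Worklist.push_back(Candidate);
  // When MaxSteps is hit the helper conservatively returns true (see above).
  return llvm::SDNode::hasPredecessorHelper(N, Visited, Worklist,
                                            /*MaxSteps=*/8192);
}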
878 /// Return true if all the users of N are contained in Nodes.
879 /// NOTE: Requires at least one match, but doesn't require them all.
880 static bool areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N);
881
882 /// Return the number of values used by this operation.
883 unsigned getNumOperands() const { return NumOperands; }
884
885 /// Return the maximum number of operands that a SDNode can hold.
886 static constexpr size_t getMaxNumOperands() {
887 return std::numeric_limits<decltype(SDNode::NumOperands)>::max();
888 }
889
890 /// Helper method returns the integer value of a ConstantSDNode operand.
891 inline uint64_t getConstantOperandVal(unsigned Num) const;
892
893 /// Helper method returns the APInt of a ConstantSDNode operand.
894 inline const APInt &getConstantOperandAPInt(unsigned Num) const;
895
896 const SDValue &getOperand(unsigned Num) const {
897 assert(Num < NumOperands && "Invalid child # of SDNode!");
898 return OperandList[Num];
899 }
900
901 using op_iterator = SDUse *;
902
903 op_iterator op_begin() const { return OperandList; }
904 op_iterator op_end() const { return OperandList+NumOperands; }
905 ArrayRef<SDUse> ops() const { return makeArrayRef(op_begin(), op_end()); }
906
907 /// Iterator for directly iterating over the operand SDValue's.
908 struct value_op_iterator
909 : iterator_adaptor_base<value_op_iterator, op_iterator,
910 std::random_access_iterator_tag, SDValue,
911 ptrdiff_t, value_op_iterator *,
912 value_op_iterator *> {
913 explicit value_op_iterator(SDUse *U = nullptr)
914 : iterator_adaptor_base(U) {}
915
916 const SDValue &operator*() const { return I->get(); }
917 };
918
919 iterator_range<value_op_iterator> op_values() const {
920 return make_range(value_op_iterator(op_begin()),
921 value_op_iterator(op_end()));
922 }
923
924 SDVTList getVTList() const {
925 SDVTList X = { ValueList, NumValues };
926 return X;
927 }
928
929 /// If this node has a glue operand, return the node
930 /// to which the glue operand points. Otherwise return NULL.
931 SDNode *getGluedNode() const {
932 if (getNumOperands() != 0 &&
933 getOperand(getNumOperands()-1).getValueType() == MVT::Glue)
934 return getOperand(getNumOperands()-1).getNode();
935 return nullptr;
936 }
937
938 /// If this node has a glue value with a user, return
939 /// the user (there is at most one). Otherwise return NULL.
940 SDNode *getGluedUser() const {
941 for (use_iterator UI = use_begin(), UE = use_end(); UI != UE; ++UI)
942 if (UI.getUse().get().getValueType() == MVT::Glue)
943 return *UI;
944 return nullptr;
945 }
946
947 SDNodeFlags getFlags() const { return Flags; }
948 void setFlags(SDNodeFlags NewFlags) { Flags = NewFlags; }
949
950 /// Clear any flags in this node that aren't also set in Flags.
951 /// If Flags is not in a defined state then this has no effect.
952 void intersectFlagsWith(const SDNodeFlags Flags);
953
954 /// Return the number of values defined/returned by this operator.
955 unsigned getNumValues() const { return NumValues; }
956
957 /// Return the type of a specified result.
958 EVT getValueType(unsigned ResNo) const {
959 assert(ResNo < NumValues && "Illegal result number!");
960 return ValueList[ResNo];
961 }
962
963 /// Return the type of a specified result as a simple type.
964 MVT getSimpleValueType(unsigned ResNo) const {
965 return getValueType(ResNo).getSimpleVT();
966 }
967
968 /// Returns MVT::getSizeInBits(getValueType(ResNo)).
969 ///
970 /// If the value type is a scalable vector type, the scalable property will
971 /// be set and the runtime size will be a positive integer multiple of the
972 /// base size.
973 TypeSize getValueSizeInBits(unsigned ResNo) const {
974 return getValueType(ResNo).getSizeInBits();
975 }
976
977 using value_iterator = const EVT *;
978
979 value_iterator value_begin() const { return ValueList; }
980 value_iterator value_end() const { return ValueList+NumValues; }
981 iterator_range<value_iterator> values() const {
982 return llvm::make_range(value_begin(), value_end());
983 }
984
985 /// Return the opcode of this operation for printing.
986 std::string getOperationName(const SelectionDAG *G = nullptr) const;
987 static const char* getIndexedModeName(ISD::MemIndexedMode AM);
988 void print_types(raw_ostream &OS, const SelectionDAG *G) const;
989 void print_details(raw_ostream &OS, const SelectionDAG *G) const;
990 void print(raw_ostream &OS, const SelectionDAG *G = nullptr) const;
991 void printr(raw_ostream &OS, const SelectionDAG *G = nullptr) const;
992
993 /// Print a SelectionDAG node and all children down to
994 /// the leaves. The given SelectionDAG allows target-specific nodes
995 /// to be printed in human-readable form. Unlike printr, this will
996 /// print the whole DAG, including children that appear multiple
997 /// times.
998 ///
999 void printrFull(raw_ostream &O, const SelectionDAG *G = nullptr) const;
1000
1001 /// Print a SelectionDAG node and children up to
1002 /// depth "depth." The given SelectionDAG allows target-specific
1003 /// nodes to be printed in human-readable form. Unlike printr, this
1004 /// will print children that appear multiple times wherever they are
1005 /// used.
1006 ///
1007 void printrWithDepth(raw_ostream &O, const SelectionDAG *G = nullptr,
1008 unsigned depth = 100) const;
1009
1010 /// Dump this node, for debugging.
1011 void dump() const;
1012
1013 /// Dump (recursively) this node and its use-def subgraph.
1014 void dumpr() const;
1015
1016 /// Dump this node, for debugging.
1017 /// The given SelectionDAG allows target-specific nodes to be printed
1018 /// in human-readable form.
1019 void dump(const SelectionDAG *G) const;
1020
1021 /// Dump (recursively) this node and its use-def subgraph.
1022 /// The given SelectionDAG allows target-specific nodes to be printed
1023 /// in human-readable form.
1024 void dumpr(const SelectionDAG *G) const;
1025
1026 /// printrFull to dbgs(). The given SelectionDAG allows
1027 /// target-specific nodes to be printed in human-readable form.
1028 /// Unlike dumpr, this will print the whole DAG, including children
1029 /// that appear multiple times.
1030 void dumprFull(const SelectionDAG *G = nullptr) const;
1031
1032 /// printrWithDepth to dbgs(). The given
1033 /// SelectionDAG allows target-specific nodes to be printed in
1034 /// human-readable form. Unlike dumpr, this will print children
1035 /// that appear multiple times wherever they are used.
1036 ///
1037 void dumprWithDepth(const SelectionDAG *G = nullptr,
1038 unsigned depth = 100) const;
1039
1040 /// Gather unique data for the node.
1041 void Profile(FoldingSetNodeID &ID) const;
1042
1043 /// This method should only be used by the SDUse class.
1044 void addUse(SDUse &U) { U.addToList(&UseList); }
1045
1046protected:
1047 static SDVTList getSDVTList(EVT VT) {
1048 SDVTList Ret = { getValueTypeList(VT), 1 };
1049 return Ret;
1050 }
1051
1052 /// Create an SDNode.
1053 ///
1054 /// SDNodes are created without any operands, and never own the operand
1055 /// storage. To add operands, see SelectionDAG::createOperands.
1056 SDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs)
1057 : NodeType(Opc), ValueList(VTs.VTs), NumValues(VTs.NumVTs),
1058 IROrder(Order), debugLoc(std::move(dl)) {
1059 memset(&RawSDNodeBits, 0, sizeof(RawSDNodeBits));
1060 assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor");
1061 assert(NumValues == VTs.NumVTs &&
1062 "NumValues wasn't wide enough for its operands!");
1063 }
1064
1065 /// Release the operands and set this node to have zero operands.
1066 void DropOperands();
1067};
1068
1069/// Wrapper class for IR location info (IR ordering and DebugLoc) to be passed
1070/// into SDNode creation functions.
1071/// When an SDNode is created from the DAGBuilder, the DebugLoc is extracted
1072/// from the original Instruction, and IROrder is the ordinal position of
1073/// the instruction.
1074/// When an SDNode is created after the DAG is being built, both DebugLoc and
1075/// the IROrder are propagated from the original SDNode.
1076/// So SDLoc class provides two constructors besides the default one, one to
1077/// be used by the DAGBuilder, the other to be used by others.
1078class SDLoc {
1079private:
1080 DebugLoc DL;
1081 int IROrder = 0;
1082
1083public:
1084 SDLoc() = default;
1085 SDLoc(const SDNode *N) : DL(N->getDebugLoc()), IROrder(N->getIROrder()) {}
1086 SDLoc(const SDValue V) : SDLoc(V.getNode()) {}
1087 SDLoc(const Instruction *I, int Order) : IROrder(Order) {
1088 assert(Order >= 0 && "bad IROrder");
1089 if (I)
1090 DL = I->getDebugLoc();
1091 }
1092
1093 unsigned getIROrder() const { return IROrder; }
1094 const DebugLoc &getDebugLoc() const { return DL; }
1095};
1096
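// --- Editorial sketch (not part of SelectionDAGNodes.h): SDLoc is normally
// constructed from the node being rewritten so a replacement node inherits
// its DebugLoc and IROrder. `DAG`, `N` and negateViaSub are hypothetical.
#include "llvm/CodeGen/SelectionDAG.h"

static llvm::SDValue negateViaSub(llvm::SelectionDAG &DAG, llvm::SDNode *N) {
  llvm::SDLoc DL(N);                   // copy N's debug location and IR order
  llvm::EVT VT = N->getValueType(0);
  // Build (0 - x) carrying the same source location as the original node.
  return DAG.getNode(llvm::ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
                     llvm::SDValue(N, 0));
}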
1097// Define inline functions from the SDValue class.
1098
1099inline SDValue::SDValue(SDNode *node, unsigned resno)
1100 : Node(node), ResNo(resno) {
1101 // Explicitly check for !ResNo to avoid use-after-free, because there are
1102 // callers that use SDValue(N, 0) with a deleted N to indicate successful
1103 // combines.
1104 assert((!Node || !ResNo || ResNo < Node->getNumValues()) &&
1105 "Invalid result number for the given node!");
1106 assert(ResNo < -2U && "Cannot use result numbers reserved for DenseMaps.");
1107}
1108
1109inline unsigned SDValue::getOpcode() const {
1110 return Node->getOpcode();
1111}
1112
1113inline EVT SDValue::getValueType() const {
1114 return Node->getValueType(ResNo);
34: Called C++ object pointer is null
1115}
1116
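// --- Editorial note (not part of SelectionDAGNodes.h): the analyzer warning
// above fires because Node can be null when getValueType() is reached. The
// usual caller-side guard is to test getNode() before querying the value
// type; a minimal sketch, with isGlueResult as a hypothetical name.
#include "llvm/CodeGen/SelectionDAGNodes.h"

static bool isGlueResult(llvm::SDValue V) {
  // An empty or failed-match SDValue is (nullptr, 0); reject it before the
  // call that would otherwise dereference a null Node pointer.
  if (!V.getNode())
    return false;
  return V.getValueType() == llvm::MVT::Glue;
}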
1117inline unsigned SDValue::getNumOperands() const {
1118 return Node->getNumOperands();
1119}
1120
1121inline const SDValue &SDValue::getOperand(unsigned i) const {
1122 return Node->getOperand(i);
1123}
1124
1125inline uint64_t SDValue::getConstantOperandVal(unsigned i) const {
1126 return Node->getConstantOperandVal(i);
1127}
1128
1129inline const APInt &SDValue::getConstantOperandAPInt(unsigned i) const {
1130 return Node->getConstantOperandAPInt(i);
1131}
1132
1133inline bool SDValue::isTargetOpcode() const {
1134 return Node->isTargetOpcode();
1135}
1136
1137inline bool SDValue::isTargetMemoryOpcode() const {
1138 return Node->isTargetMemoryOpcode();
1139}
1140
1141inline bool SDValue::isMachineOpcode() const {
1142 return Node->isMachineOpcode();
1143}
1144
1145inline unsigned SDValue::getMachineOpcode() const {
1146 return Node->getMachineOpcode();
1147}
1148
1149inline bool SDValue::isUndef() const {
1150 return Node->isUndef();
1151}
1152
1153inline bool SDValue::use_empty() const {
1154 return !Node->hasAnyUseOfValue(ResNo);
1155}
1156
1157inline bool SDValue::hasOneUse() const {
1158 return Node->hasNUsesOfValue(1, ResNo);
1159}
1160
1161inline const DebugLoc &SDValue::getDebugLoc() const {
1162 return Node->getDebugLoc();
1163}
1164
1165inline void SDValue::dump() const {
1166 return Node->dump();
1167}
1168
1169inline void SDValue::dump(const SelectionDAG *G) const {
1170 return Node->dump(G);
1171}
1172
1173inline void SDValue::dumpr() const {
1174 return Node->dumpr();
1175}
1176
1177inline void SDValue::dumpr(const SelectionDAG *G) const {
1178 return Node->dumpr(G);
1179}
1180
1181// Define inline functions from the SDUse class.
1182
1183inline void SDUse::set(const SDValue &V) {
1184 if (Val.getNode()) removeFromList();
1185 Val = V;
1186 if (V.getNode()) V.getNode()->addUse(*this);
1187}
1188
1189inline void SDUse::setInitial(const SDValue &V) {
1190 Val = V;
1191 V.getNode()->addUse(*this);
1192}
1193
1194inline void SDUse::setNode(SDNode *N) {
1195 if (Val.getNode()) removeFromList();
1196 Val.setNode(N);
1197 if (N) N->addUse(*this);
1198}
1199
1200/// This class is used to form a handle around another node that
1201/// is persistent and is updated across invocations of replaceAllUsesWith on its
1202/// operand. This node should be directly created by end-users and not added to
1203/// the AllNodes list.
1204class HandleSDNode : public SDNode {
1205 SDUse Op;
1206
1207public:
1208 explicit HandleSDNode(SDValue X)
1209 : SDNode(ISD::HANDLENODE, 0, DebugLoc(), getSDVTList(MVT::Other)) {
1210 // HandleSDNodes are never inserted into the DAG, so they won't be
1211 // auto-numbered. Use ID 65535 as a sentinel.
1212 PersistentId = 0xffff;
1213
1214 // Manually set up the operand list. This node type is special in that it's
1215 // always stack allocated and SelectionDAG does not manage its operands.
1216 // TODO: This should either (a) not be in the SDNode hierarchy, or (b) not
1217 // be so special.
1218 Op.setUser(this);
1219 Op.setInitial(X);
1220 NumOperands = 1;
1221 OperandList = &Op;
1222 }
1223 ~HandleSDNode();
1224
1225 const SDValue &getValue() const { return Op; }
1226};
1227
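// --- Editorial sketch (not part of SelectionDAGNodes.h): the classic use of
// HandleSDNode is to keep a value alive and up to date across a DAG mutation
// that may run ReplaceAllUsesWith or delete nodes. `DAG`, `V` and
// trackAcrossCleanup are hypothetical placeholders.
#include "llvm/CodeGen/SelectionDAG.h"

static llvm::SDValue trackAcrossCleanup(llvm::SelectionDAG &DAG,
                                        llvm::SDValue V) {
  // The handle is stack allocated and registers itself as a user of V, so if
  // V's node is RAUW'd the handle's operand is updated to the replacement.
  llvm::HandleSDNode Handle(V);
  DAG.RemoveDeadNodes();  // any mutation here could invalidate a raw SDValue
  return Handle.getValue();
}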
1228class AddrSpaceCastSDNode : public SDNode {
1229private:
1230 unsigned SrcAddrSpace;
1231 unsigned DestAddrSpace;
1232
1233public:
1234 AddrSpaceCastSDNode(unsigned Order, const DebugLoc &dl, EVT VT,
1235 unsigned SrcAS, unsigned DestAS);
1236
1237 unsigned getSrcAddressSpace() const { return SrcAddrSpace; }
1238 unsigned getDestAddressSpace() const { return DestAddrSpace; }
1239
1240 static bool classof(const SDNode *N) {
1241 return N->getOpcode() == ISD::ADDRSPACECAST;
1242 }
1243};
1244
1245/// This is an abstract virtual class for memory operations.
1246class MemSDNode : public SDNode {
1247private:
1248 // VT of in-memory value.
1249 EVT MemoryVT;
1250
1251protected:
1252 /// Memory reference information.
1253 MachineMemOperand *MMO;
1254
1255public:
1256 MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, SDVTList VTs,
1257 EVT memvt, MachineMemOperand *MMO);
1258
1259 bool readMem() const { return MMO->isLoad(); }
1260 bool writeMem() const { return MMO->isStore(); }
1261
1262 /// Returns alignment and volatility of the memory access
1263 Align getOriginalAlign() const { return MMO->getBaseAlign(); }
1264 Align getAlign() const { return MMO->getAlign(); }
1265 LLVM_ATTRIBUTE_DEPRECATED(unsigned getOriginalAlignment() const,
1266 "Use getOriginalAlign() instead") {
1267 return MMO->getBaseAlign().value();
1268 }
1269 // FIXME: Remove once transition to getAlign is over.
1270 unsigned getAlignment() const { return MMO->getAlign().value(); }
1271
1272 /// Return the SubclassData value, without HasDebugValue. This contains an
1273 /// encoding of the volatile flag, as well as bits used by subclasses. This
1274 /// function should only be used to compute a FoldingSetNodeID value.
1275 /// The HasDebugValue bit is masked out because the CSE map needs to match
1276 /// nodes with debug info against nodes without debug info. The same applies
1277 /// to the IsDivergent bit.
1278 unsigned getRawSubclassData() const {
1279 uint16_t Data;
1280 union {
1281 char RawSDNodeBits[sizeof(uint16_t)];
1282 SDNodeBitfields SDNodeBits;
1283 };
1284 memcpy(&RawSDNodeBits, &this->RawSDNodeBits, sizeof(this->RawSDNodeBits));
1285 SDNodeBits.HasDebugValue = 0;
1286 SDNodeBits.IsDivergent = false;
1287 memcpy(&Data, &RawSDNodeBits, sizeof(RawSDNodeBits));
1288 return Data;
1289 }
1290
1291 bool isVolatile() const { return MemSDNodeBits.IsVolatile; }
1292 bool isNonTemporal() const { return MemSDNodeBits.IsNonTemporal; }
1293 bool isDereferenceable() const { return MemSDNodeBits.IsDereferenceable; }
1294 bool isInvariant() const { return MemSDNodeBits.IsInvariant; }
1295
1296 // Returns the offset from the location of the access.
1297 int64_t getSrcValueOffset() const { return MMO->getOffset(); }
1298
1299 /// Returns the AA info that describes the dereference.
1300 AAMDNodes getAAInfo() const { return MMO->getAAInfo(); }
1301
1302 /// Returns the Ranges that describes the dereference.
1303 const MDNode *getRanges() const { return MMO->getRanges(); }
1304
1305 /// Returns the synchronization scope ID for this memory operation.
1306 SyncScope::ID getSyncScopeID() const { return MMO->getSyncScopeID(); }
1307
1308 /// Return the atomic ordering requirements for this memory operation. For
1309 /// cmpxchg atomic operations, return the atomic ordering requirements when
1310 /// store occurs.
1311 AtomicOrdering getOrdering() const { return MMO->getOrdering(); }
1312
1313 /// Return true if the memory operation ordering is Unordered or higher.
1314 bool isAtomic() const { return MMO->isAtomic(); }
1315
1316 /// Returns true if the memory operation doesn't imply any ordering
1317 /// constraints on surrounding memory operations beyond the normal memory
1318 /// aliasing rules.
1319 bool isUnordered() const { return MMO->isUnordered(); }
1320
1321 /// Returns true if the memory operation is neither atomic nor volatile.
1322 bool isSimple() const { return !isAtomic() && !isVolatile(); }
1323
1324 /// Return the type of the in-memory value.
1325 EVT getMemoryVT() const { return MemoryVT; }
1326
1327 /// Return a MachineMemOperand object describing the memory
1328 /// reference performed by operation.
1329 MachineMemOperand *getMemOperand() const { return MMO; }
1330
1331 const MachinePointerInfo &getPointerInfo() const {
1332 return MMO->getPointerInfo();
1333 }
1334
1335 /// Return the address space for the associated pointer
1336 unsigned getAddressSpace() const {
1337 return getPointerInfo().getAddrSpace();
1338 }
1339
1340 /// Update this MemSDNode's MachineMemOperand information
1341 /// to reflect the alignment of NewMMO, if it has a greater alignment.
1342 /// This must only be used when the new alignment applies to all users of
1343 /// this MachineMemOperand.
1344 void refineAlignment(const MachineMemOperand *NewMMO) {
1345 MMO->refineAlignment(NewMMO);
1346 }
1347
1348 const SDValue &getChain() const { return getOperand(0); }
1349
1350 const SDValue &getBasePtr() const {
1351 switch (getOpcode()) {
1352 case ISD::STORE:
1353 case ISD::MSTORE:
1354 return getOperand(2);
1355 case ISD::MGATHER:
1356 case ISD::MSCATTER:
1357 return getOperand(3);
1358 default:
1359 return getOperand(1);
1360 }
1361 }
1362
1363 // Methods to support isa and dyn_cast
1364 static bool classof(const SDNode *N) {
1365 // For some targets, we lower some target intrinsics to a MemIntrinsicNode
1366 // with either an intrinsic or a target opcode.
1367 return N->getOpcode() == ISD::LOAD ||
1368 N->getOpcode() == ISD::STORE ||
1369 N->getOpcode() == ISD::PREFETCH ||
1370 N->getOpcode() == ISD::ATOMIC_CMP_SWAP ||
1371 N->getOpcode() == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS ||
1372 N->getOpcode() == ISD::ATOMIC_SWAP ||
1373 N->getOpcode() == ISD::ATOMIC_LOAD_ADD ||
1374 N->getOpcode() == ISD::ATOMIC_LOAD_SUB ||
1375 N->getOpcode() == ISD::ATOMIC_LOAD_AND ||
1376 N->getOpcode() == ISD::ATOMIC_LOAD_CLR ||
1377 N->getOpcode() == ISD::ATOMIC_LOAD_OR ||
1378 N->getOpcode() == ISD::ATOMIC_LOAD_XOR ||
1379 N->getOpcode() == ISD::ATOMIC_LOAD_NAND ||
1380 N->getOpcode() == ISD::ATOMIC_LOAD_MIN ||
1381 N->getOpcode() == ISD::ATOMIC_LOAD_MAX ||
1382 N->getOpcode() == ISD::ATOMIC_LOAD_UMIN ||
1383 N->getOpcode() == ISD::ATOMIC_LOAD_UMAX ||
1384 N->getOpcode() == ISD::ATOMIC_LOAD_FADD ||
1385 N->getOpcode() == ISD::ATOMIC_LOAD_FSUB ||
1386 N->getOpcode() == ISD::ATOMIC_LOAD ||
1387 N->getOpcode() == ISD::ATOMIC_STORE ||
1388 N->getOpcode() == ISD::MLOAD ||
1389 N->getOpcode() == ISD::MSTORE ||
1390 N->getOpcode() == ISD::MGATHER ||
1391 N->getOpcode() == ISD::MSCATTER ||
1392 N->isMemIntrinsic() ||
1393 N->isTargetMemoryOpcode();
1394 }
1395};
1396
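// --- Editorial sketch (not part of SelectionDAGNodes.h): memory properties
// are usually reached through dyn_cast<MemSDNode>, which covers loads,
// stores, atomics, masked ops and target memory intrinsics per classof above.
// `N` and describeMemAccess are hypothetical.
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/Support/raw_ostream.h"

static void describeMemAccess(const llvm::SDNode *N, llvm::raw_ostream &OS) {
  const auto *Mem = llvm::dyn_cast<llvm::MemSDNode>(N);
  if (!Mem)
    return;                              // not a memory-accessing node
  OS << "mem VT: " << Mem->getMemoryVT().getEVTString()
     << ", align: " << Mem->getAlign().value()
     << ", addrspace: " << Mem->getAddressSpace()
     << (Mem->isVolatile() ? ", volatile" : "") << '\n';
}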
1397/// This is an SDNode representing atomic operations.
1398class AtomicSDNode : public MemSDNode {
1399public:
1400 AtomicSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, SDVTList VTL,
1401 EVT MemVT, MachineMemOperand *MMO)
1402 : MemSDNode(Opc, Order, dl, VTL, MemVT, MMO) {
1403 assert(((Opc != ISD::ATOMIC_LOAD && Opc != ISD::ATOMIC_STORE) ||
1404 MMO->isAtomic()) && "then why are we using an AtomicSDNode?");
1405 }
1406
1407 const SDValue &getBasePtr() const { return getOperand(1); }
1408 const SDValue &getVal() const { return getOperand(2); }
1409
1410 /// Returns true if this SDNode represents cmpxchg atomic operation, false
1411 /// otherwise.
1412 bool isCompareAndSwap() const {
1413 unsigned Op = getOpcode();
1414 return Op == ISD::ATOMIC_CMP_SWAP ||
1415 Op == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS;
1416 }
1417
1418 /// For cmpxchg atomic operations, return the atomic ordering requirements
1419 /// when store does not occur.
1420 AtomicOrdering getFailureOrdering() const {
1421 assert(isCompareAndSwap() && "Must be cmpxchg operation");
1422 return MMO->getFailureOrdering();
1423 }
1424
1425 // Methods to support isa and dyn_cast
1426 static bool classof(const SDNode *N) {
1427 return N->getOpcode() == ISD::ATOMIC_CMP_SWAP ||
1428 N->getOpcode() == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS ||
1429 N->getOpcode() == ISD::ATOMIC_SWAP ||
1430 N->getOpcode() == ISD::ATOMIC_LOAD_ADD ||
1431 N->getOpcode() == ISD::ATOMIC_LOAD_SUB ||
1432 N->getOpcode() == ISD::ATOMIC_LOAD_AND ||
1433 N->getOpcode() == ISD::ATOMIC_LOAD_CLR ||
1434 N->getOpcode() == ISD::ATOMIC_LOAD_OR ||
1435 N->getOpcode() == ISD::ATOMIC_LOAD_XOR ||
1436 N->getOpcode() == ISD::ATOMIC_LOAD_NAND ||
1437 N->getOpcode() == ISD::ATOMIC_LOAD_MIN ||
1438 N->getOpcode() == ISD::ATOMIC_LOAD_MAX ||
1439 N->getOpcode() == ISD::ATOMIC_LOAD_UMIN ||
1440 N->getOpcode() == ISD::ATOMIC_LOAD_UMAX ||
1441 N->getOpcode() == ISD::ATOMIC_LOAD_FADD ||
1442 N->getOpcode() == ISD::ATOMIC_LOAD_FSUB ||
1443 N->getOpcode() == ISD::ATOMIC_LOAD ||
1444 N->getOpcode() == ISD::ATOMIC_STORE;
1445 }
1446};
1447
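// --- Editorial sketch (not part of SelectionDAGNodes.h): distinguishing a
// cmpxchg from other atomics and reading both of its orderings. `N` and
// needsStrongCmpXchg are hypothetical; the policy expressed is illustrative.
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/Support/AtomicOrdering.h"

static bool needsStrongCmpXchg(const llvm::SDNode *N) {
  const auto *A = llvm::dyn_cast<llvm::AtomicSDNode>(N);
  if (!A || !A->isCompareAndSwap())
    return false;
  // getOrdering() (from MemSDNode) is the success ordering; the failure
  // ordering is only valid for cmpxchg, as asserted above.
  return llvm::isAcquireOrStronger(A->getOrdering()) ||
         llvm::isAcquireOrStronger(A->getFailureOrdering());
}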
1448/// This SDNode is used for target intrinsics that touch
1449/// memory and need an associated MachineMemOperand. Its opcode may be
1450/// INTRINSIC_VOID, INTRINSIC_W_CHAIN, PREFETCH, or a target-specific opcode
1451/// with a value not less than FIRST_TARGET_MEMORY_OPCODE.
1452class MemIntrinsicSDNode : public MemSDNode {
1453public:
1454 MemIntrinsicSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl,
1455 SDVTList VTs, EVT MemoryVT, MachineMemOperand *MMO)
1456 : MemSDNode(Opc, Order, dl, VTs, MemoryVT, MMO) {
1457 SDNodeBits.IsMemIntrinsic = true;
1458 }
1459
1460 // Methods to support isa and dyn_cast
1461 static bool classof(const SDNode *N) {
1462 // We lower some target intrinsics to their target opcode
1463 // early, so a node with a target opcode can be of this class.
1464 return N->isMemIntrinsic() ||
1465 N->getOpcode() == ISD::PREFETCH ||
1466 N->isTargetMemoryOpcode();
1467 }
1468};
1469
1470/// This SDNode is used to implement the code generator
1471/// support for the llvm IR shufflevector instruction. It combines elements
1472/// from two input vectors into a new input vector, with the selection and
1473/// ordering of elements determined by an array of integers, referred to as
1474/// the shuffle mask. For input vectors of width N, mask indices of 0..N-1
1475/// refer to elements from the LHS input, and indices from N to 2N-1 the RHS.
1476/// An index of -1 is treated as undef, such that the code generator may put
1477/// any value in the corresponding element of the result.
1478class ShuffleVectorSDNode : public SDNode {
1479 // The memory for Mask is owned by the SelectionDAG's OperandAllocator, and
1480 // is freed when the SelectionDAG object is destroyed.
1481 const int *Mask;
1482
1483protected:
1484 friend class SelectionDAG;
1485
1486 ShuffleVectorSDNode(EVT VT, unsigned Order, const DebugLoc &dl, const int *M)
1487 : SDNode(ISD::VECTOR_SHUFFLE, Order, dl, getSDVTList(VT)), Mask(M) {}
1488
1489public:
1490 ArrayRef<int> getMask() const {
1491 EVT VT = getValueType(0);
1492 return makeArrayRef(Mask, VT.getVectorNumElements());
1493 }
1494
1495 int getMaskElt(unsigned Idx) const {
1496 assert(Idx < getValueType(0).getVectorNumElements() && "Idx out of range!");
1497 return Mask[Idx];
1498 }
1499
1500 bool isSplat() const { return isSplatMask(Mask, getValueType(0)); }
1501
1502 int getSplatIndex() const {
1503 assert(isSplat() && "Cannot get splat index for non-splat!");
1504 EVT VT = getValueType(0);
1505 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
1506 if (Mask[i] >= 0)
1507 return Mask[i];
1508
1509 // We can choose any index value here and be correct because all elements
1510 // are undefined. Return 0 for better potential for callers to simplify.
1511 return 0;
1512 }
1513
1514 static bool isSplatMask(const int *Mask, EVT VT);
1515
1516 /// Change values in a shuffle permute mask assuming
1517 /// the two vector operands have swapped position.
1518 static void commuteMask(MutableArrayRef<int> Mask) {
1519 unsigned NumElems = Mask.size();
1520 for (unsigned i = 0; i != NumElems; ++i) {
1521 int idx = Mask[i];
1522 if (idx < 0)
1523 continue;
1524 else if (idx < (int)NumElems)
1525 Mask[i] = idx + NumElems;
1526 else
1527 Mask[i] = idx - NumElems;
1528 }
1529 }
1530
1531 static bool classof(const SDNode *N) {
1532 return N->getOpcode() == ISD::VECTOR_SHUFFLE;
1533 }
1534};
1535
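// --- Editorial sketch (not part of SelectionDAGNodes.h): reading splat
// information from a VECTOR_SHUFFLE. `N` and isBroadcastOfElement0 are
// hypothetical names used only for illustration.
#include "llvm/CodeGen/SelectionDAGNodes.h"

static bool isBroadcastOfElement0(const llvm::SDNode *N) {
  const auto *SVN = llvm::dyn_cast<llvm::ShuffleVectorSDNode>(N);
  if (!SVN || !SVN->isSplat())
    return false;
  // getSplatIndex() is the lane every defined mask entry points at; indices
  // 0..N-1 select from operand 0 (see the class comment above).
  return SVN->getSplatIndex() == 0;
}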
1536class ConstantSDNode : public SDNode {
1537 friend class SelectionDAG;
1538
1539 const ConstantInt *Value;
1540
1541 ConstantSDNode(bool isTarget, bool isOpaque, const ConstantInt *val, EVT VT)
1542 : SDNode(isTarget ? ISD::TargetConstant : ISD::Constant, 0, DebugLoc(),
1543 getSDVTList(VT)),
1544 Value(val) {
1545 ConstantSDNodeBits.IsOpaque = isOpaque;
1546 }
1547
1548public:
1549 const ConstantInt *getConstantIntValue() const { return Value; }
1550 const APInt &getAPIntValue() const { return Value->getValue(); }
1551 uint64_t getZExtValue() const { return Value->getZExtValue(); }
1552 int64_t getSExtValue() const { return Value->getSExtValue(); }
1553 uint64_t getLimitedValue(uint64_t Limit = UINT64_MAX) {
1554 return Value->getLimitedValue(Limit);
1555 }
1556 MaybeAlign getMaybeAlignValue() const { return Value->getMaybeAlignValue(); }
1557 Align getAlignValue() const { return Value->getAlignValue(); }
1558
1559 bool isOne() const { return Value->isOne(); }
1560 bool isNullValue() const { return Value->isZero(); }
1561 bool isAllOnesValue() const { return Value->isMinusOne(); }
1562
1563 bool isOpaque() const { return ConstantSDNodeBits.IsOpaque; }
1564
1565 static bool classof(const SDNode *N) {
1566 return N->getOpcode() == ISD::Constant ||
1567 N->getOpcode() == ISD::TargetConstant;
1568 }
1569};
1570
1571uint64_t SDNode::getConstantOperandVal(unsigned Num) const {
1572 return cast<ConstantSDNode>(getOperand(Num))->getZExtValue();
1573}
1574
1575const APInt &SDNode::getConstantOperandAPInt(unsigned Num) const {
1576 return cast<ConstantSDNode>(getOperand(Num))->getAPIntValue();
1577}
1578
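// --- Editorial sketch (not part of SelectionDAGNodes.h): the common pattern
// for reading an immediate operand, either through dyn_cast<ConstantSDNode>
// when the operand may not be constant, or through getConstantOperandVal when
// it is known to be. `N`, `Amt` and isShiftByConstant are hypothetical.
#include "llvm/CodeGen/SelectionDAGNodes.h"

static bool isShiftByConstant(const llvm::SDNode *N, uint64_t &Amt) {
  if (N->getOpcode() != llvm::ISD::SHL)
    return false;
  if (const auto *C = llvm::dyn_cast<llvm::ConstantSDNode>(N->getOperand(1))) {
    Amt = C->getZExtValue();   // same value as N->getConstantOperandVal(1)
    return true;
  }
  return false;
}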
1579class ConstantFPSDNode : public SDNode {
1580 friend class SelectionDAG;
1581
1582 const ConstantFP *Value;
1583
1584 ConstantFPSDNode(bool isTarget, const ConstantFP *val, EVT VT)
1585 : SDNode(isTarget ? ISD::TargetConstantFP : ISD::ConstantFP, 0,
1586 DebugLoc(), getSDVTList(VT)),
1587 Value(val) {}
1588
1589public:
1590 const APFloat& getValueAPF() const { return Value->getValueAPF(); }
1591 const ConstantFP *getConstantFPValue() const { return Value; }
1592
1593 /// Return true if the value is positive or negative zero.
1594 bool isZero() const { return Value->isZero(); }
1595
1596 /// Return true if the value is a NaN.
1597 bool isNaN() const { return Value->isNaN(); }
1598
1599 /// Return true if the value is an infinity
1600 bool isInfinity() const { return Value->isInfinity(); }
1601
1602 /// Return true if the value is negative.
1603 bool isNegative() const { return Value->isNegative(); }
1604
1605 /// We don't rely on operator== working on double values, as
1606 /// it returns true for things that are clearly not equal, like -0.0 and 0.0.
1607 /// As such, this method can be used to do an exact bit-for-bit comparison of
1608 /// two floating point values.
1609
1610 /// We leave the version with the double argument here because it's just so
1611 /// convenient to write "2.0" and the like. Without this function we'd
1612 /// have to duplicate its logic everywhere it's called.
1613 bool isExactlyValue(double V) const {
1614 return Value->getValueAPF().isExactlyValue(V);
1615 }
1616 bool isExactlyValue(const APFloat& V) const;
1617
1618 static bool isValueValidForType(EVT VT, const APFloat& Val);
1619
1620 static bool classof(const SDNode *N) {
1621 return N->getOpcode() == ISD::ConstantFP ||
1622 N->getOpcode() == ISD::TargetConstantFP;
1623 }
1624};
1625
1626/// Returns true if \p V is a constant integer zero.
1627bool isNullConstant(SDValue V);
1628
1629/// Returns true if \p V is an FP constant with a value of positive zero.
1630bool isNullFPConstant(SDValue V);
1631
1632/// Returns true if \p V is an integer constant with all bits set.
1633bool isAllOnesConstant(SDValue V);
1634
1635/// Returns true if \p V is a constant integer one.
1636bool isOneConstant(SDValue V);
1637
1638/// Return the non-bitcasted source operand of \p V if it exists.
1639/// If \p V is not a bitcasted value, it is returned as-is.
1640SDValue peekThroughBitcasts(SDValue V);
1641
1642/// Return the non-bitcasted and one-use source operand of \p V if it exists.
1643/// If \p V is not a bitcasted one-use value, it is returned as-is.
1644SDValue peekThroughOneUseBitcasts(SDValue V);
1645
1646/// Return the non-extracted vector source operand of \p V if it exists.
1647/// If \p V is not an extracted subvector, it is returned as-is.
1648SDValue peekThroughExtractSubvectors(SDValue V);
1649
1650/// Returns true if \p V is a bitwise not operation. Assumes that an all ones
1651/// constant is canonicalized to be operand 1.
1652bool isBitwiseNot(SDValue V, bool AllowUndefs = false);
1653
1654/// Returns the SDNode if it is a constant splat BuildVector or constant int.
1655ConstantSDNode *isConstOrConstSplat(SDValue N, bool AllowUndefs = false,
1656 bool AllowTruncation = false);
1657
1658/// Returns the SDNode if it is a demanded constant splat BuildVector or
1659/// constant int.
1660ConstantSDNode *isConstOrConstSplat(SDValue N, const APInt &DemandedElts,
1661 bool AllowUndefs = false,
1662 bool AllowTruncation = false);
1663
1664/// Returns the SDNode if it is a constant splat BuildVector or constant float.
1665ConstantFPSDNode *isConstOrConstSplatFP(SDValue N, bool AllowUndefs = false);
1666
1667/// Returns the SDNode if it is a demanded constant splat BuildVector or
1668/// constant float.
1669ConstantFPSDNode *isConstOrConstSplatFP(SDValue N, const APInt &DemandedElts,
1670 bool AllowUndefs = false);
1671
1672/// Return true if the value is a constant 0 integer or a splatted vector of
1673/// a constant 0 integer (with no undefs by default).
1674/// Build vector implicit truncation is not an issue for null values.
1675bool isNullOrNullSplat(SDValue V, bool AllowUndefs = false);
1676
1677/// Return true if the value is a constant 1 integer or a splatted vector of a
1678/// constant 1 integer (with no undefs).
1679/// Does not permit build vector implicit truncation.
1680bool isOneOrOneSplat(SDValue V, bool AllowUndefs = false);
1681
1682/// Return true if the value is a constant -1 integer or a splatted vector of a
1683/// constant -1 integer (with no undefs).
1684/// Does not permit build vector implicit truncation.
1685bool isAllOnesOrAllOnesSplat(SDValue V, bool AllowUndefs = false);
1686
1687/// Return true if \p V is either a integer or FP constant.
1688inline bool isIntOrFPConstant(SDValue V) {
1689 return isa<ConstantSDNode>(V) || isa<ConstantFPSDNode>(V);
1690}
1691
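// --- Editorial sketch (not part of SelectionDAGNodes.h): the splat helpers
// above let a transform treat a scalar constant and a splatted vector
// constant uniformly. `Op` and isSubOfZero are hypothetical names.
#include "llvm/CodeGen/SelectionDAGNodes.h"

static bool isSubOfZero(llvm::SDValue Op) {
  // isNullOrNullSplat matches both a scalar 0 and a BUILD_VECTOR of zeros.
  return Op.getOpcode() == llvm::ISD::SUB &&
         llvm::isNullOrNullSplat(Op.getOperand(1));
}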
1692class GlobalAddressSDNode : public SDNode {
1693 friend class SelectionDAG;
1694
1695 const GlobalValue *TheGlobal;
1696 int64_t Offset;
1697 unsigned TargetFlags;
1698
1699 GlobalAddressSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL,
1700 const GlobalValue *GA, EVT VT, int64_t o,
1701 unsigned TF);
1702
1703public:
1704 const GlobalValue *getGlobal() const { return TheGlobal; }
1705 int64_t getOffset() const { return Offset; }
1706 unsigned getTargetFlags() const { return TargetFlags; }
1707 // Return the address space this GlobalAddress belongs to.
1708 unsigned getAddressSpace() const;
1709
1710 static bool classof(const SDNode *N) {
1711 return N->getOpcode() == ISD::GlobalAddress ||
1712 N->getOpcode() == ISD::TargetGlobalAddress ||
1713 N->getOpcode() == ISD::GlobalTLSAddress ||
1714 N->getOpcode() == ISD::TargetGlobalTLSAddress;
1715 }
1716};
1717
1718class FrameIndexSDNode : public SDNode {
1719 friend class SelectionDAG;
1720
1721 int FI;
1722
1723 FrameIndexSDNode(int fi, EVT VT, bool isTarg)
1724 : SDNode(isTarg ? ISD::TargetFrameIndex : ISD::FrameIndex,
1725 0, DebugLoc(), getSDVTList(VT)), FI(fi) {
1726 }
1727
1728public:
1729 int getIndex() const { return FI; }
1730
1731 static bool classof(const SDNode *N) {
1732 return N->getOpcode() == ISD::FrameIndex ||
1733 N->getOpcode() == ISD::TargetFrameIndex;
1734 }
1735};
1736
1737/// This SDNode is used for LIFETIME_START/LIFETIME_END values, which indicate
1738/// the offset and size that are started/ended in the underlying FrameIndex.
1739class LifetimeSDNode : public SDNode {
1740 friend class SelectionDAG;
1741 int64_t Size;
1742 int64_t Offset; // -1 if offset is unknown.
1743
1744 LifetimeSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
1745 SDVTList VTs, int64_t Size, int64_t Offset)
1746 : SDNode(Opcode, Order, dl, VTs), Size(Size), Offset(Offset) {}
1747public:
1748 int64_t getFrameIndex() const {
1749 return cast<FrameIndexSDNode>(getOperand(1))->getIndex();
1750 }
1751
1752 bool hasOffset() const { return Offset >= 0; }
1753 int64_t getOffset() const {
1754 assert(hasOffset() && "offset is unknown");
1755 return Offset;
1756 }
1757 int64_t getSize() const {
1758 assert(hasOffset() && "offset is unknown");
1759 return Size;
1760 }
1761
1762 // Methods to support isa and dyn_cast
1763 static bool classof(const SDNode *N) {
1764 return N->getOpcode() == ISD::LIFETIME_START ||
1765 N->getOpcode() == ISD::LIFETIME_END;
1766 }
1767};
1768
1769/// This SDNode is used for PSEUDO_PROBE values, which are the function guid and
1770/// the index of the basic block being probed. A pseudo probe serves as a
1771/// placeholder and will be removed at the end of compilation. It has no
1772/// operands because we do not want instruction selection to have to deal with any.
1773class PseudoProbeSDNode : public SDNode {
1774 friend class SelectionDAG;
1775 uint64_t Guid;
1776 uint64_t Index;
1777 uint32_t Attributes;
1778
1779 PseudoProbeSDNode(unsigned Opcode, unsigned Order, const DebugLoc &Dl,
1780 SDVTList VTs, uint64_t Guid, uint64_t Index, uint32_t Attr)
1781 : SDNode(Opcode, Order, Dl, VTs), Guid(Guid), Index(Index),
1782 Attributes(Attr) {}
1783
1784public:
1785 uint64_t getGuid() const { return Guid; }
1786 uint64_t getIndex() const { return Index; }
1787 uint32_t getAttributes() const { return Attributes; }
1788
1789 // Methods to support isa and dyn_cast
1790 static bool classof(const SDNode *N) {
1791 return N->getOpcode() == ISD::PSEUDO_PROBE;
1792 }
1793};
1794
1795class JumpTableSDNode : public SDNode {
1796 friend class SelectionDAG;
1797
1798 int JTI;
1799 unsigned TargetFlags;
1800
1801 JumpTableSDNode(int jti, EVT VT, bool isTarg, unsigned TF)
1802 : SDNode(isTarg ? ISD::TargetJumpTable : ISD::JumpTable,
1803 0, DebugLoc(), getSDVTList(VT)), JTI(jti), TargetFlags(TF) {
1804 }
1805
1806public:
1807 int getIndex() const { return JTI; }
1808 unsigned getTargetFlags() const { return TargetFlags; }
1809
1810 static bool classof(const SDNode *N) {
1811 return N->getOpcode() == ISD::JumpTable ||
1812 N->getOpcode() == ISD::TargetJumpTable;
1813 }
1814};
1815
1816class ConstantPoolSDNode : public SDNode {
1817 friend class SelectionDAG;
1818
1819 union {
1820 const Constant *ConstVal;
1821 MachineConstantPoolValue *MachineCPVal;
1822 } Val;
1823 int Offset; // It's a MachineConstantPoolValue if top bit is set.
1824 Align Alignment; // Minimum alignment requirement of CP.
1825 unsigned TargetFlags;
1826
1827 ConstantPoolSDNode(bool isTarget, const Constant *c, EVT VT, int o,
1828 Align Alignment, unsigned TF)
1829 : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, 0,
1830 DebugLoc(), getSDVTList(VT)),
1831 Offset(o), Alignment(Alignment), TargetFlags(TF) {
1832 assert(Offset >= 0 && "Offset is too large");
1833 Val.ConstVal = c;
1834 }
1835
1836 ConstantPoolSDNode(bool isTarget, MachineConstantPoolValue *v, EVT VT, int o,
1837 Align Alignment, unsigned TF)
1838 : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, 0,
1839 DebugLoc(), getSDVTList(VT)),
1840 Offset(o), Alignment(Alignment), TargetFlags(TF) {
1841 assert(Offset >= 0 && "Offset is too large");
1842 Val.MachineCPVal = v;
1843 Offset |= 1 << (sizeof(unsigned)*CHAR_BIT-1);
1844 }
1845
1846public:
1847 bool isMachineConstantPoolEntry() const {
1848 return Offset < 0;
1849 }
1850
1851 const Constant *getConstVal() const {
1852 assert(!isMachineConstantPoolEntry() && "Wrong constantpool type");
1853 return Val.ConstVal;
1854 }
1855
1856 MachineConstantPoolValue *getMachineCPVal() const {
1857 assert(isMachineConstantPoolEntry() && "Wrong constantpool type");
1858 return Val.MachineCPVal;
1859 }
1860
1861 int getOffset() const {
1862 return Offset & ~(1 << (sizeof(unsigned)*CHAR_BIT-1));
1863 }
1864
1865 // Return the alignment of this constant pool object, which is either 0 (for
1866 // default alignment) or the desired value.
1867 Align getAlign() const { return Alignment; }
1868 unsigned getTargetFlags() const { return TargetFlags; }
1869
1870 Type *getType() const;
1871
1872 static bool classof(const SDNode *N) {
1873 return N->getOpcode() == ISD::ConstantPool ||
1874 N->getOpcode() == ISD::TargetConstantPool;
1875 }
1876};
1877
1878/// Completely target-dependent object reference.
1879class TargetIndexSDNode : public SDNode {
1880 friend class SelectionDAG;
1881
1882 unsigned TargetFlags;
1883 int Index;
1884 int64_t Offset;
1885
1886public:
1887 TargetIndexSDNode(int Idx, EVT VT, int64_t Ofs, unsigned TF)
1888 : SDNode(ISD::TargetIndex, 0, DebugLoc(), getSDVTList(VT)),
1889 TargetFlags(TF), Index(Idx), Offset(Ofs) {}
1890
1891 unsigned getTargetFlags() const { return TargetFlags; }
1892 int getIndex() const { return Index; }
1893 int64_t getOffset() const { return Offset; }
1894
1895 static bool classof(const SDNode *N) {
1896 return N->getOpcode() == ISD::TargetIndex;
1897 }
1898};
1899
1900class BasicBlockSDNode : public SDNode {
1901 friend class SelectionDAG;
1902
1903 MachineBasicBlock *MBB;
1904
1905 /// Debug info is meaningful and potentially useful here, but we create
1906 /// blocks out of order when they're jumped to, which makes it a bit
1907 /// harder. Let's see if we need it first.
1908 explicit BasicBlockSDNode(MachineBasicBlock *mbb)
1909 : SDNode(ISD::BasicBlock, 0, DebugLoc(), getSDVTList(MVT::Other)), MBB(mbb)
1910 {}
1911
1912public:
1913 MachineBasicBlock *getBasicBlock() const { return MBB; }
1914
1915 static bool classof(const SDNode *N) {
1916 return N->getOpcode() == ISD::BasicBlock;
1917 }
1918};
1919
1920/// A "pseudo-class" with methods for operating on BUILD_VECTORs.
1921class BuildVectorSDNode : public SDNode {
1922public:
1923 // These are constructed as SDNodes and then cast to BuildVectorSDNodes.
1924 explicit BuildVectorSDNode() = delete;
1925
1926 /// Check if this is a constant splat, and if so, find the
1927 /// smallest element size that splats the vector. If MinSplatBits is
1928 /// nonzero, the element size must be at least that large. Note that the
1929 /// splat element may be the entire vector (i.e., a one element vector).
1930 /// Returns the splat element value in SplatValue. Any undefined bits in
1931 /// that value are zero, and the corresponding bits in the SplatUndef mask
1932 /// are set. The SplatBitSize value is set to the splat element size in
1933 /// bits. HasAnyUndefs is set to true if any bits in the vector are
1934 /// undefined. isBigEndian describes the endianness of the target.
1935 bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef,
1936 unsigned &SplatBitSize, bool &HasAnyUndefs,
1937 unsigned MinSplatBits = 0,
1938 bool isBigEndian = false) const;
1939
1940 /// Returns the demanded splatted value or a null value if this is not a
1941 /// splat.
1942 ///
1943 /// The DemandedElts mask indicates the elements that must be in the splat.
1944 /// If passed a non-null UndefElements bitvector, it will resize it to match
1945 /// the vector width and set the bits where elements are undef.
1946 SDValue getSplatValue(const APInt &DemandedElts,
1947 BitVector *UndefElements = nullptr) const;
1948
1949 /// Returns the splatted value or a null value if this is not a splat.
1950 ///
1951 /// If passed a non-null UndefElements bitvector, it will resize it to match
1952 /// the vector width and set the bits where elements are undef.
1953 SDValue getSplatValue(BitVector *UndefElements = nullptr) const;
1954
1955 /// Find the shortest repeating sequence of values in the build vector.
1956 ///
1957 /// e.g. { u, X, u, X, u, u, X, u } -> { X }
1958 /// { X, Y, u, Y, u, u, X, u } -> { X, Y }
1959 ///
1960 /// Currently this must be a power-of-2 build vector.
1961 /// The DemandedElts mask indicates the elements that must be present,
1962 /// undemanded elements in Sequence may be null (SDValue()). If passed a
1963 /// non-null UndefElements bitvector, it will resize it to match the original
1964 /// vector width and set the bits where elements are undef. If result is
1965 /// false, Sequence will be empty.
1966 bool getRepeatedSequence(const APInt &DemandedElts,
1967 SmallVectorImpl<SDValue> &Sequence,
1968 BitVector *UndefElements = nullptr) const;
1969
1970 /// Find the shortest repeating sequence of values in the build vector.
1971 ///
1972 /// e.g. { u, X, u, X, u, u, X, u } -> { X }
1973 /// { X, Y, u, Y, u, u, X, u } -> { X, Y }
1974 ///
1975 /// Currently this must be a power-of-2 build vector.
1976 /// If passed a non-null UndefElements bitvector, it will resize it to match
1977 /// the original vector width and set the bits where elements are undef.
1978 /// If result is false, Sequence will be empty.
1979 bool getRepeatedSequence(SmallVectorImpl<SDValue> &Sequence,
1980 BitVector *UndefElements = nullptr) const;
1981
1982 /// Returns the demanded splatted constant or null if this is not a constant
1983 /// splat.
1984 ///
1985 /// The DemandedElts mask indicates the elements that must be in the splat.
1986 /// If passed a non-null UndefElements bitvector, it will resize it to match
1987 /// the vector width and set the bits where elements are undef.
1988 ConstantSDNode *
1989 getConstantSplatNode(const APInt &DemandedElts,
1990 BitVector *UndefElements = nullptr) const;
1991
1992 /// Returns the splatted constant or null if this is not a constant
1993 /// splat.
1994 ///
1995 /// If passed a non-null UndefElements bitvector, it will resize it to match
1996 /// the vector width and set the bits where elements are undef.
1997 ConstantSDNode *
1998 getConstantSplatNode(BitVector *UndefElements = nullptr) const;
1999
2000 /// Returns the demanded splatted constant FP or null if this is not a
2001 /// constant FP splat.
2002 ///
2003 /// The DemandedElts mask indicates the elements that must be in the splat.
2004 /// If passed a non-null UndefElements bitvector, it will resize it to match
2005 /// the vector width and set the bits where elements are undef.
2006 ConstantFPSDNode *
2007 getConstantFPSplatNode(const APInt &DemandedElts,
2008 BitVector *UndefElements = nullptr) const;
2009
2010 /// Returns the splatted constant FP or null if this is not a constant
2011 /// FP splat.
2012 ///
2013 /// If passed a non-null UndefElements bitvector, it will resize it to match
2014 /// the vector width and set the bits where elements are undef.
2015 ConstantFPSDNode *
2016 getConstantFPSplatNode(BitVector *UndefElements = nullptr) const;
2017
2018 /// If this is a constant FP splat and the splatted constant FP is an
2019 /// exact power of 2, return the log base 2 integer value. Otherwise,
2020 /// return -1.
2021 ///
2022 /// The BitWidth specifies the necessary bit precision.
2023 int32_t getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements,
2024 uint32_t BitWidth) const;
2025
2026 bool isConstant() const;
2027
2028 static bool classof(const SDNode *N) {
2029 return N->getOpcode() == ISD::BUILD_VECTOR;
2030 }
2031};
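The splat queries above are normally reached through dyn_cast on a BUILD_VECTOR node. A minimal sketch of isConstantSplat in use; the helper name and the Expected parameter are illustrative, not part of the API:

// Sketch only: BV is a hypothetical BuildVectorSDNode* obtained via
// dyn_cast<BuildVectorSDNode>(N).
#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

static bool isSplatOfValue(const BuildVectorSDNode *BV, uint64_t Expected) {
  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  // Ask for the smallest element size that splats the whole vector; undef
  // bits come back as zeros in SplatValue and as set bits in SplatUndef.
  if (!BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs))
    return false;
  return SplatValue.getLimitedValue() == Expected;
}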
2032
2033/// An SDNode that holds an arbitrary LLVM IR Value. This is
2034/// used when the SelectionDAG needs to make a simple reference to something
2035/// in the LLVM IR representation.
2036///
2037class SrcValueSDNode : public SDNode {
2038 friend class SelectionDAG;
2039
2040 const Value *V;
2041
2042 /// Create a SrcValue for a general value.
2043 explicit SrcValueSDNode(const Value *v)
2044 : SDNode(ISD::SRCVALUE, 0, DebugLoc(), getSDVTList(MVT::Other)), V(v) {}
2045
2046public:
2047 /// Return the contained Value.
2048 const Value *getValue() const { return V; }
2049
2050 static bool classof(const SDNode *N) {
2051 return N->getOpcode() == ISD::SRCVALUE;
2052 }
2053};
2054
2055class MDNodeSDNode : public SDNode {
2056 friend class SelectionDAG;
2057
2058 const MDNode *MD;
2059
2060 explicit MDNodeSDNode(const MDNode *md)
2061 : SDNode(ISD::MDNODE_SDNODE, 0, DebugLoc(), getSDVTList(MVT::Other)), MD(md)
2062 {}
2063
2064public:
2065 const MDNode *getMD() const { return MD; }
2066
2067 static bool classof(const SDNode *N) {
2068 return N->getOpcode() == ISD::MDNODE_SDNODE;
2069 }
2070};
2071
2072class RegisterSDNode : public SDNode {
2073 friend class SelectionDAG;
2074
2075 Register Reg;
2076
2077 RegisterSDNode(Register reg, EVT VT)
2078 : SDNode(ISD::Register, 0, DebugLoc(), getSDVTList(VT)), Reg(reg) {}
2079
2080public:
2081 Register getReg() const { return Reg; }
2082
2083 static bool classof(const SDNode *N) {
2084 return N->getOpcode() == ISD::Register;
2085 }
2086};
2087
2088class RegisterMaskSDNode : public SDNode {
2089 friend class SelectionDAG;
2090
2091 // The memory for RegMask is not owned by the node.
2092 const uint32_t *RegMask;
2093
2094 RegisterMaskSDNode(const uint32_t *mask)
2095 : SDNode(ISD::RegisterMask, 0, DebugLoc(), getSDVTList(MVT::Untyped)),
2096 RegMask(mask) {}
2097
2098public:
2099 const uint32_t *getRegMask() const { return RegMask; }
2100
2101 static bool classof(const SDNode *N) {
2102 return N->getOpcode() == ISD::RegisterMask;
2103 }
2104};
2105
2106class BlockAddressSDNode : public SDNode {
2107 friend class SelectionDAG;
2108
2109 const BlockAddress *BA;
2110 int64_t Offset;
2111 unsigned TargetFlags;
2112
2113 BlockAddressSDNode(unsigned NodeTy, EVT VT, const BlockAddress *ba,
2114 int64_t o, unsigned Flags)
2115 : SDNode(NodeTy, 0, DebugLoc(), getSDVTList(VT)),
2116 BA(ba), Offset(o), TargetFlags(Flags) {}
2117
2118public:
2119 const BlockAddress *getBlockAddress() const { return BA; }
2120 int64_t getOffset() const { return Offset; }
2121 unsigned getTargetFlags() const { return TargetFlags; }
2122
2123 static bool classof(const SDNode *N) {
2124 return N->getOpcode() == ISD::BlockAddress ||
2125 N->getOpcode() == ISD::TargetBlockAddress;
2126 }
2127};
2128
2129class LabelSDNode : public SDNode {
2130 friend class SelectionDAG;
2131
2132 MCSymbol *Label;
2133
2134 LabelSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl, MCSymbol *L)
2135 : SDNode(Opcode, Order, dl, getSDVTList(MVT::Other)), Label(L) {
2136 assert(LabelSDNode::classof(this) && "not a label opcode");
2137 }
2138
2139public:
2140 MCSymbol *getLabel() const { return Label; }
2141
2142 static bool classof(const SDNode *N) {
2143 return N->getOpcode() == ISD::EH_LABEL ||
2144 N->getOpcode() == ISD::ANNOTATION_LABEL;
2145 }
2146};
2147
2148class ExternalSymbolSDNode : public SDNode {
2149 friend class SelectionDAG;
2150
2151 const char *Symbol;
2152 unsigned TargetFlags;
2153
2154 ExternalSymbolSDNode(bool isTarget, const char *Sym, unsigned TF, EVT VT)
2155 : SDNode(isTarget ? ISD::TargetExternalSymbol : ISD::ExternalSymbol, 0,
2156 DebugLoc(), getSDVTList(VT)),
2157 Symbol(Sym), TargetFlags(TF) {}
2158
2159public:
2160 const char *getSymbol() const { return Symbol; }
2161 unsigned getTargetFlags() const { return TargetFlags; }
2162
2163 static bool classof(const SDNode *N) {
2164 return N->getOpcode() == ISD::ExternalSymbol ||
2165 N->getOpcode() == ISD::TargetExternalSymbol;
2166 }
2167};
2168
2169class MCSymbolSDNode : public SDNode {
2170 friend class SelectionDAG;
2171
2172 MCSymbol *Symbol;
2173
2174 MCSymbolSDNode(MCSymbol *Symbol, EVT VT)
2175 : SDNode(ISD::MCSymbol, 0, DebugLoc(), getSDVTList(VT)), Symbol(Symbol) {}
2176
2177public:
2178 MCSymbol *getMCSymbol() const { return Symbol; }
2179
2180 static bool classof(const SDNode *N) {
2181 return N->getOpcode() == ISD::MCSymbol;
2182 }
2183};
2184
2185class CondCodeSDNode : public SDNode {
2186 friend class SelectionDAG;
2187
2188 ISD::CondCode Condition;
2189
2190 explicit CondCodeSDNode(ISD::CondCode Cond)
2191 : SDNode(ISD::CONDCODE, 0, DebugLoc(), getSDVTList(MVT::Other)),
2192 Condition(Cond) {}
2193
2194public:
2195 ISD::CondCode get() const { return Condition; }
2196
2197 static bool classof(const SDNode *N) {
2198 return N->getOpcode() == ISD::CONDCODE;
2199 }
2200};
2201
2202/// This class is used to represent EVT's, which are used
2203/// to parameterize some operations.
2204class VTSDNode : public SDNode {
2205 friend class SelectionDAG;
2206
2207 EVT ValueType;
2208
2209 explicit VTSDNode(EVT VT)
2210 : SDNode(ISD::VALUETYPE, 0, DebugLoc(), getSDVTList(MVT::Other)),
2211 ValueType(VT) {}
2212
2213public:
2214 EVT getVT() const { return ValueType; }
2215
2216 static bool classof(const SDNode *N) {
2217 return N->getOpcode() == ISD::VALUETYPE;
2218 }
2219};
2220
2221/// Base class for LoadSDNode and StoreSDNode
2222class LSBaseSDNode : public MemSDNode {
2223public:
2224 LSBaseSDNode(ISD::NodeType NodeTy, unsigned Order, const DebugLoc &dl,
2225 SDVTList VTs, ISD::MemIndexedMode AM, EVT MemVT,
2226 MachineMemOperand *MMO)
2227 : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
2228 LSBaseSDNodeBits.AddressingMode = AM;
2229 assert(getAddressingMode() == AM && "Value truncated");
2230 }
2231
2232 const SDValue &getOffset() const {
2233 return getOperand(getOpcode() == ISD::LOAD ? 2 : 3);
2234 }
2235
2236 /// Return the addressing mode for this load or store:
2237 /// unindexed, pre-inc, pre-dec, post-inc, or post-dec.
2238 ISD::MemIndexedMode getAddressingMode() const {
2239 return static_cast<ISD::MemIndexedMode>(LSBaseSDNodeBits.AddressingMode);
2240 }
2241
2242 /// Return true if this is a pre/post inc/dec load/store.
2243 bool isIndexed() const { return getAddressingMode() != ISD::UNINDEXED; }
2244
2245 /// Return true if this is NOT a pre/post inc/dec load/store.
2246 bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; }
2247
2248 static bool classof(const SDNode *N) {
2249 return N->getOpcode() == ISD::LOAD ||
2250 N->getOpcode() == ISD::STORE;
2251 }
2252};
2253
2254/// This class is used to represent ISD::LOAD nodes.
2255class LoadSDNode : public LSBaseSDNode {
2256 friend class SelectionDAG;
2257
2258 LoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2259 ISD::MemIndexedMode AM, ISD::LoadExtType ETy, EVT MemVT,
2260 MachineMemOperand *MMO)
2261 : LSBaseSDNode(ISD::LOAD, Order, dl, VTs, AM, MemVT, MMO) {
2262 LoadSDNodeBits.ExtTy = ETy;
2263 assert(readMem() && "Load MachineMemOperand is not a load!");
2264 assert(!writeMem() && "Load MachineMemOperand is a store!");
2265 }
2266
2267public:
2268 /// Return whether this is a plain node,
2269 /// or one of the varieties of value-extending loads.
2270 ISD::LoadExtType getExtensionType() const {
2271 return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy);
2272 }
2273
2274 const SDValue &getBasePtr() const { return getOperand(1); }
2275 const SDValue &getOffset() const { return getOperand(2); }
2276
2277 static bool classof(const SDNode *N) {
2278 return N->getOpcode() == ISD::LOAD;
2279 }
2280};
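For orientation, a load is usually classified by combining its extension type with its addressing mode. A minimal sketch (the helper name is illustrative) that mirrors the ISD::isNormalLoad predicate declared near the end of this header:

// Sketch only: true for a plain, unindexed, non-extending load.
#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

static bool isPlainLoad(const SDNode *N) {
  const auto *Ld = dyn_cast<LoadSDNode>(N);
  return Ld && Ld->getExtensionType() == ISD::NON_EXTLOAD && Ld->isUnindexed();
}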
2281
2282/// This class is used to represent ISD::STORE nodes.
2283class StoreSDNode : public LSBaseSDNode {
2284 friend class SelectionDAG;
2285
2286 StoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2287 ISD::MemIndexedMode AM, bool isTrunc, EVT MemVT,
2288 MachineMemOperand *MMO)
2289 : LSBaseSDNode(ISD::STORE, Order, dl, VTs, AM, MemVT, MMO) {
2290 StoreSDNodeBits.IsTruncating = isTrunc;
2291 assert(!readMem() && "Store MachineMemOperand is a load!");
2292 assert(writeMem() && "Store MachineMemOperand is not a store!");
2293 }
2294
2295public:
2296 /// Return true if the op does a truncation before store.
2297 /// For integers this is the same as doing a TRUNCATE and storing the result.
2298 /// For floats, it is the same as doing an FP_ROUND and storing the result.
2299 bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
2300 void setTruncatingStore(bool Truncating) {
2301 StoreSDNodeBits.IsTruncating = Truncating;
2302 }
2303
2304 const SDValue &getValue() const { return getOperand(1); }
2305 const SDValue &getBasePtr() const { return getOperand(2); }
2306 const SDValue &getOffset() const { return getOperand(3); }
2307
2308 static bool classof(const SDNode *N) {
2309 return N->getOpcode() == ISD::STORE;
2310 }
2311};
2312
2313/// This base class is used to represent MLOAD and MSTORE nodes
2314class MaskedLoadStoreSDNode : public MemSDNode {
2315public:
2316 friend class SelectionDAG;
2317
2318 MaskedLoadStoreSDNode(ISD::NodeType NodeTy, unsigned Order,
2319 const DebugLoc &dl, SDVTList VTs,
2320 ISD::MemIndexedMode AM, EVT MemVT,
2321 MachineMemOperand *MMO)
2322 : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
2323 LSBaseSDNodeBits.AddressingMode = AM;
2324 assert(getAddressingMode() == AM && "Value truncated");
2325 }
2326
2327 // MaskedLoadSDNode (Chain, ptr, offset, mask, passthru)
2328 // MaskedStoreSDNode (Chain, data, ptr, offset, mask)
2329 // Mask is a vector of i1 elements
2330 const SDValue &getOffset() const {
2331 return getOperand(getOpcode() == ISD::MLOAD ? 2 : 3);
2332 }
2333 const SDValue &getMask() const {
2334 return getOperand(getOpcode() == ISD::MLOAD ? 3 : 4);
2335 }
2336
2337 /// Return the addressing mode for this load or store:
2338 /// unindexed, pre-inc, pre-dec, post-inc, or post-dec.
2339 ISD::MemIndexedMode getAddressingMode() const {
2340 return static_cast<ISD::MemIndexedMode>(LSBaseSDNodeBits.AddressingMode);
2341 }
2342
2343 /// Return true if this is a pre/post inc/dec load/store.
2344 bool isIndexed() const { return getAddressingMode() != ISD::UNINDEXED; }
2345
2346 /// Return true if this is NOT a pre/post inc/dec load/store.
2347 bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; }
2348
2349 static bool classof(const SDNode *N) {
2350 return N->getOpcode() == ISD::MLOAD ||
2351 N->getOpcode() == ISD::MSTORE;
2352 }
2353};
2354
2355/// This class is used to represent an MLOAD node
2356class MaskedLoadSDNode : public MaskedLoadStoreSDNode {
2357public:
2358 friend class SelectionDAG;
2359
2360 MaskedLoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2361 ISD::MemIndexedMode AM, ISD::LoadExtType ETy,
2362 bool IsExpanding, EVT MemVT, MachineMemOperand *MMO)
2363 : MaskedLoadStoreSDNode(ISD::MLOAD, Order, dl, VTs, AM, MemVT, MMO) {
2364 LoadSDNodeBits.ExtTy = ETy;
2365 LoadSDNodeBits.IsExpanding = IsExpanding;
2366 }
2367
2368 ISD::LoadExtType getExtensionType() const {
2369 return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy);
2370 }
2371
2372 const SDValue &getBasePtr() const { return getOperand(1); }
2373 const SDValue &getOffset() const { return getOperand(2); }
2374 const SDValue &getMask() const { return getOperand(3); }
2375 const SDValue &getPassThru() const { return getOperand(4); }
2376
2377 static bool classof(const SDNode *N) {
2378 return N->getOpcode() == ISD::MLOAD;
2379 }
2380
2381 bool isExpandingLoad() const { return LoadSDNodeBits.IsExpanding; }
2382};
2383
2384/// This class is used to represent an MSTORE node
2385class MaskedStoreSDNode : public MaskedLoadStoreSDNode {
2386public:
2387 friend class SelectionDAG;
2388
2389 MaskedStoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2390 ISD::MemIndexedMode AM, bool isTrunc, bool isCompressing,
2391 EVT MemVT, MachineMemOperand *MMO)
2392 : MaskedLoadStoreSDNode(ISD::MSTORE, Order, dl, VTs, AM, MemVT, MMO) {
2393 StoreSDNodeBits.IsTruncating = isTrunc;
2394 StoreSDNodeBits.IsCompressing = isCompressing;
2395 }
2396
2397 /// Return true if the op does a truncation before store.
2398 /// For integers this is the same as doing a TRUNCATE and storing the result.
2399 /// For floats, it is the same as doing an FP_ROUND and storing the result.
2400 bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
2401
2402 /// Returns true if the op does a compression to the vector before storing.
2403 /// The node contiguously stores the active elements (integers or floats)
2404 /// in src (those with their respective bit set in writemask k) to unaligned
2405 /// memory at base_addr.
2406 bool isCompressingStore() const { return StoreSDNodeBits.IsCompressing; }
2407
2408 const SDValue &getValue() const { return getOperand(1); }
2409 const SDValue &getBasePtr() const { return getOperand(2); }
2410 const SDValue &getOffset() const { return getOperand(3); }
2411 const SDValue &getMask() const { return getOperand(4); }
2412
2413 static bool classof(const SDNode *N) {
2414 return N->getOpcode() == ISD::MSTORE;
2415 }
2416};
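A minimal sketch of reading the flags and operands back out of a masked store; the function and variable names are illustrative:

// Sketch only: MST is a hypothetical MaskedStoreSDNode* from
// dyn_cast<MaskedStoreSDNode>(N).
#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

static void inspectMaskedStore(const MaskedStoreSDNode *MST) {
  SDValue Data = MST->getValue();    // operand 1
  SDValue Base = MST->getBasePtr();  // operand 2
  SDValue Mask = MST->getMask();     // operand 4, a vector of i1
  bool Narrowed = MST->isTruncatingStore();
  bool Packed = MST->isCompressingStore();
  (void)Data; (void)Base; (void)Mask; (void)Narrowed; (void)Packed;
}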
2417
2418/// This is a base class used to represent
2419/// MGATHER and MSCATTER nodes
2420///
2421class MaskedGatherScatterSDNode : public MemSDNode {
2422public:
2423 friend class SelectionDAG;
2424
2425 MaskedGatherScatterSDNode(ISD::NodeType NodeTy, unsigned Order,
2426 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
2427 MachineMemOperand *MMO, ISD::MemIndexType IndexType)
2428 : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
2429 LSBaseSDNodeBits.AddressingMode = IndexType;
2430 assert(getIndexType() == IndexType && "Value truncated");
2431 }
2432
2433 /// How is Index applied to BasePtr when computing addresses.
2434 ISD::MemIndexType getIndexType() const {
2435 return static_cast<ISD::MemIndexType>(LSBaseSDNodeBits.AddressingMode);
2436 }
2437 void setIndexType(ISD::MemIndexType IndexType) {
2438 LSBaseSDNodeBits.AddressingMode = IndexType;
2439 }
2440 bool isIndexScaled() const {
2441 return (getIndexType() == ISD::SIGNED_SCALED) ||
2442 (getIndexType() == ISD::UNSIGNED_SCALED);
2443 }
2444 bool isIndexSigned() const {
2445 return (getIndexType() == ISD::SIGNED_SCALED) ||
2446 (getIndexType() == ISD::SIGNED_UNSCALED);
2447 }
2448
2449 // In both nodes the mask is Op2 and the base address is Op3:
2450 // MaskedGatherSDNode (Chain, passthru, mask, base, index, scale)
2451 // MaskedScatterSDNode (Chain, value, mask, base, index, scale)
2452 // Mask is a vector of i1 elements
2453 const SDValue &getBasePtr() const { return getOperand(3); }
2454 const SDValue &getIndex() const { return getOperand(4); }
2455 const SDValue &getMask() const { return getOperand(2); }
2456 const SDValue &getScale() const { return getOperand(5); }
2457
2458 static bool classof(const SDNode *N) {
2459 return N->getOpcode() == ISD::MGATHER ||
2460 N->getOpcode() == ISD::MSCATTER;
2461 }
2462};
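A minimal sketch of pulling the address components out of a gather or scatter; GS is an assumed MaskedGatherScatterSDNode* and the helper name is illustrative:

// Sketch only: the conceptual per-lane address is Base + Index (sign- or
// zero-extended) optionally multiplied by Scale, depending on the index type.
#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

static void inspectGatherScatter(const MaskedGatherScatterSDNode *GS) {
  SDValue Base  = GS->getBasePtr();  // operand 3
  SDValue Index = GS->getIndex();    // operand 4
  SDValue Scale = GS->getScale();    // operand 5
  bool Scaled = GS->isIndexScaled();
  bool Signed = GS->isIndexSigned();
  (void)Base; (void)Index; (void)Scale; (void)Scaled; (void)Signed;
}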
2463
2464/// This class is used to represent an MGATHER node
2465///
2466class MaskedGatherSDNode : public MaskedGatherScatterSDNode {
2467public:
2468 friend class SelectionDAG;
2469
2470 MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2471 EVT MemVT, MachineMemOperand *MMO,
2472 ISD::MemIndexType IndexType, ISD::LoadExtType ETy)
2473 : MaskedGatherScatterSDNode(ISD::MGATHER, Order, dl, VTs, MemVT, MMO,
2474 IndexType) {
2475 LoadSDNodeBits.ExtTy = ETy;
2476 }
2477
2478 const SDValue &getPassThru() const { return getOperand(1); }
2479
2480 ISD::LoadExtType getExtensionType() const {
2481 return ISD::LoadExtType(LoadSDNodeBits.ExtTy);
2482 }
2483
2484 static bool classof(const SDNode *N) {
2485 return N->getOpcode() == ISD::MGATHER;
2486 }
2487};
2488
2489/// This class is used to represent an MSCATTER node
2490///
2491class MaskedScatterSDNode : public MaskedGatherScatterSDNode {
2492public:
2493 friend class SelectionDAG;
2494
2495 MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2496 EVT MemVT, MachineMemOperand *MMO,
2497 ISD::MemIndexType IndexType, bool IsTrunc)
2498 : MaskedGatherScatterSDNode(ISD::MSCATTER, Order, dl, VTs, MemVT, MMO,
2499 IndexType) {
2500 StoreSDNodeBits.IsTruncating = IsTrunc;
2501 }
2502
2503 /// Return true if the op does a truncation before store.
2504 /// For integers this is the same as doing a TRUNCATE and storing the result.
2505 /// For floats, it is the same as doing an FP_ROUND and storing the result.
2506 bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
2507
2508 const SDValue &getValue() const { return getOperand(1); }
2509
2510 static bool classof(const SDNode *N) {
2511 return N->getOpcode() == ISD::MSCATTER;
2512 }
2513};
2514
2515/// An SDNode that represents everything that will be needed
2516/// to construct a MachineInstr. These nodes are created during the
2517/// instruction selection proper phase.
2518///
2519/// Note that the only supported way to set the `memoperands` is by calling the
2520/// `SelectionDAG::setNodeMemRefs` function as the memory management happens
2521/// inside the DAG rather than in the node.
2522class MachineSDNode : public SDNode {
2523private:
2524 friend class SelectionDAG;
2525
2526 MachineSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL, SDVTList VTs)
2527 : SDNode(Opc, Order, DL, VTs) {}
2528
2529 // We use a pointer union between a single `MachineMemOperand` pointer and
2530 // a pointer to an array of `MachineMemOperand` pointers. This is null when
2531 // the number of these is zero, the single pointer variant used when the
2532 // number is one, and the array is used for larger numbers.
2533 //
2534 // The array is allocated via the `SelectionDAG`'s allocator and so will
2535 // always live until the DAG is cleaned up and doesn't require ownership here.
2536 //
2537 // We can't use something simpler like `TinyPtrVector` here because `SDNode`
2538 // subclasses aren't managed in a conforming C++ manner. See the comments on
2539 // `SelectionDAG::MorphNodeTo` which details what all goes on, but the
2540 // constraint here is that these don't manage memory with their constructor or
2541 // destructor and can be initialized to a good state even if they start off
2542 // uninitialized.
2543 PointerUnion<MachineMemOperand *, MachineMemOperand **> MemRefs = {};
2544
2545 // Note that this could be folded into the above `MemRefs` member if doing so
2546 // is advantageous at some point. We don't need to store this in most cases.
2547 // However, at the moment this doesn't appear to make the allocation any
2548 // smaller and makes the code somewhat simpler to read.
2549 int NumMemRefs = 0;
2550
2551public:
2552 using mmo_iterator = ArrayRef<MachineMemOperand *>::const_iterator;
2553
2554 ArrayRef<MachineMemOperand *> memoperands() const {
2555 // Special case the common cases.
2556 if (NumMemRefs == 0)
2557 return {};
2558 if (NumMemRefs == 1)
2559 return makeArrayRef(MemRefs.getAddrOfPtr1(), 1);
2560
2561 // Otherwise we have an actual array.
2562 return makeArrayRef(MemRefs.get<MachineMemOperand **>(), NumMemRefs);
2563 }
2564 mmo_iterator memoperands_begin() const { return memoperands().begin(); }
2565 mmo_iterator memoperands_end() const { return memoperands().end(); }
2566 bool memoperands_empty() const { return memoperands().empty(); }
2567
2568 /// Clear out the memory reference descriptor list.
2569 void clearMemRefs() {
2570 MemRefs = nullptr;
2571 NumMemRefs = 0;
2572 }
2573
2574 static bool classof(const SDNode *N) {
2575 return N->isMachineOpcode();
2576 }
2577};
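A minimal sketch of iterating the memory references attached to a MachineSDNode; MN is an assumed MachineSDNode* and the helper name is illustrative. As noted above, memoperands can only be (re)assigned through SelectionDAG::setNodeMemRefs:

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// Sketch only: scan the attached MachineMemOperands for a volatile access.
static bool anyVolatileMemRef(const MachineSDNode *MN) {
  for (MachineMemOperand *MMO : MN->memoperands())
    if (MMO->isVolatile())
      return true;
  return false;
}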
2578
2579/// An SDNode that records if a register contains a value that is guaranteed to
2580/// be aligned accordingly.
2581class AssertAlignSDNode : public SDNode {
2582 Align Alignment;
2583
2584public:
2585 AssertAlignSDNode(unsigned Order, const DebugLoc &DL, EVT VT, Align A)
2586 : SDNode(ISD::AssertAlign, Order, DL, getSDVTList(VT)), Alignment(A) {}
2587
2588 Align getAlign() const { return Alignment; }
2589
2590 static bool classof(const SDNode *N) {
2591 return N->getOpcode() == ISD::AssertAlign;
2592 }
2593};
2594
2595class SDNodeIterator {
2596 const SDNode *Node;
2597 unsigned Operand;
2598
2599 SDNodeIterator(const SDNode *N, unsigned Op) : Node(N), Operand(Op) {}
2600
2601public:
2602 using iterator_category = std::forward_iterator_tag;
2603 using value_type = SDNode;
2604 using difference_type = std::ptrdiff_t;
2605 using pointer = value_type *;
2606 using reference = value_type &;
2607
2608 bool operator==(const SDNodeIterator& x) const {
2609 return Operand == x.Operand;
2610 }
2611 bool operator!=(const SDNodeIterator& x) const { return !operator==(x); }
2612
2613 pointer operator*() const {
2614 return Node->getOperand(Operand).getNode();
2615 }
2616 pointer operator->() const { return operator*(); }
2617
2618 SDNodeIterator& operator++() { // Preincrement
2619 ++Operand;
2620 return *this;
2621 }
2622 SDNodeIterator operator++(int) { // Postincrement
2623 SDNodeIterator tmp = *this; ++*this; return tmp;
2624 }
2625 size_t operator-(SDNodeIterator Other) const {
2626 assert(Node == Other.Node &&
2627 "Cannot compare iterators of two different nodes!");
2628 return Operand - Other.Operand;
2629 }
2630
2631 static SDNodeIterator begin(const SDNode *N) { return SDNodeIterator(N, 0); }
2632 static SDNodeIterator end (const SDNode *N) {
2633 return SDNodeIterator(N, N->getNumOperands());
2634 }
2635
2636 unsigned getOperand() const { return Operand; }
2637 const SDNode *getNode() const { return Node; }
2638};
2639
2640template <> struct GraphTraits<SDNode*> {
2641 using NodeRef = SDNode *;
2642 using ChildIteratorType = SDNodeIterator;
2643
2644 static NodeRef getEntryNode(SDNode *N) { return N; }
2645
2646 static ChildIteratorType child_begin(NodeRef N) {
2647 return SDNodeIterator::begin(N);
2648 }
2649
2650 static ChildIteratorType child_end(NodeRef N) {
2651 return SDNodeIterator::end(N);
2652 }
2653};
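Because GraphTraits<SDNode*> is specialized here, the generic graph iterators from llvm/ADT apply directly to DAG nodes. A minimal sketch, assuming Root is some node of interest; the helper name is illustrative:

#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// Sketch only: visits Root and every transitive operand exactly once.
static unsigned countReachableNodes(SDNode *Root) {
  unsigned Count = 0;
  for (SDNode *N : depth_first(Root)) {
    (void)N;
    ++Count;
  }
  return Count;
}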
2654
2655/// A representation of the largest SDNode, for use in sizeof().
2656///
2657/// This needs to be a union because the largest node differs on 32 bit systems
2658/// with 4 and 8 byte pointer alignment, respectively.
2659using LargestSDNode = AlignedCharArrayUnion<AtomicSDNode, TargetIndexSDNode,
2660 BlockAddressSDNode,
2661 GlobalAddressSDNode,
2662 PseudoProbeSDNode>;
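As the comment says, this alias exists so other code can size allocations with sizeof(). A minimal sketch of the kind of compile-time check it enables; the particular assertion is illustrative:

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// Sketch only: any concrete node type must fit in the storage reserved for
// the largest one.
static_assert(sizeof(LoadSDNode) <= sizeof(LargestSDNode),
              "LargestSDNode must be at least as large as LoadSDNode");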
2663
2664/// The SDNode class with the greatest alignment requirement.
2665using MostAlignedSDNode = GlobalAddressSDNode;
2666
2667namespace ISD {
2668
2669 /// Returns true if the specified node is a non-extending and unindexed load.
2670 inline bool isNormalLoad(const SDNode *N) {
2671 const LoadSDNode *Ld = dyn_cast<LoadSDNode>(N);
2672 return Ld && Ld->getExtensionType() == ISD::NON_EXTLOAD &&
2673 Ld->getAddressingMode() == ISD::UNINDEXED;
2674 }
2675
2676 /// Returns true if the specified node is a non-extending load.
2677 inline bool isNON_EXTLoad(const SDNode *N) {
2678 return isa<LoadSDNode>(N) &&
2679 cast<LoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD;
2680 }
2681
2682 /// Returns true if the specified node is a EXTLOAD.
2683 inline bool isEXTLoad(const SDNode *N) {
2684 return isa<LoadSDNode>(N) &&
2685 cast<LoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD;
2686 }
2687
2688 /// Returns true if the specified node is a SEXTLOAD.
2689 inline bool isSEXTLoad(const SDNode *N) {
2690 return isa<LoadSDNode>(N) &&
2691 cast<LoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD;
2692 }
2693
2694 /// Returns true if the specified node is a ZEXTLOAD.
2695 inline bool isZEXTLoad(const SDNode *N) {
2696 return isa<LoadSDNode>(N) &&
2697 cast<LoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD;
2698 }
2699
2700 /// Returns true if the specified node is an unindexed load.
2701 inline bool isUNINDEXEDLoad(const SDNode *N) {
2702 return isa<LoadSDNode>(N) &&
2703 cast<LoadSDNode>(N)->getAddressingMode() == ISD::UNINDEXED;
2704 }
2705
2706 /// Returns true if the specified node is a non-truncating
2707 /// and unindexed store.
2708 inline bool isNormalStore(const SDNode *N) {
2709 const StoreSDNode *St = dyn_cast<StoreSDNode>(N);
2710 return St && !St->isTruncatingStore() &&
2711 St->getAddressingMode() == ISD::UNINDEXED;
2712 }
2713
2714 /// Returns true if the specified node is a non-truncating store.
2715 inline bool isNON_TRUNCStore(const SDNode *N) {
2716 return isa<StoreSDNode>(N) && !cast<StoreSDNode>(N)->isTruncatingStore();
2717 }
2718
2719 /// Returns true if the specified node is a truncating store.
2720 inline bool isTRUNCStore(const SDNode *N) {
2721 return isa<StoreSDNode>(N) && cast<StoreSDNode>(N)->isTruncatingStore();
2722 }
2723
2724 /// Returns true if the specified node is an unindexed store.
2725 inline bool isUNINDEXEDStore(const SDNode *N) {
2726 return isa<StoreSDNode>(N) &&
2727 cast<StoreSDNode>(N)->getAddressingMode() == ISD::UNINDEXED;
2728 }
2729
2730 /// Attempt to match a unary predicate against a scalar/splat constant or
2731 /// every element of a constant BUILD_VECTOR.
2732 /// If AllowUndef is true, then UNDEF elements will pass nullptr to Match.
2733 bool matchUnaryPredicate(SDValue Op,
2734 std::function<bool(ConstantSDNode *)> Match,
2735 bool AllowUndefs = false);
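matchUnaryPredicate folds the scalar-constant and constant-BUILD_VECTOR cases into a single check. A minimal sketch; the helper name is illustrative:

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// Sketch only: true if Op is a constant (scalar, splat, or build vector)
// whose every element is a power of two.
static bool allElementsPowerOfTwo(SDValue Op) {
  return ISD::matchUnaryPredicate(Op, [](ConstantSDNode *C) {
    // With the default AllowUndefs=false the lambda only sees real constants,
    // but keep the null check as documented for the AllowUndefs=true case.
    return C && C->getAPIntValue().isPowerOf2();
  });
}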
2736
2737 /// Attempt to match a binary predicate against a pair of scalar/splat
2738 /// constants or every element of a pair of constant BUILD_VECTORs.
2739 /// If AllowUndef is true, then UNDEF elements will pass nullptr to Match.
2740 /// If AllowTypeMismatch is true then RetType + ArgTypes don't need to match.
2741 bool matchBinaryPredicate(
2742 SDValue LHS, SDValue RHS,
2743 std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match,
2744 bool AllowUndefs = false, bool AllowTypeMismatch = false);
2745
2746 /// Returns true if the specified value is the overflow result from one
2747 /// of the overflow intrinsic nodes.
2748 inline bool isOverflowIntrOpRes(SDValue Op) {
2749 unsigned Opc = Op.getOpcode();
2750 return (Op.getResNo() == 1 &&
2751 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
2752 Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO));
2753 }
2754
2755} // end namespace ISD
2756
2757} // end namespace llvm
2758
2759#endif // LLVM_CODEGEN_SELECTIONDAGNODES_H