Bug Summary

File: build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
Warning: line 1147, column 10
Called C++ object pointer is null
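
The diagnostic comes from the analyzer's path-sensitive null-pointer modeling (typically the core.CallAndMessage checker): a member function is invoked through an object pointer that is null on at least one feasible path. The sketch below is a minimal, hypothetical illustration of that warning class only; it is not the code at SelectionDAGNodes.h:1147, and the names (Node, describe, width) are made up for the example.

    #include <cstdio>

    // Stand-in for a DAG node; width() models any member function.
    struct Node {
      int width() const { return 32; }
    };

    void report(const char *Msg) { std::puts(Msg); }

    // The null check tells the analyzer that N can be null, but the branch
    // forgets to bail out. On that path the call below goes through a null
    // object pointer; this is where "Called C++ object pointer is null"
    // would be reported.
    int describe(const Node *N) {
      if (N == nullptr)
        report("describe: missing node"); // missing early return
      return N->width();
    }

    int main() {
      Node N;
      return describe(&N) == 32 ? 0 : 1; // runtime-safe call; the bug is latent
    }

In a report like this one, the useful information is the path listed in the annotated source below, which shows how a null pointer can reach the flagged call inside SelectionDAGNodes.h.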

Annotated Source Code

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ARMISelLowering.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-16/lib/clang/16.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/Target/ARM -I /build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/llvm/lib/Target/ARM -I include -I /build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-16/lib/clang/16.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/= -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/= -O2 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/= -ferror-limit 19 -fvisibility=hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o 
/tmp/scan-build-2022-09-04-125545-48738-1 -x c++ /build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/llvm/lib/Target/ARM/ARMISelLowering.cpp

/build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/llvm/lib/Target/ARM/ARMISelLowering.cpp

1//===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that ARM uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "ARMISelLowering.h"
15#include "ARMBaseInstrInfo.h"
16#include "ARMBaseRegisterInfo.h"
17#include "ARMCallingConv.h"
18#include "ARMConstantPoolValue.h"
19#include "ARMMachineFunctionInfo.h"
20#include "ARMPerfectShuffle.h"
21#include "ARMRegisterInfo.h"
22#include "ARMSelectionDAGInfo.h"
23#include "ARMSubtarget.h"
24#include "ARMTargetTransformInfo.h"
25#include "MCTargetDesc/ARMAddressingModes.h"
26#include "MCTargetDesc/ARMBaseInfo.h"
27#include "Utils/ARMBaseInfo.h"
28#include "llvm/ADT/APFloat.h"
29#include "llvm/ADT/APInt.h"
30#include "llvm/ADT/ArrayRef.h"
31#include "llvm/ADT/BitVector.h"
32#include "llvm/ADT/DenseMap.h"
33#include "llvm/ADT/STLExtras.h"
34#include "llvm/ADT/SmallPtrSet.h"
35#include "llvm/ADT/SmallVector.h"
36#include "llvm/ADT/Statistic.h"
37#include "llvm/ADT/StringExtras.h"
38#include "llvm/ADT/StringRef.h"
39#include "llvm/ADT/StringSwitch.h"
40#include "llvm/ADT/Triple.h"
41#include "llvm/ADT/Twine.h"
42#include "llvm/Analysis/VectorUtils.h"
43#include "llvm/CodeGen/CallingConvLower.h"
44#include "llvm/CodeGen/ISDOpcodes.h"
45#include "llvm/CodeGen/IntrinsicLowering.h"
46#include "llvm/CodeGen/MachineBasicBlock.h"
47#include "llvm/CodeGen/MachineConstantPool.h"
48#include "llvm/CodeGen/MachineFrameInfo.h"
49#include "llvm/CodeGen/MachineFunction.h"
50#include "llvm/CodeGen/MachineInstr.h"
51#include "llvm/CodeGen/MachineInstrBuilder.h"
52#include "llvm/CodeGen/MachineJumpTableInfo.h"
53#include "llvm/CodeGen/MachineMemOperand.h"
54#include "llvm/CodeGen/MachineOperand.h"
55#include "llvm/CodeGen/MachineRegisterInfo.h"
56#include "llvm/CodeGen/RuntimeLibcalls.h"
57#include "llvm/CodeGen/SelectionDAG.h"
58#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
59#include "llvm/CodeGen/SelectionDAGNodes.h"
60#include "llvm/CodeGen/TargetInstrInfo.h"
61#include "llvm/CodeGen/TargetLowering.h"
62#include "llvm/CodeGen/TargetOpcodes.h"
63#include "llvm/CodeGen/TargetRegisterInfo.h"
64#include "llvm/CodeGen/TargetSubtargetInfo.h"
65#include "llvm/CodeGen/ValueTypes.h"
66#include "llvm/IR/Attributes.h"
67#include "llvm/IR/CallingConv.h"
68#include "llvm/IR/Constant.h"
69#include "llvm/IR/Constants.h"
70#include "llvm/IR/DataLayout.h"
71#include "llvm/IR/DebugLoc.h"
72#include "llvm/IR/DerivedTypes.h"
73#include "llvm/IR/Function.h"
74#include "llvm/IR/GlobalAlias.h"
75#include "llvm/IR/GlobalValue.h"
76#include "llvm/IR/GlobalVariable.h"
77#include "llvm/IR/IRBuilder.h"
78#include "llvm/IR/InlineAsm.h"
79#include "llvm/IR/Instruction.h"
80#include "llvm/IR/Instructions.h"
81#include "llvm/IR/IntrinsicInst.h"
82#include "llvm/IR/Intrinsics.h"
83#include "llvm/IR/IntrinsicsARM.h"
84#include "llvm/IR/Module.h"
85#include "llvm/IR/PatternMatch.h"
86#include "llvm/IR/Type.h"
87#include "llvm/IR/User.h"
88#include "llvm/IR/Value.h"
89#include "llvm/MC/MCInstrDesc.h"
90#include "llvm/MC/MCInstrItineraries.h"
91#include "llvm/MC/MCRegisterInfo.h"
92#include "llvm/MC/MCSchedule.h"
93#include "llvm/Support/AtomicOrdering.h"
94#include "llvm/Support/BranchProbability.h"
95#include "llvm/Support/Casting.h"
96#include "llvm/Support/CodeGen.h"
97#include "llvm/Support/CommandLine.h"
98#include "llvm/Support/Compiler.h"
99#include "llvm/Support/Debug.h"
100#include "llvm/Support/ErrorHandling.h"
101#include "llvm/Support/KnownBits.h"
102#include "llvm/Support/MachineValueType.h"
103#include "llvm/Support/MathExtras.h"
104#include "llvm/Support/raw_ostream.h"
105#include "llvm/Target/TargetMachine.h"
106#include "llvm/Target/TargetOptions.h"
107#include <algorithm>
108#include <cassert>
109#include <cstdint>
110#include <cstdlib>
111#include <iterator>
112#include <limits>
113#include <string>
114#include <tuple>
115#include <utility>
116#include <vector>
117
118using namespace llvm;
119using namespace llvm::PatternMatch;
120
121#define DEBUG_TYPE "arm-isel"
122
123STATISTIC(NumTailCalls, "Number of tail calls");
124STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
125STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
126STATISTIC(NumConstpoolPromoted,
127 "Number of constants with their storage promoted into constant pools");
128
129static cl::opt<bool>
130ARMInterworking("arm-interworking", cl::Hidden,
131 cl::desc("Enable / disable ARM interworking (for debugging only)"),
132 cl::init(true));
133
134static cl::opt<bool> EnableConstpoolPromotion(
135 "arm-promote-constant", cl::Hidden,
136 cl::desc("Enable / disable promotion of unnamed_addr constants into "
137 "constant pools"),
138 cl::init(false)); // FIXME: set to true by default once PR32780 is fixed
139static cl::opt<unsigned> ConstpoolPromotionMaxSize(
140 "arm-promote-constant-max-size", cl::Hidden,
141 cl::desc("Maximum size of constant to promote into a constant pool"),
142 cl::init(64));
143static cl::opt<unsigned> ConstpoolPromotionMaxTotal(
144 "arm-promote-constant-max-total", cl::Hidden,
145 cl::desc("Maximum size of ALL constants to promote into a constant pool"),
146 cl::init(128));
147
148cl::opt<unsigned>
149MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
150 cl::desc("Maximum interleave factor for MVE VLDn to generate."),
151 cl::init(2));
152
153// The APCS parameter registers.
154static const MCPhysReg GPRArgRegs[] = {
155 ARM::R0, ARM::R1, ARM::R2, ARM::R3
156};
157
158void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT) {
159 if (VT != PromotedLdStVT) {
160 setOperationAction(ISD::LOAD, VT, Promote);
161 AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
162
163 setOperationAction(ISD::STORE, VT, Promote);
164 AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
165 }
166
167 MVT ElemTy = VT.getVectorElementType();
168 if (ElemTy != MVT::f64)
169 setOperationAction(ISD::SETCC, VT, Custom);
170 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
171 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
172 if (ElemTy == MVT::i32) {
173 setOperationAction(ISD::SINT_TO_FP, VT, Custom);
174 setOperationAction(ISD::UINT_TO_FP, VT, Custom);
175 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
176 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
177 } else {
178 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
179 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
180 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
181 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
182 }
183 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
184 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
185 setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
186 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
187 setOperationAction(ISD::SELECT, VT, Expand);
188 setOperationAction(ISD::SELECT_CC, VT, Expand);
189 setOperationAction(ISD::VSELECT, VT, Expand);
190 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
191 if (VT.isInteger()) {
192 setOperationAction(ISD::SHL, VT, Custom);
193 setOperationAction(ISD::SRA, VT, Custom);
194 setOperationAction(ISD::SRL, VT, Custom);
195 }
196
197 // Neon does not support vector divide/remainder operations.
198 setOperationAction(ISD::SDIV, VT, Expand);
199 setOperationAction(ISD::UDIV, VT, Expand);
200 setOperationAction(ISD::FDIV, VT, Expand);
201 setOperationAction(ISD::SREM, VT, Expand);
202 setOperationAction(ISD::UREM, VT, Expand);
203 setOperationAction(ISD::FREM, VT, Expand);
204 setOperationAction(ISD::SDIVREM, VT, Expand);
205 setOperationAction(ISD::UDIVREM, VT, Expand);
206
207 if (!VT.isFloatingPoint() &&
208 VT != MVT::v2i64 && VT != MVT::v1i64)
209 for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
210 setOperationAction(Opcode, VT, Legal);
211 if (!VT.isFloatingPoint())
212 for (auto Opcode : {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT})
213 setOperationAction(Opcode, VT, Legal);
214}
215
216void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
217 addRegisterClass(VT, &ARM::DPRRegClass);
218 addTypeForNEON(VT, MVT::f64);
219}
220
221void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
222 addRegisterClass(VT, &ARM::DPairRegClass);
223 addTypeForNEON(VT, MVT::v2f64);
224}
225
226void ARMTargetLowering::setAllExpand(MVT VT) {
227 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
228 setOperationAction(Opc, VT, Expand);
229
230 // We support these really simple operations even on types where all
231 // the actual arithmetic has to be broken down into simpler
232 // operations or turned into library calls.
233 setOperationAction(ISD::BITCAST, VT, Legal);
234 setOperationAction(ISD::LOAD, VT, Legal);
235 setOperationAction(ISD::STORE, VT, Legal);
236 setOperationAction(ISD::UNDEF, VT, Legal);
237}
238
239void ARMTargetLowering::addAllExtLoads(const MVT From, const MVT To,
240 LegalizeAction Action) {
241 setLoadExtAction(ISD::EXTLOAD, From, To, Action);
242 setLoadExtAction(ISD::ZEXTLOAD, From, To, Action);
243 setLoadExtAction(ISD::SEXTLOAD, From, To, Action);
244}
245
246void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
247 const MVT IntTypes[] = { MVT::v16i8, MVT::v8i16, MVT::v4i32 };
248
249 for (auto VT : IntTypes) {
250 addRegisterClass(VT, &ARM::MQPRRegClass);
251 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
252 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
253 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
254 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
255 setOperationAction(ISD::SHL, VT, Custom);
256 setOperationAction(ISD::SRA, VT, Custom);
257 setOperationAction(ISD::SRL, VT, Custom);
258 setOperationAction(ISD::SMIN, VT, Legal);
259 setOperationAction(ISD::SMAX, VT, Legal);
260 setOperationAction(ISD::UMIN, VT, Legal);
261 setOperationAction(ISD::UMAX, VT, Legal);
262 setOperationAction(ISD::ABS, VT, Legal);
263 setOperationAction(ISD::SETCC, VT, Custom);
264 setOperationAction(ISD::MLOAD, VT, Custom);
265 setOperationAction(ISD::MSTORE, VT, Legal);
266 setOperationAction(ISD::CTLZ, VT, Legal);
267 setOperationAction(ISD::CTTZ, VT, Custom);
268 setOperationAction(ISD::BITREVERSE, VT, Legal);
269 setOperationAction(ISD::BSWAP, VT, Legal);
270 setOperationAction(ISD::SADDSAT, VT, Legal);
271 setOperationAction(ISD::UADDSAT, VT, Legal);
272 setOperationAction(ISD::SSUBSAT, VT, Legal);
273 setOperationAction(ISD::USUBSAT, VT, Legal);
274 setOperationAction(ISD::ABDS, VT, Legal);
275 setOperationAction(ISD::ABDU, VT, Legal);
276 setOperationAction(ISD::AVGFLOORS, VT, Legal);
277 setOperationAction(ISD::AVGFLOORU, VT, Legal);
278 setOperationAction(ISD::AVGCEILS, VT, Legal);
279 setOperationAction(ISD::AVGCEILU, VT, Legal);
280
281 // No native support for these.
282 setOperationAction(ISD::UDIV, VT, Expand);
283 setOperationAction(ISD::SDIV, VT, Expand);
284 setOperationAction(ISD::UREM, VT, Expand);
285 setOperationAction(ISD::SREM, VT, Expand);
286 setOperationAction(ISD::UDIVREM, VT, Expand);
287 setOperationAction(ISD::SDIVREM, VT, Expand);
288 setOperationAction(ISD::CTPOP, VT, Expand);
289 setOperationAction(ISD::SELECT, VT, Expand);
290 setOperationAction(ISD::SELECT_CC, VT, Expand);
291
292 // Vector reductions
293 setOperationAction(ISD::VECREDUCE_ADD, VT, Legal);
294 setOperationAction(ISD::VECREDUCE_SMAX, VT, Legal);
295 setOperationAction(ISD::VECREDUCE_UMAX, VT, Legal);
296 setOperationAction(ISD::VECREDUCE_SMIN, VT, Legal);
297 setOperationAction(ISD::VECREDUCE_UMIN, VT, Legal);
298 setOperationAction(ISD::VECREDUCE_MUL, VT, Custom);
299 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
300 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
301 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
302
303 if (!HasMVEFP) {
304 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
305 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
306 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
307 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
308 } else {
309 setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
310 setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
311 }
312
313 // Pre and Post inc are supported on loads and stores
314 for (unsigned im = (unsigned)ISD::PRE_INC;
315 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
316 setIndexedLoadAction(im, VT, Legal);
317 setIndexedStoreAction(im, VT, Legal);
318 setIndexedMaskedLoadAction(im, VT, Legal);
319 setIndexedMaskedStoreAction(im, VT, Legal);
320 }
321 }
322
323 const MVT FloatTypes[] = { MVT::v8f16, MVT::v4f32 };
324 for (auto VT : FloatTypes) {
325 addRegisterClass(VT, &ARM::MQPRRegClass);
326 if (!HasMVEFP)
327 setAllExpand(VT);
328
329 // These are legal or custom whether we have MVE.fp or not
330 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
331 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
332 setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getVectorElementType(), Custom);
333 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
334 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
335 setOperationAction(ISD::BUILD_VECTOR, VT.getVectorElementType(), Custom);
336 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
337 setOperationAction(ISD::SETCC, VT, Custom);
338 setOperationAction(ISD::MLOAD, VT, Custom);
339 setOperationAction(ISD::MSTORE, VT, Legal);
340 setOperationAction(ISD::SELECT, VT, Expand);
341 setOperationAction(ISD::SELECT_CC, VT, Expand);
342
343 // Pre and Post inc are supported on loads and stores
344 for (unsigned im = (unsigned)ISD::PRE_INC;
345 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
346 setIndexedLoadAction(im, VT, Legal);
347 setIndexedStoreAction(im, VT, Legal);
348 setIndexedMaskedLoadAction(im, VT, Legal);
349 setIndexedMaskedStoreAction(im, VT, Legal);
350 }
351
352 if (HasMVEFP) {
353 setOperationAction(ISD::FMINNUM, VT, Legal);
354 setOperationAction(ISD::FMAXNUM, VT, Legal);
355 setOperationAction(ISD::FROUND, VT, Legal);
356 setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
357 setOperationAction(ISD::VECREDUCE_FMUL, VT, Custom);
358 setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
359 setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
360
361 // No native support for these.
362 setOperationAction(ISD::FDIV, VT, Expand);
363 setOperationAction(ISD::FREM, VT, Expand);
364 setOperationAction(ISD::FSQRT, VT, Expand);
365 setOperationAction(ISD::FSIN, VT, Expand);
366 setOperationAction(ISD::FCOS, VT, Expand);
367 setOperationAction(ISD::FPOW, VT, Expand);
368 setOperationAction(ISD::FLOG, VT, Expand);
369 setOperationAction(ISD::FLOG2, VT, Expand);
370 setOperationAction(ISD::FLOG10, VT, Expand);
371 setOperationAction(ISD::FEXP, VT, Expand);
372 setOperationAction(ISD::FEXP2, VT, Expand);
373 setOperationAction(ISD::FNEARBYINT, VT, Expand);
374 }
375 }
376
377 // Custom Expand smaller than legal vector reductions to prevent false zero
378 // items being added.
379 setOperationAction(ISD::VECREDUCE_FADD, MVT::v4f16, Custom);
380 setOperationAction(ISD::VECREDUCE_FMUL, MVT::v4f16, Custom);
381 setOperationAction(ISD::VECREDUCE_FMIN, MVT::v4f16, Custom);
382 setOperationAction(ISD::VECREDUCE_FMAX, MVT::v4f16, Custom);
383 setOperationAction(ISD::VECREDUCE_FADD, MVT::v2f16, Custom);
384 setOperationAction(ISD::VECREDUCE_FMUL, MVT::v2f16, Custom);
385 setOperationAction(ISD::VECREDUCE_FMIN, MVT::v2f16, Custom);
386 setOperationAction(ISD::VECREDUCE_FMAX, MVT::v2f16, Custom);
387
388 // We 'support' these types up to bitcast/load/store level, regardless of
389 // MVE integer-only / float support. Only doing FP data processing on the FP
390 // vector types is inhibited at integer-only level.
391 const MVT LongTypes[] = { MVT::v2i64, MVT::v2f64 };
392 for (auto VT : LongTypes) {
393 addRegisterClass(VT, &ARM::MQPRRegClass);
394 setAllExpand(VT);
395 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
396 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
397 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
398 setOperationAction(ISD::VSELECT, VT, Legal);
399 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
400 }
401 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
402
403 // We can do bitwise operations on v2i64 vectors
404 setOperationAction(ISD::AND, MVT::v2i64, Legal);
405 setOperationAction(ISD::OR, MVT::v2i64, Legal);
406 setOperationAction(ISD::XOR, MVT::v2i64, Legal);
407
408 // It is legal to extload from v4i8 to v4i16 or v4i32.
409 addAllExtLoads(MVT::v8i16, MVT::v8i8, Legal);
410 addAllExtLoads(MVT::v4i32, MVT::v4i16, Legal);
411 addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal);
412
413 // It is legal to sign extend from v4i8/v4i16 to v4i32 or v8i8 to v8i16.
414 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Legal);
415 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);
416 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);
417 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i8, Legal);
418 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i16, Legal);
419
420 // Some truncating stores are legal too.
421 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
422 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
423 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
424
425 // Pre and Post inc on these are legal, given the correct extends
426 for (unsigned im = (unsigned)ISD::PRE_INC;
427 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
428 for (auto VT : {MVT::v8i8, MVT::v4i8, MVT::v4i16}) {
429 setIndexedLoadAction(im, VT, Legal);
430 setIndexedStoreAction(im, VT, Legal);
431 setIndexedMaskedLoadAction(im, VT, Legal);
432 setIndexedMaskedStoreAction(im, VT, Legal);
433 }
434 }
435
436 // Predicate types
437 const MVT pTypes[] = {MVT::v16i1, MVT::v8i1, MVT::v4i1, MVT::v2i1};
438 for (auto VT : pTypes) {
439 addRegisterClass(VT, &ARM::VCCRRegClass);
440 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
441 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
442 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
443 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
444 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
445 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
446 setOperationAction(ISD::SETCC, VT, Custom);
447 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
448 setOperationAction(ISD::LOAD, VT, Custom);
449 setOperationAction(ISD::STORE, VT, Custom);
450 setOperationAction(ISD::TRUNCATE, VT, Custom);
451 setOperationAction(ISD::VSELECT, VT, Expand);
452 setOperationAction(ISD::SELECT, VT, Expand);
453 setOperationAction(ISD::SELECT_CC, VT, Expand);
454
455 if (!HasMVEFP) {
456 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
457 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
458 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
459 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
460 }
461 }
462 setOperationAction(ISD::SETCC, MVT::v2i1, Expand);
463 setOperationAction(ISD::TRUNCATE, MVT::v2i1, Expand);
464 setOperationAction(ISD::AND, MVT::v2i1, Expand);
465 setOperationAction(ISD::OR, MVT::v2i1, Expand);
466 setOperationAction(ISD::XOR, MVT::v2i1, Expand);
467 setOperationAction(ISD::SINT_TO_FP, MVT::v2i1, Expand);
468 setOperationAction(ISD::UINT_TO_FP, MVT::v2i1, Expand);
469 setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Expand);
470 setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Expand);
471
472 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
473 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
474 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
475 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
476 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i16, Custom);
477 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
478 setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
479 setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
480}
481
482ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
483 const ARMSubtarget &STI)
484 : TargetLowering(TM), Subtarget(&STI) {
485 RegInfo = Subtarget->getRegisterInfo();
486 Itins = Subtarget->getInstrItineraryData();
487
488 setBooleanContents(ZeroOrOneBooleanContent);
489 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
490
491 if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
492 !Subtarget->isTargetWatchOS() && !Subtarget->isTargetDriverKit()) {
493 bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard;
494 for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
495 setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
496 IsHFTarget ? CallingConv::ARM_AAPCS_VFP
497 : CallingConv::ARM_AAPCS);
498 }
499
500 if (Subtarget->isTargetMachO()) {
501 // Uses VFP for Thumb libfuncs if available.
502 if (Subtarget->isThumb() && Subtarget->hasVFP2Base() &&
503 Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
504 static const struct {
505 const RTLIB::Libcall Op;
506 const char * const Name;
507 const ISD::CondCode Cond;
508 } LibraryCalls[] = {
509 // Single-precision floating-point arithmetic.
510 { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
511 { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
512 { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
513 { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
514
515 // Double-precision floating-point arithmetic.
516 { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
517 { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
518 { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
519 { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
520
521 // Single-precision comparisons.
522 { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
523 { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
524 { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
525 { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
526 { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
527 { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
528 { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
529
530 // Double-precision comparisons.
531 { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
532 { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
533 { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
534 { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
535 { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
536 { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
537 { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
538
539 // Floating-point to integer conversions.
540 // i64 conversions are done via library routines even when generating VFP
541 // instructions, so use the same ones.
542 { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
543 { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
544 { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
545 { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
546
547 // Conversions between floating types.
548 { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
549 { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
550
551 // Integer to floating-point conversions.
552 // i64 conversions are done via library routines even when generating VFP
553 // instructions, so use the same ones.
554 // FIXME: There appears to be some naming inconsistency in ARM libgcc:
555 // e.g., __floatunsidf vs. __floatunssidfvfp.
556 { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
557 { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
558 { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
559 { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
560 };
561
562 for (const auto &LC : LibraryCalls) {
563 setLibcallName(LC.Op, LC.Name);
564 if (LC.Cond != ISD::SETCC_INVALID)
565 setCmpLibcallCC(LC.Op, LC.Cond);
566 }
567 }
568 }
569
570 // These libcalls are not available in 32-bit.
571 setLibcallName(RTLIB::SHL_I128, nullptr);
572 setLibcallName(RTLIB::SRL_I128, nullptr);
573 setLibcallName(RTLIB::SRA_I128, nullptr);
574 setLibcallName(RTLIB::MUL_I128, nullptr);
575 setLibcallName(RTLIB::MULO_I64, nullptr);
576 setLibcallName(RTLIB::MULO_I128, nullptr);
577
578 // RTLIB
579 if (Subtarget->isAAPCS_ABI() &&
580 (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
581 Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
582 static const struct {
583 const RTLIB::Libcall Op;
584 const char * const Name;
585 const CallingConv::ID CC;
586 const ISD::CondCode Cond;
587 } LibraryCalls[] = {
588 // Double-precision floating-point arithmetic helper functions
589 // RTABI chapter 4.1.2, Table 2
590 { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
591 { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
592 { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
593 { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
594
595 // Double-precision floating-point comparison helper functions
596 // RTABI chapter 4.1.2, Table 3
597 { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
598 { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
599 { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
600 { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
601 { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
602 { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
603 { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
604
605 // Single-precision floating-point arithmetic helper functions
606 // RTABI chapter 4.1.2, Table 4
607 { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
608 { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
609 { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
610 { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
611
612 // Single-precision floating-point comparison helper functions
613 // RTABI chapter 4.1.2, Table 5
614 { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
615 { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
616 { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
617 { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
618 { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
619 { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
620 { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
621
622 // Floating-point to integer conversions.
623 // RTABI chapter 4.1.2, Table 6
624 { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
625 { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
626 { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
627 { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
628 { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
629 { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
630 { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
631 { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
632
633 // Conversions between floating types.
634 // RTABI chapter 4.1.2, Table 7
635 { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
636 { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
637 { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
638
639 // Integer to floating-point conversions.
640 // RTABI chapter 4.1.2, Table 8
641 { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
642 { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
643 { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
644 { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
645 { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
646 { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
647 { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
648 { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
649
650 // Long long helper functions
651 // RTABI chapter 4.2, Table 9
652 { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
653 { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
654 { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
655 { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
656
657 // Integer division functions
658 // RTABI chapter 4.3.1
659 { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
660 { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
661 { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
662 { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
663 { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
664 { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
665 { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
666 { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
667 };
668
669 for (const auto &LC : LibraryCalls) {
670 setLibcallName(LC.Op, LC.Name);
671 setLibcallCallingConv(LC.Op, LC.CC);
672 if (LC.Cond != ISD::SETCC_INVALID)
673 setCmpLibcallCC(LC.Op, LC.Cond);
674 }
675
676 // EABI dependent RTLIB
677 if (TM.Options.EABIVersion == EABI::EABI4 ||
678 TM.Options.EABIVersion == EABI::EABI5) {
679 static const struct {
680 const RTLIB::Libcall Op;
681 const char *const Name;
682 const CallingConv::ID CC;
683 const ISD::CondCode Cond;
684 } MemOpsLibraryCalls[] = {
685 // Memory operations
686 // RTABI chapter 4.3.4
687 { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
688 { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
689 { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
690 };
691
692 for (const auto &LC : MemOpsLibraryCalls) {
693 setLibcallName(LC.Op, LC.Name);
694 setLibcallCallingConv(LC.Op, LC.CC);
695 if (LC.Cond != ISD::SETCC_INVALID)
696 setCmpLibcallCC(LC.Op, LC.Cond);
697 }
698 }
699 }
700
701 if (Subtarget->isTargetWindows()) {
702 static const struct {
703 const RTLIB::Libcall Op;
704 const char * const Name;
705 const CallingConv::ID CC;
706 } LibraryCalls[] = {
707 { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
708 { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
709 { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
710 { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
711 { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
712 { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
713 { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
714 { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
715 };
716
717 for (const auto &LC : LibraryCalls) {
718 setLibcallName(LC.Op, LC.Name);
719 setLibcallCallingConv(LC.Op, LC.CC);
720 }
721 }
722
723 // Use divmod compiler-rt calls for iOS 5.0 and later.
724 if (Subtarget->isTargetMachO() &&
725 !(Subtarget->isTargetIOS() &&
726 Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
727 setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
728 setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
729 }
730
731 // The half <-> float conversion functions are always soft-float on
732 // non-watchos platforms, but are needed for some targets which use a
733 // hard-float calling convention by default.
734 if (!Subtarget->isTargetWatchABI()) {
735 if (Subtarget->isAAPCS_ABI()) {
736 setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
737 setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
738 setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
739 } else {
740 setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
741 setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
742 setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
743 }
744 }
745
746 // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
747 // a __gnu_ prefix (which is the default).
748 if (Subtarget->isTargetAEABI()) {
749 static const struct {
750 const RTLIB::Libcall Op;
751 const char * const Name;
752 const CallingConv::ID CC;
753 } LibraryCalls[] = {
754 { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS },
755 { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS },
756 { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS },
757 };
758
759 for (const auto &LC : LibraryCalls) {
760 setLibcallName(LC.Op, LC.Name);
761 setLibcallCallingConv(LC.Op, LC.CC);
762 }
763 }
764
765 if (Subtarget->isThumb1Only())
766 addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
767 else
768 addRegisterClass(MVT::i32, &ARM::GPRRegClass);
769
770 if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only() &&
771 Subtarget->hasFPRegs()) {
772 addRegisterClass(MVT::f32, &ARM::SPRRegClass);
773 addRegisterClass(MVT::f64, &ARM::DPRRegClass);
774
775 setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i32, Custom);
776 setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i32, Custom);
777 setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);
778 setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
779
780 if (!Subtarget->hasVFP2Base())
781 setAllExpand(MVT::f32);
782 if (!Subtarget->hasFP64())
783 setAllExpand(MVT::f64);
784 }
785
786 if (Subtarget->hasFullFP16()) {
787 addRegisterClass(MVT::f16, &ARM::HPRRegClass);
788 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
789 setOperationAction(ISD::BITCAST, MVT::f16, Custom);
790
791 setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
792 setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
793 }
794
795 if (Subtarget->hasBF16()) {
796 addRegisterClass(MVT::bf16, &ARM::HPRRegClass);
797 setAllExpand(MVT::bf16);
798 if (!Subtarget->hasFullFP16())
799 setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
800 }
801
802 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
803 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
804 setTruncStoreAction(VT, InnerVT, Expand);
805 addAllExtLoads(VT, InnerVT, Expand);
806 }
807
808 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
809 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
810
811 setOperationAction(ISD::BSWAP, VT, Expand);
812 }
813
814 setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
815 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
816
817 setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
818 setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
819
820 if (Subtarget->hasMVEIntegerOps())
821 addMVEVectorTypes(Subtarget->hasMVEFloatOps());
822
823 // Combine low-overhead loop intrinsics so that we can lower i1 types.
824 if (Subtarget->hasLOB()) {
825 setTargetDAGCombine({ISD::BRCOND, ISD::BR_CC});
826 }
827
828 if (Subtarget->hasNEON()) {
829 addDRTypeForNEON(MVT::v2f32);
830 addDRTypeForNEON(MVT::v8i8);
831 addDRTypeForNEON(MVT::v4i16);
832 addDRTypeForNEON(MVT::v2i32);
833 addDRTypeForNEON(MVT::v1i64);
834
835 addQRTypeForNEON(MVT::v4f32);
836 addQRTypeForNEON(MVT::v2f64);
837 addQRTypeForNEON(MVT::v16i8);
838 addQRTypeForNEON(MVT::v8i16);
839 addQRTypeForNEON(MVT::v4i32);
840 addQRTypeForNEON(MVT::v2i64);
841
842 if (Subtarget->hasFullFP16()) {
843 addQRTypeForNEON(MVT::v8f16);
844 addDRTypeForNEON(MVT::v4f16);
845 }
846
847 if (Subtarget->hasBF16()) {
848 addQRTypeForNEON(MVT::v8bf16);
849 addDRTypeForNEON(MVT::v4bf16);
850 }
851 }
852
853 if (Subtarget->hasMVEIntegerOps() || Subtarget->hasNEON()) {
854 // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
855 // none of Neon, MVE or VFP supports any arithmetic operations on it.
856 setOperationAction(ISD::FADD, MVT::v2f64, Expand);
857 setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
858 setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
859 // FIXME: Code duplication: FDIV and FREM are expanded always, see
860 // ARMTargetLowering::addTypeForNEON method for details.
861 setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
862 setOperationAction(ISD::FREM, MVT::v2f64, Expand);
863 // FIXME: Create unittest.
864 // In other words, find a way to test the case when "copysign" appears in a
865 // DAG with vector operands.
866 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
867 // FIXME: Code duplication: SETCC has custom operation action, see
868 // ARMTargetLowering::addTypeForNEON method for details.
869 setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
870 // FIXME: Create unittest for FNEG and for FABS.
871 setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
872 setOperationAction(ISD::FABS, MVT::v2f64, Expand);
873 setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
874 setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
875 setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
876 setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
877 setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
878 setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
879 setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
880 setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
881 setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
882 // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
883 setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
884 setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
885 setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
886 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
887 setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
888 setOperationAction(ISD::FMA, MVT::v2f64, Expand);
889 }
890
891 if (Subtarget->hasNEON()) {
892 // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively
893 // supported for v4f32.
894 setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
895 setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
896 setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
897 setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
898 setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
899 setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
900 setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
901 setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
902 setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
903 setOperationAction(ISD::FCEIL, MVT::v4f32, Expand);
904 setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand);
905 setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
906 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
907 setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand);
908
909 // Mark v2f32 intrinsics.
910 setOperationAction(ISD::FSQRT, MVT::v2f32, Expand);
911 setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
912 setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
913 setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
914 setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
915 setOperationAction(ISD::FLOG2, MVT::v2f32, Expand);
916 setOperationAction(ISD::FLOG10, MVT::v2f32, Expand);
917 setOperationAction(ISD::FEXP, MVT::v2f32, Expand);
918 setOperationAction(ISD::FEXP2, MVT::v2f32, Expand);
919 setOperationAction(ISD::FCEIL, MVT::v2f32, Expand);
920 setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand);
921 setOperationAction(ISD::FRINT, MVT::v2f32, Expand);
922 setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand);
923 setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand);
924
925 // Neon does not support some operations on v1i64 and v2i64 types.
926 setOperationAction(ISD::MUL, MVT::v1i64, Expand);
927 // Custom handling for some quad-vector types to detect VMULL.
928 setOperationAction(ISD::MUL, MVT::v8i16, Custom);
929 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
930 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
931 // Custom handling for some vector types to avoid expensive expansions
932 setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
933 setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
934 setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
935 setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
936 // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
937 // a destination type that is wider than the source, nor does
938 // it have a FP_TO_[SU]INT instruction with a narrower destination than
939 // source.
940 setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
941 setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom);
942 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
943 setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
944 setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
945 setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Custom);
946 setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
947 setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
948
949 setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand);
950 setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
951
952 // NEON does not have single instruction CTPOP for vectors with element
953 // types wider than 8-bits. However, custom lowering can leverage the
954 // v8i8/v16i8 vcnt instruction.
955 setOperationAction(ISD::CTPOP, MVT::v2i32, Custom);
956 setOperationAction(ISD::CTPOP, MVT::v4i32, Custom);
957 setOperationAction(ISD::CTPOP, MVT::v4i16, Custom);
958 setOperationAction(ISD::CTPOP, MVT::v8i16, Custom);
959 setOperationAction(ISD::CTPOP, MVT::v1i64, Custom);
960 setOperationAction(ISD::CTPOP, MVT::v2i64, Custom);
961
962 setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
963 setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
964
965 // NEON does not have single instruction CTTZ for vectors.
966 setOperationAction(ISD::CTTZ, MVT::v8i8, Custom);
967 setOperationAction(ISD::CTTZ, MVT::v4i16, Custom);
968 setOperationAction(ISD::CTTZ, MVT::v2i32, Custom);
969 setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
970
971 setOperationAction(ISD::CTTZ, MVT::v16i8, Custom);
972 setOperationAction(ISD::CTTZ, MVT::v8i16, Custom);
973 setOperationAction(ISD::CTTZ, MVT::v4i32, Custom);
974 setOperationAction(ISD::CTTZ, MVT::v2i64, Custom);
975
976 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i8, Custom);
977 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i16, Custom);
978 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i32, Custom);
979 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v1i64, Custom);
980
981 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i8, Custom);
982 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i16, Custom);
983 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom);
984 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom);
985
986 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
987 setOperationAction(ISD::MULHS, VT, Expand);
988 setOperationAction(ISD::MULHU, VT, Expand);
989 }
990
991 // NEON only has FMA instructions as of VFP4.
992 if (!Subtarget->hasVFP4Base()) {
993 setOperationAction(ISD::FMA, MVT::v2f32, Expand);
994 setOperationAction(ISD::FMA, MVT::v4f32, Expand);
995 }
996
997 setTargetDAGCombine({ISD::SHL, ISD::SRL, ISD::SRA, ISD::FP_TO_SINT,
998 ISD::FP_TO_UINT, ISD::FDIV, ISD::LOAD});
999
1000 // It is legal to extload from v4i8 to v4i16 or v4i32.
1001 for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16,
1002 MVT::v2i32}) {
1003 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
1004 setLoadExtAction(ISD::EXTLOAD, VT, Ty, Legal);
1005 setLoadExtAction(ISD::ZEXTLOAD, VT, Ty, Legal);
1006 setLoadExtAction(ISD::SEXTLOAD, VT, Ty, Legal);
1007 }
1008 }
1009 }
1010
1011 if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
1012 setTargetDAGCombine(
1013 {ISD::BUILD_VECTOR, ISD::VECTOR_SHUFFLE, ISD::INSERT_SUBVECTOR,
1014 ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT,
1015 ISD::SIGN_EXTEND_INREG, ISD::STORE, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND,
1016 ISD::ANY_EXTEND, ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN,
1017 ISD::INTRINSIC_VOID, ISD::VECREDUCE_ADD, ISD::ADD, ISD::BITCAST});
1018 }
1019 if (Subtarget->hasMVEIntegerOps()) {
1020 setTargetDAGCombine({ISD::SMIN, ISD::UMIN, ISD::SMAX, ISD::UMAX,
1021 ISD::FP_EXTEND, ISD::SELECT, ISD::SELECT_CC,
1022 ISD::SETCC});
1023 }
1024 if (Subtarget->hasMVEFloatOps()) {
1025 setTargetDAGCombine(ISD::FADD);
1026 }
1027
1028 if (!Subtarget->hasFP64()) {
1029 // When targeting a floating-point unit with only single-precision
1030 // operations, f64 is legal for the few double-precision instructions which
1031 // are present. However, no double-precision operations other than moves,
1032 // loads and stores are provided by the hardware.
1033 setOperationAction(ISD::FADD, MVT::f64, Expand);
1034 setOperationAction(ISD::FSUB, MVT::f64, Expand);
1035 setOperationAction(ISD::FMUL, MVT::f64, Expand);
1036 setOperationAction(ISD::FMA, MVT::f64, Expand);
1037 setOperationAction(ISD::FDIV, MVT::f64, Expand);
1038 setOperationAction(ISD::FREM, MVT::f64, Expand);
1039 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
1040 setOperationAction(ISD::FGETSIGN, MVT::f64, Expand);
1041 setOperationAction(ISD::FNEG, MVT::f64, Expand);
1042 setOperationAction(ISD::FABS, MVT::f64, Expand);
1043 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
1044 setOperationAction(ISD::FSIN, MVT::f64, Expand);
1045 setOperationAction(ISD::FCOS, MVT::f64, Expand);
1046 setOperationAction(ISD::FPOW, MVT::f64, Expand);
1047 setOperationAction(ISD::FLOG, MVT::f64, Expand);
1048 setOperationAction(ISD::FLOG2, MVT::f64, Expand);
1049 setOperationAction(ISD::FLOG10, MVT::f64, Expand);
1050 setOperationAction(ISD::FEXP, MVT::f64, Expand);
1051 setOperationAction(ISD::FEXP2, MVT::f64, Expand);
1052 setOperationAction(ISD::FCEIL, MVT::f64, Expand);
1053 setOperationAction(ISD::FTRUNC, MVT::f64, Expand);
1054 setOperationAction(ISD::FRINT, MVT::f64, Expand);
1055 setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
1056 setOperationAction(ISD::FFLOOR, MVT::f64, Expand);
1057 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
1058 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
1059 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
1060 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
1061 setOperationAction(ISD::FP_TO_SINT, MVT::f64, Custom);
1062 setOperationAction(ISD::FP_TO_UINT, MVT::f64, Custom);
1063 setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
1064 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
1065 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
1066 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::f64, Custom);
1067 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::f64, Custom);
1068 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
1069 }
1070
1071 if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) {
1072 setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
1073 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Custom);
1074 if (Subtarget->hasFullFP16()) {
1075 setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
1076 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
1077 }
1078 }
1079
1080 if (!Subtarget->hasFP16()) {
1081 setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom);
1082 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Custom);
1083 }
1084
1085 computeRegisterProperties(Subtarget->getRegisterInfo());
1086
1087 // ARM does not have floating-point extending loads.
1088 for (MVT VT : MVT::fp_valuetypes()) {
1089 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
1090 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
1091 }
1092
1093 // ... or truncating stores
1094 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
1095 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
1096 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
1097
1098 // ARM does not have i1 sign extending load.
1099 for (MVT VT : MVT::integer_valuetypes())
1100 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
1101
1102 // ARM supports all 4 flavors of integer indexed load / store.
1103 if (!Subtarget->isThumb1Only()) {
1104 for (unsigned im = (unsigned)ISD::PRE_INC;
1105 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
1106 setIndexedLoadAction(im, MVT::i1, Legal);
1107 setIndexedLoadAction(im, MVT::i8, Legal);
1108 setIndexedLoadAction(im, MVT::i16, Legal);
1109 setIndexedLoadAction(im, MVT::i32, Legal);
1110 setIndexedStoreAction(im, MVT::i1, Legal);
1111 setIndexedStoreAction(im, MVT::i8, Legal);
1112 setIndexedStoreAction(im, MVT::i16, Legal);
1113 setIndexedStoreAction(im, MVT::i32, Legal);
1114 }
1115 } else {
1116 // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
1117 setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal);
1118 setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal);
1119 }
1120
1121 setOperationAction(ISD::SADDO, MVT::i32, Custom);
1122 setOperationAction(ISD::UADDO, MVT::i32, Custom);
1123 setOperationAction(ISD::SSUBO, MVT::i32, Custom);
1124 setOperationAction(ISD::USUBO, MVT::i32, Custom);
1125
1126 setOperationAction(ISD::ADDCARRY, MVT::i32, Custom);
1127 setOperationAction(ISD::SUBCARRY, MVT::i32, Custom);
1128 if (Subtarget->hasDSP()) {
1129 setOperationAction(ISD::SADDSAT, MVT::i8, Custom);
1130 setOperationAction(ISD::SSUBSAT, MVT::i8, Custom);
1131 setOperationAction(ISD::SADDSAT, MVT::i16, Custom);
1132 setOperationAction(ISD::SSUBSAT, MVT::i16, Custom);
1133 setOperationAction(ISD::UADDSAT, MVT::i8, Custom);
1134 setOperationAction(ISD::USUBSAT, MVT::i8, Custom);
1135 setOperationAction(ISD::UADDSAT, MVT::i16, Custom);
1136 setOperationAction(ISD::USUBSAT, MVT::i16, Custom);
1137 }
1138 if (Subtarget->hasBaseDSP()) {
1139 setOperationAction(ISD::SADDSAT, MVT::i32, Legal);
1140 setOperationAction(ISD::SSUBSAT, MVT::i32, Legal);
1141 }
1142
1143 // i64 operation support.
1144 setOperationAction(ISD::MUL, MVT::i64, Expand);
1145 setOperationAction(ISD::MULHU, MVT::i32, Expand);
1146 if (Subtarget->isThumb1Only()) {
1147 setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
1148 setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
1149 }
1150 if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
1151 || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
1152 setOperationAction(ISD::MULHS, MVT::i32, Expand);
1153
1154 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
1155 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
1156 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
1157 setOperationAction(ISD::SRL, MVT::i64, Custom);
1158 setOperationAction(ISD::SRA, MVT::i64, Custom);
1159 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
1160 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
1161 setOperationAction(ISD::LOAD, MVT::i64, Custom);
1162 setOperationAction(ISD::STORE, MVT::i64, Custom);
1163
1164 // MVE lowers 64 bit shifts to lsll and lsrl
1165 // assuming that ISD::SRL and SRA of i64 are already marked custom
1166 if (Subtarget->hasMVEIntegerOps())
1167 setOperationAction(ISD::SHL, MVT::i64, Custom);
1168
1169 // Expand to __aeabi_l{lsl,lsr,asr} calls for Thumb1.
1170 if (Subtarget->isThumb1Only()) {
1171 setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
1172 setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
1173 setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
1174 }
1175
1176 if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
1177 setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
1178
1179 // ARM does not have ROTL.
1180 setOperationAction(ISD::ROTL, MVT::i32, Expand);
1181 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
1182 setOperationAction(ISD::ROTL, VT, Expand);
1183 setOperationAction(ISD::ROTR, VT, Expand);
1184 }
1185 setOperationAction(ISD::CTTZ, MVT::i32, Custom);
1186 setOperationAction(ISD::CTPOP, MVT::i32, Expand);
1187 if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
1188 setOperationAction(ISD::CTLZ, MVT::i32, Expand);
1189 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, LibCall);
1190 }
1191
1192 // @llvm.readcyclecounter requires the Performance Monitors extension.
1193 // Default to the 0 expansion on unsupported platforms.
1194 // FIXME: Technically there are older ARM CPUs that have
1195 // implementation-specific ways of obtaining this information.
1196 if (Subtarget->hasPerfMon())
1197 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
1198
1199 // Only ARMv6 has BSWAP.
1200 if (!Subtarget->hasV6Ops())
1201 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
1202
1203 bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
1204 : Subtarget->hasDivideInARMMode();
1205 if (!hasDivide) {
1206 // These are expanded into libcalls if the cpu doesn't have HW divider.
1207 setOperationAction(ISD::SDIV, MVT::i32, LibCall);
1208 setOperationAction(ISD::UDIV, MVT::i32, LibCall);
1209 }
1210
1211 if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
1212 setOperationAction(ISD::SDIV, MVT::i32, Custom);
1213 setOperationAction(ISD::UDIV, MVT::i32, Custom);
1214
1215 setOperationAction(ISD::SDIV, MVT::i64, Custom);
1216 setOperationAction(ISD::UDIV, MVT::i64, Custom);
1217 }
1218
1219 setOperationAction(ISD::SREM, MVT::i32, Expand);
1220 setOperationAction(ISD::UREM, MVT::i32, Expand);
1221
1222 // Register based DivRem for AEABI (RTABI 4.2)
1223 if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
1224 Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
1225 Subtarget->isTargetWindows()) {
1226 setOperationAction(ISD::SREM, MVT::i64, Custom);
1227 setOperationAction(ISD::UREM, MVT::i64, Custom);
1228 HasStandaloneRem = false;
1229
1230 if (Subtarget->isTargetWindows()) {
1231 const struct {
1232 const RTLIB::Libcall Op;
1233 const char * const Name;
1234 const CallingConv::ID CC;
1235 } LibraryCalls[] = {
1236 { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS },
1237 { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS },
1238 { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS },
1239 { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS },
1240
1241 { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS },
1242 { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS },
1243 { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS },
1244 { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS },
1245 };
1246
1247 for (const auto &LC : LibraryCalls) {
1248 setLibcallName(LC.Op, LC.Name);
1249 setLibcallCallingConv(LC.Op, LC.CC);
1250 }
1251 } else {
1252 const struct {
1253 const RTLIB::Libcall Op;
1254 const char * const Name;
1255 const CallingConv::ID CC;
1256 } LibraryCalls[] = {
1257 { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1258 { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1259 { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1260 { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS },
1261
1262 { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1263 { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1264 { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1265 { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS },
1266 };
1267
1268 for (const auto &LC : LibraryCalls) {
1269 setLibcallName(LC.Op, LC.Name);
1270 setLibcallCallingConv(LC.Op, LC.CC);
1271 }
1272 }
1273
1274 setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
1275 setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
1276 setOperationAction(ISD::SDIVREM, MVT::i64, Custom);
1277 setOperationAction(ISD::UDIVREM, MVT::i64, Custom);
1278 } else {
1279 setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
1280 setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
1281 }
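// With register-based divrem a single runtime call yields both results; per
// the AEABI convention, __aeabi_idivmod/__aeabi_uidivmod return the quotient
// in r0 and the remainder in r1.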
1282
1283 if (Subtarget->getTargetTriple().isOSMSVCRT()) {
1284 // MSVCRT doesn't have powi; fall back to pow
1285 setLibcallName(RTLIB::POWI_F32, nullptr);
1286 setLibcallName(RTLIB::POWI_F64, nullptr);
1287 }
1288
1289 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
1290 setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
1291 setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
1292 setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
1293
1294 setOperationAction(ISD::TRAP, MVT::Other, Legal);
1295 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
1296
1297 // Use the default implementation.
1298 setOperationAction(ISD::VASTART, MVT::Other, Custom);
1299 setOperationAction(ISD::VAARG, MVT::Other, Expand);
1300 setOperationAction(ISD::VACOPY, MVT::Other, Expand);
1301 setOperationAction(ISD::VAEND, MVT::Other, Expand);
1302 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
1303 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
1304
1305 if (Subtarget->isTargetWindows())
1306 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
1307 else
1308 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
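// On Windows, dynamic stack allocations must probe the stack (__chkstk),
// which is why they get the Custom lowering above; see ARMISD::WIN__CHKSTK
// below.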
1309
1310 // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
1311 // the default expansion.
1312 InsertFencesForAtomic = false;
1313 if (Subtarget->hasAnyDataBarrier() &&
1314 (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
1315 // ATOMIC_FENCE needs custom lowering; the others should have been expanded
1316 // to ldrex/strex loops already.
1317 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
1318 if (!Subtarget->isThumb() || !Subtarget->isMClass())
1319 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
1320
1321 // On v8, we have particularly efficient implementations of atomic fences
1322 // if they can be combined with nearby atomic loads and stores.
1323 if (!Subtarget->hasAcquireRelease() ||
1324 getTargetMachine().getOptLevel() == 0) {
1325 // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
1326 InsertFencesForAtomic = true;
1327 }
1328 } else {
1329 // If there's anything we can use as a barrier, go through custom lowering
1330 // for ATOMIC_FENCE.
1331 // If the target has DMB in Thumb mode, fences can be inserted.
1332 if (Subtarget->hasDataBarrier())
1333 InsertFencesForAtomic = true;
1334
1335 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other,
1336 Subtarget->hasAnyDataBarrier() ? Custom : Expand);
1337
1338 // Set them all for expansion, which will force libcalls.
1339 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
1340 setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
1341 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
1342 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
1343 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
1344 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
1345 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
1346 setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
1347 setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
1348 setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
1349 setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
1350 setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
1351 // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
1352 // Unordered/Monotonic case.
1353 if (!InsertFencesForAtomic) {
1354 setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
1355 setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
1356 }
1357 }
1358
1359 // Compute supported atomic widths.
1360 if (Subtarget->isTargetLinux() ||
1361 (!Subtarget->isMClass() && Subtarget->hasV6Ops())) {
1362 // For targets where __sync_* routines are reliably available, we use them
1363 // if necessary.
1364 //
1365 // ARM Linux always supports 64-bit atomics through kernel-assisted atomic
1366 // routines (kernel 3.1 or later). FIXME: Not with compiler-rt?
1367 //
1368 // ARMv6 targets have native instructions in ARM mode. For Thumb mode,
1369 // such targets should provide __sync_* routines, which use the ARM mode
1370 // instructions. (ARMv6 doesn't have dmb, but it has an equivalent
1371 // encoding; see ARMISD::MEMBARRIER_MCR.)
1372 setMaxAtomicSizeInBitsSupported(64);
1373 } else if ((Subtarget->isMClass() && Subtarget->hasV8MBaselineOps()) ||
1374 Subtarget->hasForced32BitAtomics()) {
1375 // Cortex-M cores (besides Cortex-M0) have 32-bit atomics.
1376 setMaxAtomicSizeInBitsSupported(32);
1377 } else {
1378 // We can't assume anything about other targets; just use libatomic
1379 // routines.
1380 setMaxAtomicSizeInBitsSupported(0);
1381 }
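// Atomic operations wider than the supported size are lowered to __atomic_*
// libcalls (libatomic); e.g. with a limit of 0, every atomic RMW becomes a
// call such as __atomic_fetch_add_4 instead of an ldrex/strex loop.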
1382
1383 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
1384
1385 // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
1386 if (!Subtarget->hasV6Ops()) {
1387 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
1388 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
1389 }
1390 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
1391
1392 if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() &&
1393 !Subtarget->isThumb1Only()) {
1394 // Turn f64->i64 into VMOVRRD and i64->f64 into VMOVDRR,
1395 // iff the target supports VFP2.
1396 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
1397 setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
1398 setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
1399 }
1400
1401 // We want to custom lower some of our intrinsics.
1402 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
1403 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
1404 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
1405 setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
1406 if (Subtarget->useSjLjEH())
1407 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
1408
1409 setOperationAction(ISD::SETCC, MVT::i32, Expand);
1410 setOperationAction(ISD::SETCC, MVT::f32, Expand);
1411 setOperationAction(ISD::SETCC, MVT::f64, Expand);
1412 setOperationAction(ISD::SELECT, MVT::i32, Custom);
1413 setOperationAction(ISD::SELECT, MVT::f32, Custom);
1414 setOperationAction(ISD::SELECT, MVT::f64, Custom);
1415 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
1416 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
1417 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
1418 if (Subtarget->hasFullFP16()) {
1419 setOperationAction(ISD::SETCC, MVT::f16, Expand);
1420 setOperationAction(ISD::SELECT, MVT::f16, Custom);
1421 setOperationAction(ISD::SELECT_CC, MVT::f16, Custom);
1422 }
1423
1424 setOperationAction(ISD::SETCCCARRY, MVT::i32, Custom);
1425
1426 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
1427 setOperationAction(ISD::BR_CC, MVT::i32, Custom);
1428 if (Subtarget->hasFullFP16())
1429 setOperationAction(ISD::BR_CC, MVT::f16, Custom);
1430 setOperationAction(ISD::BR_CC, MVT::f32, Custom);
1431 setOperationAction(ISD::BR_CC, MVT::f64, Custom);
1432 setOperationAction(ISD::BR_JT, MVT::Other, Custom);
1433
1434 // We don't support sin/cos/fmod/copysign/pow
1435 setOperationAction(ISD::FSIN, MVT::f64, Expand);
1436 setOperationAction(ISD::FSIN, MVT::f32, Expand);
1437 setOperationAction(ISD::FCOS, MVT::f32, Expand);
1438 setOperationAction(ISD::FCOS, MVT::f64, Expand);
1439 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
1440 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
1441 setOperationAction(ISD::FREM, MVT::f64, Expand);
1442 setOperationAction(ISD::FREM, MVT::f32, Expand);
1443 if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() &&
1444 !Subtarget->isThumb1Only()) {
1445 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
1446 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
1447 }
1448 setOperationAction(ISD::FPOW, MVT::f64, Expand);
1449 setOperationAction(ISD::FPOW, MVT::f32, Expand);
1450
1451 if (!Subtarget->hasVFP4Base()) {
1452 setOperationAction(ISD::FMA, MVT::f64, Expand);
1453 setOperationAction(ISD::FMA, MVT::f32, Expand);
1454 }
1455
1456 // Various VFP goodness
1457 if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
1458 // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
1459 if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) {
1460 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
1461 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
1462 }
1463
1464 // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
1465 if (!Subtarget->hasFP16()) {
1466 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
1467 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
1468 }
1469
1470 // Strict floating-point comparisons need custom lowering.
1471 setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Custom);
1472 setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom);
1473 setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Custom);
1474 setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Custom);
1475 setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Custom);
1476 setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Custom);
1477 }
1478
1479 // Use __sincos_stret if available.
1480 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1481 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1482 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
1483 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
1484 }
1485
1486 // FP-ARMv8 implements a lot of rounding-like FP operations.
1487 if (Subtarget->hasFPARMv8Base()) {
1488 setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
1489 setOperationAction(ISD::FCEIL, MVT::f32, Legal);
1490 setOperationAction(ISD::FROUND, MVT::f32, Legal);
1491 setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
1492 setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
1493 setOperationAction(ISD::FRINT, MVT::f32, Legal);
1494 setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
1495 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
1496 if (Subtarget->hasNEON()) {
1497 setOperationAction(ISD::FMINNUM, MVT::v2f32, Legal);
1498 setOperationAction(ISD::FMAXNUM, MVT::v2f32, Legal);
1499 setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
1500 setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
1501 }
1502
1503 if (Subtarget->hasFP64()) {
1504 setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
1505 setOperationAction(ISD::FCEIL, MVT::f64, Legal);
1506 setOperationAction(ISD::FROUND, MVT::f64, Legal);
1507 setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
1508 setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
1509 setOperationAction(ISD::FRINT, MVT::f64, Legal);
1510 setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
1511 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
1512 }
1513 }
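// These map roughly onto the FP-ARMv8 rounding instructions: FFLOOR->VRINTM,
// FCEIL->VRINTP, FTRUNC->VRINTZ, FROUND->VRINTA, FRINT->VRINTX,
// FNEARBYINT->VRINTR, and FMINNUM/FMAXNUM->VMINNM/VMAXNM.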
1514
1515 // FP16 operations often need to be promoted to call lib functions.
1516 if (Subtarget->hasFullFP16()) {
1517 setOperationAction(ISD::FREM, MVT::f16, Promote);
1518 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
1519 setOperationAction(ISD::FSIN, MVT::f16, Promote);
1520 setOperationAction(ISD::FCOS, MVT::f16, Promote);
1521 setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
1522 setOperationAction(ISD::FPOWI, MVT::f16, Promote);
1523 setOperationAction(ISD::FPOW, MVT::f16, Promote);
1524 setOperationAction(ISD::FEXP, MVT::f16, Promote);
1525 setOperationAction(ISD::FEXP2, MVT::f16, Promote);
1526 setOperationAction(ISD::FLOG, MVT::f16, Promote);
1527 setOperationAction(ISD::FLOG10, MVT::f16, Promote);
1528 setOperationAction(ISD::FLOG2, MVT::f16, Promote);
1529
1530 setOperationAction(ISD::FROUND, MVT::f16, Legal);
1531 }
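// (Promote here means the f16 operation is widened to f32, handled there,
// and the result is rounded back to f16.)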
1532
1533 if (Subtarget->hasNEON()) {
1534 // vmin and vmax aren't available in a scalar form, so we can use
1535 // a NEON instruction with an undef lane instead. This has a performance
1536 // penalty on some cores, so we don't do this unless we have been
1537 // asked to by the core tuning model.
1538 if (Subtarget->useNEONForSinglePrecisionFP()) {
1539 setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);
1540 setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
1541 setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
1542 setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal);
1543 }
1544 setOperationAction(ISD::FMINIMUM, MVT::v2f32, Legal);
1545 setOperationAction(ISD::FMAXIMUM, MVT::v2f32, Legal);
1546 setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal);
1547 setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal);
1548
1549 if (Subtarget->hasFullFP16()) {
1550 setOperationAction(ISD::FMINNUM, MVT::v4f16, Legal);
1551 setOperationAction(ISD::FMAXNUM, MVT::v4f16, Legal);
1552 setOperationAction(ISD::FMINNUM, MVT::v8f16, Legal);
1553 setOperationAction(ISD::FMAXNUM, MVT::v8f16, Legal);
1554
1555 setOperationAction(ISD::FMINIMUM, MVT::v4f16, Legal);
1556 setOperationAction(ISD::FMAXIMUM, MVT::v4f16, Legal);
1557 setOperationAction(ISD::FMINIMUM, MVT::v8f16, Legal);
1558 setOperationAction(ISD::FMAXIMUM, MVT::v8f16, Legal);
1559 }
1560 }
1561
1562 // We have target-specific dag combine patterns for the following nodes:
1563 // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
1564 setTargetDAGCombine(
1565 {ISD::ADD, ISD::SUB, ISD::MUL, ISD::AND, ISD::OR, ISD::XOR});
1566
1567 if (Subtarget->hasMVEIntegerOps())
1568 setTargetDAGCombine(ISD::VSELECT);
1569
1570 if (Subtarget->hasV6Ops())
1571 setTargetDAGCombine(ISD::SRL);
1572 if (Subtarget->isThumb1Only())
1573 setTargetDAGCombine(ISD::SHL);
1574 // Attempt to lower smin/smax to ssat/usat
1575 if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) ||
1576 Subtarget->isThumb2()) {
1577 setTargetDAGCombine({ISD::SMIN, ISD::SMAX});
1578 }
1579
1580 setStackPointerRegisterToSaveRestore(ARM::SP);
1581
1582 if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
1583 !Subtarget->hasVFP2Base() || Subtarget->hasMinSize())
1584 setSchedulingPreference(Sched::RegPressure);
1585 else
1586 setSchedulingPreference(Sched::Hybrid);
1587
1588 //// temporary - rewrite interface to use type
1589 MaxStoresPerMemset = 8;
1590 MaxStoresPerMemsetOptSize = 4;
1591 MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
1592 MaxStoresPerMemcpyOptSize = 2;
1593 MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
1594 MaxStoresPerMemmoveOptSize = 2;
1595
1596 // On ARM arguments smaller than 4 bytes are extended, so all arguments
1597 // are at least 4 bytes aligned.
1598 setMinStackArgumentAlignment(Align(4));
1599
1600 // Prefer likely predicted branches to selects on out-of-order cores.
1601 PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
1602
1603 setPrefLoopAlignment(Align(1ULL << Subtarget->getPrefLoopLogAlignment()));
1604
1605 setMinFunctionAlignment(Subtarget->isThumb() ? Align(2) : Align(4));
1606
1607 if (Subtarget->isThumb() || Subtarget->isThumb2())
1608 setTargetDAGCombine(ISD::ABS);
1609}
1610
1611bool ARMTargetLowering::useSoftFloat() const {
1612 return Subtarget->useSoftFloat();
1613}
1614
1615// FIXME: It might make sense to define the representative register class as the
1616// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
1617 // a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
1618// SPR's representative would be DPR_VFP2. This should work well if register
1619// pressure tracking were modified such that a register use would increment the
1620 // pressure of the register class's representative and all of its super
1621// classes' representatives transitively. We have not implemented this because
1622// of the difficulty prior to coalescing of modeling operand register classes
1623// due to the common occurrence of cross class copies and subregister insertions
1624// and extractions.
1625std::pair<const TargetRegisterClass *, uint8_t>
1626ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
1627 MVT VT) const {
1628 const TargetRegisterClass *RRC = nullptr;
1629 uint8_t Cost = 1;
1630 switch (VT.SimpleTy) {
1631 default:
1632 return TargetLowering::findRepresentativeClass(TRI, VT);
1633 // Use DPR as representative register class for all floating point
1634 // and vector types. Since there are 32 SPR registers and 32 DPR registers,
1635 // the cost is 1 for both f32 and f64.
1636 case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
1637 case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
1638 RRC = &ARM::DPRRegClass;
1639 // When NEON is used for SP, only half of the register file is available
1640 // because operations that define both SP and DP results will be constrained
1641 // to the VFP2 class (D0-D15). We currently model this constraint prior to
1642 // coalescing by double-counting the SP regs. See the FIXME above.
1643 if (Subtarget->useNEONForSinglePrecisionFP())
1644 Cost = 2;
1645 break;
1646 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
1647 case MVT::v4f32: case MVT::v2f64:
1648 RRC = &ARM::DPRRegClass;
1649 Cost = 2;
1650 break;
1651 case MVT::v4i64:
1652 RRC = &ARM::DPRRegClass;
1653 Cost = 4;
1654 break;
1655 case MVT::v8i64:
1656 RRC = &ARM::DPRRegClass;
1657 Cost = 8;
1658 break;
1659 }
1660 return std::make_pair(RRC, Cost);
1661}
1662
1663const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
1664#define MAKE_CASE(V) \
1665 case V: \
1666 return #V;
1667 switch ((ARMISD::NodeType)Opcode) {
1668 case ARMISD::FIRST_NUMBER:
1669 break;
1670 MAKE_CASE(ARMISD::Wrapper)
1671 MAKE_CASE(ARMISD::WrapperPIC)
1672 MAKE_CASE(ARMISD::WrapperJT)
1673 MAKE_CASE(ARMISD::COPY_STRUCT_BYVAL)
1674 MAKE_CASE(ARMISD::CALL)
1675 MAKE_CASE(ARMISD::CALL_PRED)
1676 MAKE_CASE(ARMISD::CALL_NOLINK)
1677 MAKE_CASE(ARMISD::tSECALL)
1678 MAKE_CASE(ARMISD::t2CALL_BTI)
1679 MAKE_CASE(ARMISD::BRCOND)
1680 MAKE_CASE(ARMISD::BR_JT)
1681 MAKE_CASE(ARMISD::BR2_JT)
1682 MAKE_CASE(ARMISD::RET_FLAG)
1683 MAKE_CASE(ARMISD::SERET_FLAG)
1684 MAKE_CASE(ARMISD::INTRET_FLAG)
1685 MAKE_CASE(ARMISD::PIC_ADD)
1686 MAKE_CASE(ARMISD::CMP)
1687 MAKE_CASE(ARMISD::CMN)
1688 MAKE_CASE(ARMISD::CMPZ)
1689 MAKE_CASE(ARMISD::CMPFP)
1690 MAKE_CASE(ARMISD::CMPFPE)
1691 MAKE_CASE(ARMISD::CMPFPw0)
1692 MAKE_CASE(ARMISD::CMPFPEw0)
1693 MAKE_CASE(ARMISD::BCC_i64)
1694 MAKE_CASE(ARMISD::FMSTAT)
1695 MAKE_CASE(ARMISD::CMOV)
1696 MAKE_CASE(ARMISD::SUBS)
1697 MAKE_CASE(ARMISD::SSAT)
1698 MAKE_CASE(ARMISD::USAT)
1699 MAKE_CASE(ARMISD::ASRL)
1700 MAKE_CASE(ARMISD::LSRL)
1701 MAKE_CASE(ARMISD::LSLL)
1702 MAKE_CASE(ARMISD::SRL_FLAG)
1703 MAKE_CASE(ARMISD::SRA_FLAG)
1704 MAKE_CASE(ARMISD::RRX)
1705 MAKE_CASE(ARMISD::ADDC)
1706 MAKE_CASE(ARMISD::ADDE)
1707 MAKE_CASE(ARMISD::SUBC)
1708 MAKE_CASE(ARMISD::SUBE)
1709 MAKE_CASE(ARMISD::LSLS)
1710 MAKE_CASE(ARMISD::VMOVRRD)
1711 MAKE_CASE(ARMISD::VMOVDRR)
1712 MAKE_CASE(ARMISD::VMOVhr)
1713 MAKE_CASE(ARMISD::VMOVrh)
1714 MAKE_CASE(ARMISD::VMOVSR)
1715 MAKE_CASE(ARMISD::EH_SJLJ_SETJMP)
1716 MAKE_CASE(ARMISD::EH_SJLJ_LONGJMP)
1717 MAKE_CASE(ARMISD::EH_SJLJ_SETUP_DISPATCH)
1718 MAKE_CASE(ARMISD::TC_RETURN)
1719 MAKE_CASE(ARMISD::THREAD_POINTER)
1720 MAKE_CASE(ARMISD::DYN_ALLOC)
1721 MAKE_CASE(ARMISD::MEMBARRIER_MCR)
1722 MAKE_CASE(ARMISD::PRELOAD)
1723 MAKE_CASE(ARMISD::LDRD)
1724 MAKE_CASE(ARMISD::STRD)
1725 MAKE_CASE(ARMISD::WIN__CHKSTK)
1726 MAKE_CASE(ARMISD::WIN__DBZCHK)
1727 MAKE_CASE(ARMISD::PREDICATE_CAST)
1728 MAKE_CASE(ARMISD::VECTOR_REG_CAST)
1729 MAKE_CASE(ARMISD::MVESEXT)
1730 MAKE_CASE(ARMISD::MVEZEXT)
1731 MAKE_CASE(ARMISD::MVETRUNC)
1732 MAKE_CASE(ARMISD::VCMP)
1733 MAKE_CASE(ARMISD::VCMPZ)
1734 MAKE_CASE(ARMISD::VTST)
1735 MAKE_CASE(ARMISD::VSHLs)
1736 MAKE_CASE(ARMISD::VSHLu)
1737 MAKE_CASE(ARMISD::VSHLIMM)
1738 MAKE_CASE(ARMISD::VSHRsIMM)
1739 MAKE_CASE(ARMISD::VSHRuIMM)
1740 MAKE_CASE(ARMISD::VRSHRsIMM)
1741 MAKE_CASE(ARMISD::VRSHRuIMM)
1742 MAKE_CASE(ARMISD::VRSHRNIMM)
1743 MAKE_CASE(ARMISD::VQSHLsIMM)
1744 MAKE_CASE(ARMISD::VQSHLuIMM)
1745 MAKE_CASE(ARMISD::VQSHLsuIMM)
1746 MAKE_CASE(ARMISD::VQSHRNsIMM)
1747 MAKE_CASE(ARMISD::VQSHRNuIMM)
1748 MAKE_CASE(ARMISD::VQSHRNsuIMM)
1749 MAKE_CASE(ARMISD::VQRSHRNsIMM)
1750 MAKE_CASE(ARMISD::VQRSHRNuIMM)
1751 MAKE_CASE(ARMISD::VQRSHRNsuIMM)
1752 MAKE_CASE(ARMISD::VSLIIMM)
1753 MAKE_CASE(ARMISD::VSRIIMM)
1754 MAKE_CASE(ARMISD::VGETLANEu)
1755 MAKE_CASE(ARMISD::VGETLANEs)
1756 MAKE_CASE(ARMISD::VMOVIMM)
1757 MAKE_CASE(ARMISD::VMVNIMM)
1758 MAKE_CASE(ARMISD::VMOVFPIMM)
1759 MAKE_CASE(ARMISD::VDUP)
1760 MAKE_CASE(ARMISD::VDUPLANE)
1761 MAKE_CASE(ARMISD::VEXT)
1762 MAKE_CASE(ARMISD::VREV64)
1763 MAKE_CASE(ARMISD::VREV32)
1764 MAKE_CASE(ARMISD::VREV16)
1765 MAKE_CASE(ARMISD::VZIP)
1766 MAKE_CASE(ARMISD::VUZP)
1767 MAKE_CASE(ARMISD::VTRN)
1768 MAKE_CASE(ARMISD::VTBL1)
1769 MAKE_CASE(ARMISD::VTBL2)
1770 MAKE_CASE(ARMISD::VMOVN)
1771 MAKE_CASE(ARMISD::VQMOVNs)
1772 MAKE_CASE(ARMISD::VQMOVNu)
1773 MAKE_CASE(ARMISD::VCVTN)
1774 MAKE_CASE(ARMISD::VCVTL)
1775 MAKE_CASE(ARMISD::VIDUP)
1776 MAKE_CASE(ARMISD::VMULLs)
1777 MAKE_CASE(ARMISD::VMULLu)
1778 MAKE_CASE(ARMISD::VQDMULH)
1779 MAKE_CASE(ARMISD::VADDVs)
1780 MAKE_CASE(ARMISD::VADDVu)
1781 MAKE_CASE(ARMISD::VADDVps)
1782 MAKE_CASE(ARMISD::VADDVpu)
1783 MAKE_CASE(ARMISD::VADDLVs)
1784 MAKE_CASE(ARMISD::VADDLVu)
1785 MAKE_CASE(ARMISD::VADDLVAs)
1786 MAKE_CASE(ARMISD::VADDLVAu)
1787 MAKE_CASE(ARMISD::VADDLVps)
1788 MAKE_CASE(ARMISD::VADDLVpu)
1789 MAKE_CASE(ARMISD::VADDLVAps)
1790 MAKE_CASE(ARMISD::VADDLVApu)
1791 MAKE_CASE(ARMISD::VMLAVs)
1792 MAKE_CASE(ARMISD::VMLAVu)
1793 MAKE_CASE(ARMISD::VMLAVps)
1794 MAKE_CASE(ARMISD::VMLAVpu)
1795 MAKE_CASE(ARMISD::VMLALVs)
1796 MAKE_CASE(ARMISD::VMLALVu)
1797 MAKE_CASE(ARMISD::VMLALVps)
1798 MAKE_CASE(ARMISD::VMLALVpu)
1799 MAKE_CASE(ARMISD::VMLALVAs)
1800 MAKE_CASE(ARMISD::VMLALVAu)
1801 MAKE_CASE(ARMISD::VMLALVAps)
1802 MAKE_CASE(ARMISD::VMLALVApu)
1803 MAKE_CASE(ARMISD::VMINVu)
1804 MAKE_CASE(ARMISD::VMINVs)
1805 MAKE_CASE(ARMISD::VMAXVu)
1806 MAKE_CASE(ARMISD::VMAXVs)
1807 MAKE_CASE(ARMISD::UMAAL)
1808 MAKE_CASE(ARMISD::UMLAL)
1809 MAKE_CASE(ARMISD::SMLAL)
1810 MAKE_CASE(ARMISD::SMLALBB)
1811 MAKE_CASE(ARMISD::SMLALBT)
1812 MAKE_CASE(ARMISD::SMLALTB)
1813 MAKE_CASE(ARMISD::SMLALTT)
1814 MAKE_CASE(ARMISD::SMULWB)
1815 MAKE_CASE(ARMISD::SMULWT)
1816 MAKE_CASE(ARMISD::SMLALD)
1817 MAKE_CASE(ARMISD::SMLALDX)
1818 MAKE_CASE(ARMISD::SMLSLD)
1819 MAKE_CASE(ARMISD::SMLSLDX)
1820 MAKE_CASE(ARMISD::SMMLAR)
1821 MAKE_CASE(ARMISD::SMMLSR)
1822 MAKE_CASE(ARMISD::QADD16b)
1823 MAKE_CASE(ARMISD::QSUB16b)
1824 MAKE_CASE(ARMISD::QADD8b)
1825 MAKE_CASE(ARMISD::QSUB8b)
1826 MAKE_CASE(ARMISD::UQADD16b)
1827 MAKE_CASE(ARMISD::UQSUB16b)
1828 MAKE_CASE(ARMISD::UQADD8b)
1829 MAKE_CASE(ARMISD::UQSUB8b)
1830 MAKE_CASE(ARMISD::BUILD_VECTOR)
1831 MAKE_CASE(ARMISD::BFI)
1832 MAKE_CASE(ARMISD::VORRIMM)
1833 MAKE_CASE(ARMISD::VBICIMM)
1834 MAKE_CASE(ARMISD::VBSP)
1835 MAKE_CASE(ARMISD::MEMCPY)
1836 MAKE_CASE(ARMISD::VLD1DUP)
1837 MAKE_CASE(ARMISD::VLD2DUP)
1838 MAKE_CASE(ARMISD::VLD3DUP)
1839 MAKE_CASE(ARMISD::VLD4DUP)
1840 MAKE_CASE(ARMISD::VLD1_UPD)
1841 MAKE_CASE(ARMISD::VLD2_UPD)
1842 MAKE_CASE(ARMISD::VLD3_UPD)
1843 MAKE_CASE(ARMISD::VLD4_UPD)
1844 MAKE_CASE(ARMISD::VLD1x2_UPD)
1845 MAKE_CASE(ARMISD::VLD1x3_UPD)
1846 MAKE_CASE(ARMISD::VLD1x4_UPD)
1847 MAKE_CASE(ARMISD::VLD2LN_UPD)
1848 MAKE_CASE(ARMISD::VLD3LN_UPD)
1849 MAKE_CASE(ARMISD::VLD4LN_UPD)
1850 MAKE_CASE(ARMISD::VLD1DUP_UPD)
1851 MAKE_CASE(ARMISD::VLD2DUP_UPD)
1852 MAKE_CASE(ARMISD::VLD3DUP_UPD)
1853 MAKE_CASE(ARMISD::VLD4DUP_UPD)
1854 MAKE_CASE(ARMISD::VST1_UPD)
1855 MAKE_CASE(ARMISD::VST2_UPD)
1856 MAKE_CASE(ARMISD::VST3_UPD)
1857 MAKE_CASE(ARMISD::VST4_UPD)
1858 MAKE_CASE(ARMISD::VST1x2_UPD)
1859 MAKE_CASE(ARMISD::VST1x3_UPD)
1860 MAKE_CASE(ARMISD::VST1x4_UPD)
1861 MAKE_CASE(ARMISD::VST2LN_UPD)
1862 MAKE_CASE(ARMISD::VST3LN_UPD)
1863 MAKE_CASE(ARMISD::VST4LN_UPD)
1864 MAKE_CASE(ARMISD::WLS)
1865 MAKE_CASE(ARMISD::WLSSETUP)
1866 MAKE_CASE(ARMISD::LE)
1867 MAKE_CASE(ARMISD::LOOP_DEC)
1868 MAKE_CASE(ARMISD::CSINV)
1869 MAKE_CASE(ARMISD::CSNEG)
1870 MAKE_CASE(ARMISD::CSINC)
1871 MAKE_CASE(ARMISD::MEMCPYLOOP)
1872 MAKE_CASE(ARMISD::MEMSETLOOP)
1873#undef MAKE_CASE
1874 }
1875 return nullptr;
1876}
1877
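/// getSetCCResultType - Return the type to use for the boolean result of a
/// SETCC: the pointer-sized integer for scalars, an i1 predicate vector for
/// MVE vector types, and otherwise the vector with its element type replaced
/// by an integer of the same width.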
1878EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
1879 EVT VT) const {
1880 if (!VT.isVector())
1881 return getPointerTy(DL);
1882
1883 // MVE has a predicate register.
1884 if ((Subtarget->hasMVEIntegerOps() &&
1885 (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
1886 VT == MVT::v16i8)) ||
1887 (Subtarget->hasMVEFloatOps() &&
1888 (VT == MVT::v2f64 || VT == MVT::v4f32 || VT == MVT::v8f16)))
1889 return MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
1890 return VT.changeVectorElementTypeToInteger();
1891}
1892
1893/// getRegClassFor - Return the register class that should be used for the
1894/// specified value type.
1895const TargetRegisterClass *
1896ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
1897 (void)isDivergent;
1898 // Map v4i64 to QQ registers but do not make the type legal. Similarly map
1899 // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
1900 // load / store 4 to 8 consecutive NEON D registers, or 2 to 4 consecutive
1901 // MVE Q registers.
1902 if (Subtarget->hasNEON()) {
1903 if (VT == MVT::v4i64)
1904 return &ARM::QQPRRegClass;
1905 if (VT == MVT::v8i64)
1906 return &ARM::QQQQPRRegClass;
1907 }
1908 if (Subtarget->hasMVEIntegerOps()) {
1909 if (VT == MVT::v4i64)
1910 return &ARM::MQQPRRegClass;
1911 if (VT == MVT::v8i64)
1912 return &ARM::MQQQQPRRegClass;
1913 }
1914 return TargetLowering::getRegClassFor(VT);
1915}
1916
1917 // memcpy, and other memory intrinsics, typically try to use LDM/STM if the
1918// source/dest is aligned and the copy size is large enough. We therefore want
1919// to align such objects passed to memory intrinsics.
1920bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
1921 Align &PrefAlign) const {
1922 if (!isa<MemIntrinsic>(CI))
1923 return false;
1924 MinSize = 8;
1925 // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
1926 // cycle faster than 4-byte aligned LDM.
1927 PrefAlign =
1928 (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? Align(8) : Align(4));
1929 return true;
1930}
1931
1932// Create a fast isel object.
1933FastISel *
1934ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
1935 const TargetLibraryInfo *libInfo) const {
1936 return ARM::createFastISel(funcInfo, libInfo);
1937}
1938
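// Prefer ILP scheduling for nodes that produce floating-point or vector
// values, and for machine nodes whose first definition has a long operand
// latency; otherwise schedule for register pressure.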
1939Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
1940 unsigned NumVals = N->getNumValues();
1941 if (!NumVals)
1942 return Sched::RegPressure;
1943
1944 for (unsigned i = 0; i != NumVals; ++i) {
1945 EVT VT = N->getValueType(i);
1946 if (VT == MVT::Glue || VT == MVT::Other)
1947 continue;
1948 if (VT.isFloatingPoint() || VT.isVector())
1949 return Sched::ILP;
1950 }
1951
1952 if (!N->isMachineOpcode())
1953 return Sched::RegPressure;
1954
1955 // Loads are scheduled for latency even if the instruction itinerary
1956 // is not available.
1957 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1958 const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1959
1960 if (MCID.getNumDefs() == 0)
1961 return Sched::RegPressure;
1962 if (!Itins->isEmpty() &&
1963 Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
1964 return Sched::ILP;
1965
1966 return Sched::RegPressure;
1967}
1968
1969//===----------------------------------------------------------------------===//
1970// Lowering Code
1971//===----------------------------------------------------------------------===//
1972
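// Helpers that match shifts by exactly 16 bits, i.e. operations that select
// or reposition one 16-bit half of a 32-bit value.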
1973static bool isSRL16(const SDValue &Op) {
1974 if (Op.getOpcode() != ISD::SRL)
1975 return false;
1976 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1977 return Const->getZExtValue() == 16;
1978 return false;
1979}
1980
1981static bool isSRA16(const SDValue &Op) {
1982 if (Op.getOpcode() != ISD::SRA)
1983 return false;
1984 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1985 return Const->getZExtValue() == 16;
1986 return false;
1987}
1988
1989static bool isSHL16(const SDValue &Op) {
1990 if (Op.getOpcode() != ISD::SHL)
1991 return false;
1992 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1993 return Const->getZExtValue() == 16;
1994 return false;
1995}
1996
1997 // Check for a signed 16-bit value. We special-case SRA because it makes
1998 // things simpler when also looking for SRAs that aren't sign extending a
1999// smaller value. Without the check, we'd need to take extra care with
2000// checking order for some operations.
2001static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
2002 if (isSRA16(Op))
2003 return isSHL16(Op.getOperand(0));
2004 return DAG.ComputeNumSignBits(Op) == 17;
2005}
2006
2007/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
2008static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
2009 switch (CC) {
2010 default: llvm_unreachable("Unknown condition code!");
2011 case ISD::SETNE: return ARMCC::NE;
2012 case ISD::SETEQ: return ARMCC::EQ;
2013 case ISD::SETGT: return ARMCC::GT;
2014 case ISD::SETGE: return ARMCC::GE;
2015 case ISD::SETLT: return ARMCC::LT;
2016 case ISD::SETLE: return ARMCC::LE;
2017 case ISD::SETUGT: return ARMCC::HI;
2018 case ISD::SETUGE: return ARMCC::HS;
2019 case ISD::SETULT: return ARMCC::LO;
2020 case ISD::SETULE: return ARMCC::LS;
2021 }
2022}
2023
2024/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
2025static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
2026 ARMCC::CondCodes &CondCode2) {
2027 CondCode2 = ARMCC::AL;
2028 switch (CC) {
2029 default: llvm_unreachable("Unknown FP condition!");
2030 case ISD::SETEQ:
2031 case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
2032 case ISD::SETGT:
2033 case ISD::SETOGT: CondCode = ARMCC::GT; break;
2034 case ISD::SETGE:
2035 case ISD::SETOGE: CondCode = ARMCC::GE; break;
2036 case ISD::SETOLT: CondCode = ARMCC::MI; break;
2037 case ISD::SETOLE: CondCode = ARMCC::LS; break;
2038 case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
2039 case ISD::SETO: CondCode = ARMCC::VC; break;
2040 case ISD::SETUO: CondCode = ARMCC::VS; break;
2041 case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
2042 case ISD::SETUGT: CondCode = ARMCC::HI; break;
2043 case ISD::SETUGE: CondCode = ARMCC::PL; break;
2044 case ISD::SETLT:
2045 case ISD::SETULT: CondCode = ARMCC::LT; break;
2046 case ISD::SETLE:
2047 case ISD::SETULE: CondCode = ARMCC::LE; break;
2048 case ISD::SETNE:
2049 case ISD::SETUNE: CondCode = ARMCC::NE; break;
2050 }
2051}
2052
2053//===----------------------------------------------------------------------===//
2054// Calling Convention Implementation
2055//===----------------------------------------------------------------------===//
2056
2057/// getEffectiveCallingConv - Get the effective calling convention, taking into
2058/// account presence of floating point hardware and calling convention
2059/// limitations, such as support for variadic functions.
2060CallingConv::ID
2061ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
2062 bool isVarArg) const {
2063 switch (CC) {
2064 default:
2065 report_fatal_error("Unsupported calling convention");
2066 case CallingConv::ARM_AAPCS:
2067 case CallingConv::ARM_APCS:
2068 case CallingConv::GHC:
2069 case CallingConv::CFGuard_Check:
2070 return CC;
2071 case CallingConv::PreserveMost:
2072 return CallingConv::PreserveMost;
2073 case CallingConv::ARM_AAPCS_VFP:
2074 case CallingConv::Swift:
2075 case CallingConv::SwiftTail:
2076 return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP;
2077 case CallingConv::C:
2078 case CallingConv::Tail:
2079 if (!Subtarget->isAAPCS_ABI())
2080 return CallingConv::ARM_APCS;
2081 else if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() &&
2082 getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
2083 !isVarArg)
2084 return CallingConv::ARM_AAPCS_VFP;
2085 else
2086 return CallingConv::ARM_AAPCS;
2087 case CallingConv::Fast:
2088 case CallingConv::CXX_FAST_TLS:
2089 if (!Subtarget->isAAPCS_ABI()) {
2090 if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && !isVarArg)
2091 return CallingConv::Fast;
2092 return CallingConv::ARM_APCS;
2093 } else if (Subtarget->hasVFP2Base() &&
2094 !Subtarget->isThumb1Only() && !isVarArg)
2095 return CallingConv::ARM_AAPCS_VFP;
2096 else
2097 return CallingConv::ARM_AAPCS;
2098 }
2099}
2100
2101CCAssignFn *ARMTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
2102 bool isVarArg) const {
2103 return CCAssignFnForNode(CC, false, isVarArg);
2104}
2105
2106CCAssignFn *ARMTargetLowering::CCAssignFnForReturn(CallingConv::ID CC,
2107 bool isVarArg) const {
2108 return CCAssignFnForNode(CC, true, isVarArg);
2109}
2110
2111/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
2112/// CallingConvention.
2113CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
2114 bool Return,
2115 bool isVarArg) const {
2116 switch (getEffectiveCallingConv(CC, isVarArg)) {
2117 default:
2118 report_fatal_error("Unsupported calling convention");
2119 case CallingConv::ARM_APCS:
2120 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
2121 case CallingConv::ARM_AAPCS:
2122 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2123 case CallingConv::ARM_AAPCS_VFP:
2124 return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
2125 case CallingConv::Fast:
2126 return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
2127 case CallingConv::GHC:
2128 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
2129 case CallingConv::PreserveMost:
2130 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2131 case CallingConv::CFGuard_Check:
2132 return (Return ? RetCC_ARM_AAPCS : CC_ARM_Win32_CFGuard_Check);
2133 }
2134}
2135
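// MoveToHPR / MoveFromHPR move an f16/bf16 value between its 32-bit location
// type and a half-precision register: a single VMOVhr/VMOVrh when full FP16
// is available, otherwise an integer truncate/zero-extend plus bitcasts.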
2136SDValue ARMTargetLowering::MoveToHPR(const SDLoc &dl, SelectionDAG &DAG,
2137 MVT LocVT, MVT ValVT, SDValue Val) const {
2138 Val = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocVT.getSizeInBits()),
2139 Val);
2140 if (Subtarget->hasFullFP16()) {
2141 Val = DAG.getNode(ARMISD::VMOVhr, dl, ValVT, Val);
2142 } else {
2143 Val = DAG.getNode(ISD::TRUNCATE, dl,
2144 MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
2145 Val = DAG.getNode(ISD::BITCAST, dl, ValVT, Val);
2146 }
2147 return Val;
2148}
2149
2150SDValue ARMTargetLowering::MoveFromHPR(const SDLoc &dl, SelectionDAG &DAG,
2151 MVT LocVT, MVT ValVT,
2152 SDValue Val) const {
2153 if (Subtarget->hasFullFP16()) {
2154 Val = DAG.getNode(ARMISD::VMOVrh, dl,
2155 MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
2156 } else {
2157 Val = DAG.getNode(ISD::BITCAST, dl,
2158 MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
2159 Val = DAG.getNode(ISD::ZERO_EXTEND, dl,
2160 MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
2161 }
2162 return DAG.getNode(ISD::BITCAST, dl, LocVT, Val);
2163}
2164
2165/// LowerCallResult - Lower the result values of a call into the
2166/// appropriate copies out of appropriate physical registers.
2167SDValue ARMTargetLowering::LowerCallResult(
2168 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
2169 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2170 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
2171 SDValue ThisVal) const {
2172 // Assign locations to each value returned by this call.
2173 SmallVector<CCValAssign, 16> RVLocs;
2174 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2175 *DAG.getContext());
2176 CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));
2177
2178 // Copy all of the result registers out of their specified physreg.
2179 for (unsigned i = 0; i != RVLocs.size(); ++i) {
2180 CCValAssign VA = RVLocs[i];
2181
2182 // Pass 'this' value directly from the argument to return value, to avoid
2183 // reg unit interference
2184 if (i == 0 && isThisReturn) {
2185 assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
2186 "unexpected return calling convention register assignment");
2187 InVals.push_back(ThisVal);
2188 continue;
2189 }
2190
2191 SDValue Val;
2192 if (VA.needsCustom() &&
2193 (VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2f64)) {
2194 // Handle f64 or half of a v2f64.
2195 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
2196 InFlag);
2197 Chain = Lo.getValue(1);
2198 InFlag = Lo.getValue(2);
2199 VA = RVLocs[++i]; // skip ahead to next loc
2200 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
2201 InFlag);
2202 Chain = Hi.getValue(1);
2203 InFlag = Hi.getValue(2);
2204 if (!Subtarget->isLittle())
2205 std::swap (Lo, Hi);
2206 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
2207
2208 if (VA.getLocVT() == MVT::v2f64) {
2209 SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
2210 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
2211 DAG.getConstant(0, dl, MVT::i32));
2212
2213 VA = RVLocs[++i]; // skip ahead to next loc
2214 Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
2215 Chain = Lo.getValue(1);
2216 InFlag = Lo.getValue(2);
2217 VA = RVLocs[++i]; // skip ahead to next loc
2218 Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
2219 Chain = Hi.getValue(1);
2220 InFlag = Hi.getValue(2);
2221 if (!Subtarget->isLittle())
2222 std::swap (Lo, Hi);
2223 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
2224 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
2225 DAG.getConstant(1, dl, MVT::i32));
2226 }
2227 } else {
2228 Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
2229 InFlag);
2230 Chain = Val.getValue(1);
2231 InFlag = Val.getValue(2);
2232 }
2233
2234 switch (VA.getLocInfo()) {
2235 default: llvm_unreachable("Unknown loc info!");
2236 case CCValAssign::Full: break;
2237 case CCValAssign::BCvt:
2238 Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
2239 break;
2240 }
2241
2242 // f16 arguments have their size extended to 4 bytes and passed as if they
2243 // had been copied to the LSBs of a 32-bit register.
2244 // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
2245 if (VA.needsCustom() &&
2246 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
2247 Val = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Val);
2248
2249 InVals.push_back(Val);
2250 }
2251
2252 return Chain;
2253}
2254
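/// computeAddrForCallArg - Compute the address and MachinePointerInfo at
/// which an outgoing call argument should be stored: a fixed frame index
/// (offset by SPDiff) for tail calls, or SP plus the argument's stack offset
/// otherwise.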
2255std::pair<SDValue, MachinePointerInfo> ARMTargetLowering::computeAddrForCallArg(
2256 const SDLoc &dl, SelectionDAG &DAG, const CCValAssign &VA, SDValue StackPtr,
2257 bool IsTailCall, int SPDiff) const {
2258 SDValue DstAddr;
2259 MachinePointerInfo DstInfo;
2260 int32_t Offset = VA.getLocMemOffset();
2261 MachineFunction &MF = DAG.getMachineFunction();
2262
2263 if (IsTailCall) {
2264 Offset += SPDiff;
2265 auto PtrVT = getPointerTy(DAG.getDataLayout());
2266 int Size = VA.getLocVT().getFixedSizeInBits() / 8;
2267 int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
2268 DstAddr = DAG.getFrameIndex(FI, PtrVT);
2269 DstInfo =
2270 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
2271 } else {
2272 SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl);
2273 DstAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
2274 StackPtr, PtrOff);
2275 DstInfo =
2276 MachinePointerInfo::getStack(DAG.getMachineFunction(), Offset);
2277 }
2278
2279 return std::make_pair(DstAddr, DstInfo);
2280}
2281
2282void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
2283 SDValue Chain, SDValue &Arg,
2284 RegsToPassVector &RegsToPass,
2285 CCValAssign &VA, CCValAssign &NextVA,
2286 SDValue &StackPtr,
2287 SmallVectorImpl<SDValue> &MemOpChains,
2288 bool IsTailCall,
2289 int SPDiff) const {
2290 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2291 DAG.getVTList(MVT::i32, MVT::i32), Arg);
2292 unsigned id = Subtarget->isLittle() ? 0 : 1;
2293 RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
2294
2295 if (NextVA.isRegLoc())
2296 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
2297 else {
2298 assert(NextVA.isMemLoc());
2299 if (!StackPtr.getNode())
2300 StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
2301 getPointerTy(DAG.getDataLayout()));
2302
2303 SDValue DstAddr;
2304 MachinePointerInfo DstInfo;
2305 std::tie(DstAddr, DstInfo) =
2306 computeAddrForCallArg(dl, DAG, NextVA, StackPtr, IsTailCall, SPDiff);
2307 MemOpChains.push_back(
2308 DAG.getStore(Chain, dl, fmrrd.getValue(1 - id), DstAddr, DstInfo));
2309 }
2310}
2311
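// Return true if the calling convention guarantees that this call can be
// lowered as a tail call: fastcc with guaranteed tail calls enabled, or the
// tail / swifttail conventions.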
2312static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
2313 return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
2314 CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
2315}
2316
2317/// LowerCall - Lowering a call into a callseq_start <-
2318/// ARMISD:CALL <- callseq_end chain. Also add input and output parameter
2319/// nodes.
2320SDValue
2321ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
2322 SmallVectorImpl<SDValue> &InVals) const {
2323 SelectionDAG &DAG = CLI.DAG;
2324 SDLoc &dl = CLI.DL;
2325 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2326 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2327 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2328 SDValue Chain = CLI.Chain;
2329 SDValue Callee = CLI.Callee;
2330 bool &isTailCall = CLI.IsTailCall;
2331 CallingConv::ID CallConv = CLI.CallConv;
2332 bool doesNotRet = CLI.DoesNotReturn;
2333 bool isVarArg = CLI.IsVarArg;
2334
2335 MachineFunction &MF = DAG.getMachineFunction();
2336 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2337 MachineFunction::CallSiteInfo CSInfo;
2338 bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
2339 bool isThisReturn = false;
2340 bool isCmseNSCall = false;
2341 bool isSibCall = false;
2342 bool PreferIndirect = false;
2343 bool GuardWithBTI = false;
2344
2345 // Lower 'returns_twice' calls to a pseudo-instruction.
2346 if (CLI.CB && CLI.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) &&
2347 !Subtarget->noBTIAtReturnTwice())
2348 GuardWithBTI = AFI->branchTargetEnforcement();
2349
2350 // Determine whether this is a non-secure function call.
2351 if (CLI.CB && CLI.CB->getAttributes().hasFnAttr("cmse_nonsecure_call"))
2352 isCmseNSCall = true;
2353
2354 // Disable tail calls if they're not supported.
2355 if (!Subtarget->supportsTailCall())
2356 isTailCall = false;
2357
2358 // For both the non-secure calls and the returns from a CMSE entry function,
2359 // the function needs to do some extra work after the call, or before the
2360 // return, respectively; thus it cannot end with a tail call.
2361 if (isCmseNSCall || AFI->isCmseNSEntryFunction())
2362 isTailCall = false;
2363
2364 if (isa<GlobalAddressSDNode>(Callee)) {
2365 // If we're optimizing for minimum size and the function is called three or
2366 // more times in this block, we can improve codesize by calling indirectly
2367 // as BLXr has a 16-bit encoding.
2368 auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
2369 if (CLI.CB) {
2370 auto *BB = CLI.CB->getParent();
2371 PreferIndirect = Subtarget->isThumb() && Subtarget->hasMinSize() &&
2372 count_if(GV->users(), [&BB](const User *U) {
2373 return isa<Instruction>(U) &&
2374 cast<Instruction>(U)->getParent() == BB;
2375 }) > 2;
2376 }
2377 }
2378 if (isTailCall) {
2379 // Check if it's really possible to do a tail call.
2380 isTailCall = IsEligibleForTailCallOptimization(
2381 Callee, CallConv, isVarArg, isStructRet,
2382 MF.getFunction().hasStructRetAttr(), Outs, OutVals, Ins, DAG,
2383 PreferIndirect);
2384
2385 if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt &&
2386 CallConv != CallingConv::Tail && CallConv != CallingConv::SwiftTail)
2387 isSibCall = true;
2388
2389 // We don't support GuaranteedTailCallOpt for ARM, only automatically
2390 // detected sibcalls.
2391 if (isTailCall)
2392 ++NumTailCalls;
2393 }
2394
2395 if (!isTailCall && CLI.CB && CLI.CB->isMustTailCall())
2396 report_fatal_error("failed to perform tail call elimination on a call "
2397 "site marked musttail");
2398 // Analyze operands of the call, assigning locations to each operand.
2399 SmallVector<CCValAssign, 16> ArgLocs;
2400 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
2401 *DAG.getContext());
2402 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
2403
2404 // Get a count of how many bytes are to be pushed on the stack.
2405 unsigned NumBytes = CCInfo.getNextStackOffset();
2406
2407 // SPDiff is the byte offset of the call's argument area from the callee's.
2408 // Stores to callee stack arguments will be placed in FixedStackSlots offset
2409 // by this amount for a tail call. In a sibling call it must be 0 because the
2410 // caller will deallocate the entire stack and the callee still expects its
2411 // arguments to begin at SP+0. Completely unused for non-tail calls.
2412 int SPDiff = 0;
2413
2414 if (isTailCall && !isSibCall) {
2415 auto FuncInfo = MF.getInfo<ARMFunctionInfo>();
2416 unsigned NumReusableBytes = FuncInfo->getArgumentStackSize();
2417
2418 // Since callee will pop argument stack as a tail call, we must keep the
2419 // popped size 16-byte aligned.
2420 Align StackAlign = DAG.getDataLayout().getStackAlignment();
2421 NumBytes = alignTo(NumBytes, StackAlign);
2422
2423 // SPDiff will be negative if this tail call requires more space than we
2424 // would automatically have in our incoming argument space. Positive if we
2425 // can actually shrink the stack.
2426 SPDiff = NumReusableBytes - NumBytes;
2427
2428 // If this call requires more stack than we have available from
2429 // LowerFormalArguments, tell FrameLowering to reserve space for it.
2430 if (SPDiff < 0 && AFI->getArgRegsSaveSize() < (unsigned)-SPDiff)
2431 AFI->setArgRegsSaveSize(-SPDiff);
2432 }
2433
2434 if (isSibCall) {
2435 // For sibling tail calls, memory operands are available in our caller's stack.
2436 NumBytes = 0;
2437 } else {
2438 // Adjust the stack pointer for the new arguments...
2439 // These operations are automatically eliminated by the prolog/epilog pass
2440 Chain = DAG.getCALLSEQ_START(Chain, isTailCall ? 0 : NumBytes, 0, dl);
2441 }
2442
2443 SDValue StackPtr =
2444 DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
2445
2446 RegsToPassVector RegsToPass;
2447 SmallVector<SDValue, 8> MemOpChains;
2448
2449 // During a tail call, stores to the argument area must happen after all of
2450 // the function's incoming arguments have been loaded because they may alias.
2451 // This is done by folding in a TokenFactor from LowerFormalArguments, but
2452 // there's no point in doing so repeatedly so this tracks whether that's
2453 // happened yet.
2454 bool AfterFormalArgLoads = false;
2455
2456 // Walk the register/memloc assignments, inserting copies/loads. In the case
2457 // of tail call optimization, arguments are handled later.
2458 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2459 i != e;
2460 ++i, ++realArgIdx) {
2461 CCValAssign &VA = ArgLocs[i];
2462 SDValue Arg = OutVals[realArgIdx];
2463 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2464 bool isByVal = Flags.isByVal();
2465
2466 // Promote the value if needed.
2467 switch (VA.getLocInfo()) {
2468 default: llvm_unreachable("Unknown loc info!");
2469 case CCValAssign::Full: break;
2470 case CCValAssign::SExt:
2471 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
2472 break;
2473 case CCValAssign::ZExt:
2474 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
2475 break;
2476 case CCValAssign::AExt:
2477 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
2478 break;
2479 case CCValAssign::BCvt:
2480 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2481 break;
2482 }
2483
2484 if (isTailCall && VA.isMemLoc() && !AfterFormalArgLoads) {
2485 Chain = DAG.getStackArgumentTokenFactor(Chain);
2486 AfterFormalArgLoads = true;
2487 }
2488
2489 // f16 arguments have their size extended to 4 bytes and passed as if they
2490 // had been copied to the LSBs of a 32-bit register.
2491 // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
2492 if (VA.needsCustom() &&
2493 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
2494 Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
2495 } else {
2496 // f16 arguments could have been extended prior to argument lowering.
2497 // Mask these arguments if this is a CMSE nonsecure call.
2498 auto ArgVT = Outs[realArgIdx].ArgVT;
2499 if (isCmseNSCall && (ArgVT == MVT::f16)) {
2500 auto LocBits = VA.getLocVT().getSizeInBits();
2501 auto MaskValue = APInt::getLowBitsSet(LocBits, ArgVT.getSizeInBits());
2502 SDValue Mask =
2503 DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
2504 Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
2505 Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
2506 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2507 }
2508 }
2509
2510 // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
2511 if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {
2512 SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2513 DAG.getConstant(0, dl, MVT::i32));
2514 SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2515 DAG.getConstant(1, dl, MVT::i32));
2516
2517 PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass, VA, ArgLocs[++i],
2518 StackPtr, MemOpChains, isTailCall, SPDiff);
2519
2520 VA = ArgLocs[++i]; // skip ahead to next loc
2521 if (VA.isRegLoc()) {
2522 PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass, VA, ArgLocs[++i],
2523 StackPtr, MemOpChains, isTailCall, SPDiff);
2524 } else {
2525 assert(VA.isMemLoc());
2526 SDValue DstAddr;
2527 MachinePointerInfo DstInfo;
2528 std::tie(DstAddr, DstInfo) =
2529 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2530 MemOpChains.push_back(DAG.getStore(Chain, dl, Op1, DstAddr, DstInfo));
2531 }
2532 } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) {
2533 PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
2534 StackPtr, MemOpChains, isTailCall, SPDiff);
2535 } else if (VA.isRegLoc()) {
2536 if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
2537 Outs[0].VT == MVT::i32) {
2538 assert(VA.getLocVT() == MVT::i32 &&
2539 "unexpected calling convention register assignment");
2540 assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
2541 "unexpected use of 'returned'");
2542 isThisReturn = true;
2543 }
2544 const TargetOptions &Options = DAG.getTarget().Options;
2545 if (Options.EmitCallSiteInfo)
2546 CSInfo.emplace_back(VA.getLocReg(), i);
2547 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
2548 } else if (isByVal) {
2549 assert(VA.isMemLoc());
2550 unsigned offset = 0;
2551
2552 // True if this byval aggregate will be split between registers
2553 // and memory.
2554 unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
2555 unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
2556
2557 if (CurByValIdx < ByValArgsCount) {
2558
2559 unsigned RegBegin, RegEnd;
2560 CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
2561
2562 EVT PtrVT =
2563 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
2564 unsigned int i, j;
2565 for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
2566 SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
2567 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
2568 SDValue Load =
2569 DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo(),
2570 DAG.InferPtrAlign(AddArg));
2571 MemOpChains.push_back(Load.getValue(1));
2572 RegsToPass.push_back(std::make_pair(j, Load));
2573 }
2574
2575 // If the parameter size exceeds the register area, the "offset" value
2576 // helps us compute the stack slot for the remaining part properly.
2577 offset = RegEnd - RegBegin;
2578
2579 CCInfo.nextInRegsParam();
2580 }
2581
2582 if (Flags.getByValSize() > 4*offset) {
2583 auto PtrVT = getPointerTy(DAG.getDataLayout());
2584 SDValue Dst;
2585 MachinePointerInfo DstInfo;
2586 std::tie(Dst, DstInfo) =
2587 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2588 SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
2589 SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
2590 SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
2591 MVT::i32);
2592 SDValue AlignNode =
2593 DAG.getConstant(Flags.getNonZeroByValAlign().value(), dl, MVT::i32);
2594
2595 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
2596 SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
2597 MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
2598 Ops));
2599 }
2600 } else {
2601 assert(VA.isMemLoc());
2602 SDValue DstAddr;
2603 MachinePointerInfo DstInfo;
2604 std::tie(DstAddr, DstInfo) =
2605 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2606
2607 SDValue Store = DAG.getStore(Chain, dl, Arg, DstAddr, DstInfo);
2608 MemOpChains.push_back(Store);
2609 }
2610 }
2611
2612 if (!MemOpChains.empty())
2613 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2614
2615 // Build a sequence of copy-to-reg nodes chained together with token chain
2616 // and flag operands which copy the outgoing args into the appropriate regs.
2617 SDValue InFlag;
2618 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
2619 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
2620 RegsToPass[i].second, InFlag);
2621 InFlag = Chain.getValue(1);
2622 }
2623
2624 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
2625 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
2626 // node so that legalize doesn't hack it.
2627 bool isDirect = false;
2628
2629 const TargetMachine &TM = getTargetMachine();
2630 const Module *Mod = MF.getFunction().getParent();
2631 const GlobalValue *GV = nullptr;
2632 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
2633 GV = G->getGlobal();
2634 bool isStub =
2635 !TM.shouldAssumeDSOLocal(*Mod, GV) && Subtarget->isTargetMachO();
2636
2637 bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
2638 bool isLocalARMFunc = false;
2639 auto PtrVt = getPointerTy(DAG.getDataLayout());
2640
2641 if (Subtarget->genLongCalls()) {
2642 assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
2643        "long-calls codegen is not position independent!");
2644 // Handle a global address or an external symbol. If it's not one of
2645 // those, the target's already in a register, so we don't need to do
2646 // anything extra.
2647 if (isa<GlobalAddressSDNode>(Callee)) {
2648 // Create a constant pool entry for the callee address
2649 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2650 ARMConstantPoolValue *CPV =
2651 ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
2652
2653 // Get the address of the callee into a register
2654 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2655 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2656 Callee = DAG.getLoad(
2657 PtrVt, dl, DAG.getEntryNode(), CPAddr,
2658 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2659 } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
2660 const char *Sym = S->getSymbol();
2661
2662 // Create a constant pool entry for the callee address
2663 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2664 ARMConstantPoolValue *CPV =
2665 ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2666 ARMPCLabelIndex, 0);
2667 // Get the address of the callee into a register
2668 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2669 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2670 Callee = DAG.getLoad(
2671 PtrVt, dl, DAG.getEntryNode(), CPAddr,
2672 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2673 }
2674 } else if (isa<GlobalAddressSDNode>(Callee)) {
2675 if (!PreferIndirect) {
2676 isDirect = true;
2677 bool isDef = GV->isStrongDefinitionForLinker();
2678
2679 // ARM call to a local ARM function is predicable.
2680 isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
2681 // tBX takes a register source operand.
2682 if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2683 assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
2684 Callee = DAG.getNode(
2685 ARMISD::WrapperPIC, dl, PtrVt,
2686 DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
2687 Callee = DAG.getLoad(
2688 PtrVt, dl, DAG.getEntryNode(), Callee,
2689 MachinePointerInfo::getGOT(DAG.getMachineFunction()), MaybeAlign(),
2690 MachineMemOperand::MODereferenceable |
2691 MachineMemOperand::MOInvariant);
2692 } else if (Subtarget->isTargetCOFF()) {
2693 assert(Subtarget->isTargetWindows() &&
2694        "Windows is the only supported COFF target");
2695 unsigned TargetFlags = ARMII::MO_NO_FLAG;
2696 if (GV->hasDLLImportStorageClass())
2697 TargetFlags = ARMII::MO_DLLIMPORT;
2698 else if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
2699 TargetFlags = ARMII::MO_COFFSTUB;
2700 Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*offset=*/0,
2701 TargetFlags);
2702 if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
2703 Callee =
2704 DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
2705 DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
2706 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
2707 } else {
2708 Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, 0);
2709 }
2710 }
2711 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2712 isDirect = true;
2713 // tBX takes a register source operand.
2714 const char *Sym = S->getSymbol();
2715 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2716 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2717 ARMConstantPoolValue *CPV =
2718 ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2719 ARMPCLabelIndex, 4);
2720 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2721 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2722 Callee = DAG.getLoad(
2723 PtrVt, dl, DAG.getEntryNode(), CPAddr,
2724 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2725 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2726 Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
2727 } else {
2728 Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
2729 }
2730 }
2731
2732 if (isCmseNSCall) {
2733 assert(!isARMFunc && !isDirect &&
2734        "Cannot handle call to ARM function or direct call");
2735 if (NumBytes > 0) {
2736 DiagnosticInfoUnsupported Diag(DAG.getMachineFunction().getFunction(),
2737 "call to non-secure function would "
2738 "require passing arguments on stack",
2739 dl.getDebugLoc());
2740 DAG.getContext()->diagnose(Diag);
2741 }
2742 if (isStructRet) {
2743 DiagnosticInfoUnsupported Diag(
2744 DAG.getMachineFunction().getFunction(),
2745 "call to non-secure function would return value through pointer",
2746 dl.getDebugLoc());
2747 DAG.getContext()->diagnose(Diag);
2748 }
2749 }
2750
2751 // FIXME: handle tail calls differently.
2752 unsigned CallOpc;
2753 if (Subtarget->isThumb()) {
2754 if (GuardWithBTI)
2755 CallOpc = ARMISD::t2CALL_BTI;
2756 else if (isCmseNSCall)
2757 CallOpc = ARMISD::tSECALL;
2758 else if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2759 CallOpc = ARMISD::CALL_NOLINK;
2760 else
2761 CallOpc = ARMISD::CALL;
2762 } else {
2763 if (!isDirect && !Subtarget->hasV5TOps())
2764 CallOpc = ARMISD::CALL_NOLINK;
2765 else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2766 // Emit regular call when code size is the priority
2767 !Subtarget->hasMinSize())
2768 // "mov lr, pc; b _foo" to avoid confusing the RSP
2769 CallOpc = ARMISD::CALL_NOLINK;
2770 else
2771 CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
2772 }
2773
2774 // We don't usually want to end the call-sequence here because we would tidy
2775 // the frame up *after* the call, however in the ABI-changing tail-call case
2776 // we've carefully laid out the parameters so that when sp is reset they'll be
2777 // in the correct location.
2778 if (isTailCall && !isSibCall) {
2779 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true),
2780 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
2781 InFlag = Chain.getValue(1);
2782 }
2783
2784 std::vector<SDValue> Ops;
2785 Ops.push_back(Chain);
2786 Ops.push_back(Callee);
2787
2788 if (isTailCall) {
2789 Ops.push_back(DAG.getTargetConstant(SPDiff, dl, MVT::i32));
2790 }
2791
2792 // Add argument registers to the end of the list so that they are known live
2793 // into the call.
2794 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2795 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2796 RegsToPass[i].second.getValueType()));
2797
2798 // Add a register mask operand representing the call-preserved registers.
2799 const uint32_t *Mask;
2800 const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
2801 if (isThisReturn) {
2802 // For 'this' returns, use the R0-preserving mask if applicable
2803 Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
2804 if (!Mask) {
2805 // Set isThisReturn to false if the calling convention is not one that
2806 // allows 'returned' to be modeled in this way, so LowerCallResult does
2807 // not try to pass 'this' straight through
2808 isThisReturn = false;
2809 Mask = ARI->getCallPreservedMask(MF, CallConv);
2810 }
2811 } else
2812 Mask = ARI->getCallPreservedMask(MF, CallConv);
2813
2814 assert(Mask && "Missing call preserved mask for calling convention");
2815 Ops.push_back(DAG.getRegisterMask(Mask));
2816
2817 if (InFlag.getNode())
2818 Ops.push_back(InFlag);
2819
2820 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2821 if (isTailCall) {
2822 MF.getFrameInfo().setHasTailCall();
2823 SDValue Ret = DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
2824 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
2825 return Ret;
2826 }
2827
2828 // Returns a chain and a flag for retval copy to use.
2829 Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
2830 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2831 InFlag = Chain.getValue(1);
2832 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
2833
2834 // If we're guaranteeing tail-calls will be honoured, the callee must
2835 // pop its own argument stack on return. But this call is *not* a tail call so
2836 // we need to undo that after it returns to restore the status-quo.
2837 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
2838 uint64_t CalleePopBytes =
2839 canGuaranteeTCO(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : -1ULL;
2840
2841 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
2842 DAG.getIntPtrConstant(CalleePopBytes, dl, true),
2843 InFlag, dl);
2844 if (!Ins.empty())
2845 InFlag = Chain.getValue(1);
2846
2847 // Handle result values, copying them out of physregs into vregs that we
2848 // return.
2849 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
2850 InVals, isThisReturn,
2851 isThisReturn ? OutVals[0] : SDValue());
2852}
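As a worked example of the CMSE nonsecure-call masking in LowerCall above: the ISD::AND with APInt::getLowBitsSet(LocBits, 16) clears everything above the low 16 bits of the 32-bit location before the value reaches the nonsecure callee. The following is an editor's sketch on plain integers, not part of the analyzed source; the sample bit pattern is made up.

#include <cstdint>
#include <cstdio>

int main() {
  std::uint32_t ArgBits = 0xDEAD3C00u;    // an f16 value sitting in the low 16 bits
  std::uint32_t Mask = (1u << 16) - 1;    // APInt::getLowBitsSet(32, 16)
  std::uint32_t Cleared = ArgBits & Mask; // what the ISD::AND node computes
  std::printf("0x%08X -> 0x%08X\n", ArgBits, Cleared); // 0xDEAD3C00 -> 0x00003C00
  return 0;
}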
2853
2854/// HandleByVal - Every parameter *after* a byval parameter is passed
2855/// on the stack. Remember the next parameter register to allocate,
2856 /// and then confiscate the rest of the parameter registers to ensure
2857/// this.
2858void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
2859 Align Alignment) const {
2860 // Byval (as with any stack) slots are always at least 4 byte aligned.
2861 Alignment = std::max(Alignment, Align(4));
2862
2863 unsigned Reg = State->AllocateReg(GPRArgRegs);
2864 if (!Reg)
2865 return;
2866
2867 unsigned AlignInRegs = Alignment.value() / 4;
2868 unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
2869 for (unsigned i = 0; i < Waste; ++i)
2870 Reg = State->AllocateReg(GPRArgRegs);
2871
2872 if (!Reg)
2873 return;
2874
2875 unsigned Excess = 4 * (ARM::R4 - Reg);
2876
2877 // Special case when NSAA != SP and the parameter size is greater than the
2878 // size of all remaining GPR regs. In that case we can't split the parameter;
2879 // we must send it to the stack. We also must set NCRN to R4, so that all
2880 // remaining registers are wasted.
2881 const unsigned NSAAOffset = State->getNextStackOffset();
2882 if (NSAAOffset != 0 && Size > Excess) {
2883 while (State->AllocateReg(GPRArgRegs))
2884 ;
2885 return;
2886 }
2887
2888 // The first register for the byval parameter is the first register that
2889 // wasn't allocated before this method call, so it would be "reg".
2890 // If the parameter is small enough to be saved in the range [reg, r4), then
2891 // the end (one past the last) register would be reg + param-size-in-regs;
2892 // otherwise the parameter is split between registers and the stack, and
2893 // the end register would be r4 in that case.
2894 unsigned ByValRegBegin = Reg;
2895 unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
2896 State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
2897 // Note, the first register was already allocated at the beginning of the
2898 // function; here we allocate the remaining registers we need.
2899 for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
2900 State->AllocateReg(GPRArgRegs);
2901 // A byval parameter that is split between registers and memory needs its
2902 // size truncated here.
2903 // In the case where the entire structure fits in registers, we set the
2904 // size in memory to zero.
2905 Size = std::max<int>(Size - Excess, 0);
2906}
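The register/stack split HandleByVal computes can be followed with a small standalone sketch (editor's illustration, not part of the analyzed source): register numbers 0-4 stand in for ARM::R0-ARM::R4, and the NSAA special case above is ignored.

#include <algorithm>
#include <cstdio>

struct ByValSplit {
  unsigned RegBegin, RegEnd; // registers holding the leading bytes
  unsigned BytesOnStack;     // remaining bytes passed in memory
};

static ByValSplit splitByVal(unsigned FirstFreeReg, unsigned Size,
                             unsigned Alignment) {
  const unsigned R4 = 4;                              // one past the last arg reg
  unsigned AlignInRegs = std::max(Alignment, 4u) / 4; // byval slots are >= 4-aligned
  unsigned Waste = (R4 - FirstFreeReg) % AlignInRegs; // regs skipped for alignment
  unsigned Reg = FirstFreeReg + Waste;
  unsigned Excess = 4 * (R4 - Reg);                   // bytes that fit in registers
  unsigned End = std::min(Reg + Size / 4, R4);
  return {Reg, End, Size > Excess ? Size - Excess : 0};
}

int main() {
  // A 24-byte, 8-byte-aligned byval with r0 already taken: it lands in r2-r3
  // and the remaining 16 bytes go on the stack.
  ByValSplit S = splitByVal(/*FirstFreeReg=*/1, /*Size=*/24, /*Alignment=*/8);
  std::printf("regs [r%u, r%u), %u bytes on stack\n", S.RegBegin, S.RegEnd,
              S.BytesOnStack);
  return 0;
}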
2907
2908/// MatchingStackOffset - Return true if the given stack call argument is
2909/// already available in the same position (relatively) of the caller's
2910/// incoming argument stack.
2911static
2912bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
2913 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
2914 const TargetInstrInfo *TII) {
2915 unsigned Bytes = Arg.getValueSizeInBits() / 8;
2916 int FI = std::numeric_limits<int>::max();
2917 if (Arg.getOpcode() == ISD::CopyFromReg) {
2918 Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2919 if (!Register::isVirtualRegister(VR))
2920 return false;
2921 MachineInstr *Def = MRI->getVRegDef(VR);
2922 if (!Def)
2923 return false;
2924 if (!Flags.isByVal()) {
2925 if (!TII->isLoadFromStackSlot(*Def, FI))
2926 return false;
2927 } else {
2928 return false;
2929 }
2930 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2931 if (Flags.isByVal())
2932 // ByVal argument is passed in as a pointer but it's now being
2933 // dereferenced. e.g.
2934 // define @foo(%struct.X* %A) {
2935 // tail call @bar(%struct.X* byval %A)
2936 // }
2937 return false;
2938 SDValue Ptr = Ld->getBasePtr();
2939 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
2940 if (!FINode)
2941 return false;
2942 FI = FINode->getIndex();
2943 } else
2944 return false;
2945
2946 assert(FI != std::numeric_limits<int>::max());
2947 if (!MFI.isFixedObjectIndex(FI))
2948 return false;
2949 return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
2950}
2951
2952/// IsEligibleForTailCallOptimization - Check whether the call is eligible
2953/// for tail call optimization. Targets which want to do tail call
2954/// optimization should implement this function.
2955bool ARMTargetLowering::IsEligibleForTailCallOptimization(
2956 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
2957 bool isCalleeStructRet, bool isCallerStructRet,
2958 const SmallVectorImpl<ISD::OutputArg> &Outs,
2959 const SmallVectorImpl<SDValue> &OutVals,
2960 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG,
2961 const bool isIndirect) const {
2962 MachineFunction &MF = DAG.getMachineFunction();
2963 const Function &CallerF = MF.getFunction();
2964 CallingConv::ID CallerCC = CallerF.getCallingConv();
2965
2966 assert(Subtarget->supportsTailCall());
2967
2968 // Indirect tail calls cannot be optimized for Thumb1 if the args
2969 // to the call take up r0-r3. The reason is that there are no legal registers
2970 // left to hold the pointer to the function to be called.
2971 // Similarly, if the function uses return address sign and authentication,
2972 // r12 is needed to hold the PAC and is not available to hold the callee
2973 // address.
2974 if (Outs.size() >= 4 &&
2975 (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect)) {
2976 if (Subtarget->isThumb1Only())
2977 return false;
2978 // Conservatively assume the function spills LR.
2979 if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(true))
2980 return false;
2981 }
2982
2983 // Look for obvious safe cases to perform tail call optimization that do not
2984 // require ABI changes. This is what gcc calls sibcall.
2985
2986 // Exception-handling functions need a special set of instructions to indicate
2987 // a return to the hardware. Tail-calling another function would probably
2988 // break this.
2989 if (CallerF.hasFnAttribute("interrupt"))
2990 return false;
2991
2992 if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt))
2993 return CalleeCC == CallerCC;
2994
2995 // Also avoid sibcall optimization if either caller or callee uses struct
2996 // return semantics.
2997 if (isCalleeStructRet || isCallerStructRet)
2998 return false;
2999
3000 // Externally-defined functions with weak linkage should not be
3001 // tail-called on ARM when the OS does not support dynamic
3002 // pre-emption of symbols, as the AAELF spec requires normal calls
3003 // to undefined weak functions to be replaced with a NOP or jump to the
3004 // next instruction. The behaviour of branch instructions in this
3005 // situation (as used for tail calls) is implementation-defined, so we
3006 // cannot rely on the linker replacing the tail call with a return.
3007 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
3008 const GlobalValue *GV = G->getGlobal();
3009 const Triple &TT = getTargetMachine().getTargetTriple();
3010 if (GV->hasExternalWeakLinkage() &&
3011 (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
3012 return false;
3013 }
3014
3015 // Check that the call results are passed in the same way.
3016 LLVMContext &C = *DAG.getContext();
3017 if (!CCState::resultsCompatible(
3018 getEffectiveCallingConv(CalleeCC, isVarArg),
3019 getEffectiveCallingConv(CallerCC, CallerF.isVarArg()), MF, C, Ins,
3020 CCAssignFnForReturn(CalleeCC, isVarArg),
3021 CCAssignFnForReturn(CallerCC, CallerF.isVarArg())))
3022 return false;
3023 // The callee has to preserve all registers the caller needs to preserve.
3024 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
3025 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
3026 if (CalleeCC != CallerCC) {
3027 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
3028 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
3029 return false;
3030 }
3031
3032 // If Caller's vararg or byval argument has been split between registers and
3033 // stack, do not perform tail call, since part of the argument is in caller's
3034 // local frame.
3035 const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
3036 if (AFI_Caller->getArgRegsSaveSize())
3037 return false;
3038
3039 // If the callee takes no arguments then go on to check the results of the
3040 // call.
3041 if (!Outs.empty()) {
3042 // Check if stack adjustment is needed. For now, do not do this if any
3043 // argument is passed on the stack.
3044 SmallVector<CCValAssign, 16> ArgLocs;
3045 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
3046 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
3047 if (CCInfo.getNextStackOffset()) {
3048 // Check if the arguments are already laid out in the right way as
3049 // the caller's fixed stack objects.
3050 MachineFrameInfo &MFI = MF.getFrameInfo();
3051 const MachineRegisterInfo *MRI = &MF.getRegInfo();
3052 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3053 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
3054 i != e;
3055 ++i, ++realArgIdx) {
3056 CCValAssign &VA = ArgLocs[i];
3057 EVT RegVT = VA.getLocVT();
3058 SDValue Arg = OutVals[realArgIdx];
3059 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
3060 if (VA.getLocInfo() == CCValAssign::Indirect)
3061 return false;
3062 if (VA.needsCustom() && (RegVT == MVT::f64 || RegVT == MVT::v2f64)) {
3063 // f64 and vector types are split into multiple registers or
3064 // register/stack-slot combinations. The types will not match
3065 // the registers; give up on memory f64 refs until we figure
3066 // out what to do about this.
3067 if (!VA.isRegLoc())
3068 return false;
3069 if (!ArgLocs[++i].isRegLoc())
3070 return false;
3071 if (RegVT == MVT::v2f64) {
3072 if (!ArgLocs[++i].isRegLoc())
3073 return false;
3074 if (!ArgLocs[++i].isRegLoc())
3075 return false;
3076 }
3077 } else if (!VA.isRegLoc()) {
3078 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
3079 MFI, MRI, TII))
3080 return false;
3081 }
3082 }
3083 }
3084
3085 const MachineRegisterInfo &MRI = MF.getRegInfo();
3086 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
3087 return false;
3088 }
3089
3090 return true;
3091}
3092
3093bool
3094ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
3095 MachineFunction &MF, bool isVarArg,
3096 const SmallVectorImpl<ISD::OutputArg> &Outs,
3097 LLVMContext &Context) const {
3098 SmallVector<CCValAssign, 16> RVLocs;
3099 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
3100 return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
3101}
3102
3103static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
3104 const SDLoc &DL, SelectionDAG &DAG) {
3105 const MachineFunction &MF = DAG.getMachineFunction();
3106 const Function &F = MF.getFunction();
3107
3108 StringRef IntKind = F.getFnAttribute("interrupt").getValueAsString();
3109
3110 // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
3111 // version of the "preferred return address". These offsets affect the return
3112 // instruction if this is a return from PL1 without hypervisor extensions.
3113 // IRQ/FIQ: +4 "subs pc, lr, #4"
3114 // SWI: 0 "subs pc, lr, #0"
3115 // ABORT: +4 "subs pc, lr, #4"
3116 // UNDEF: +4/+2 "subs pc, lr, #0"
3117 // UNDEF varies depending on where the exception came from ARM or Thumb
3118 // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
3119
3120 int64_t LROffset;
3121 if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
3122 IntKind == "ABORT")
3123 LROffset = 4;
3124 else if (IntKind == "SWI" || IntKind == "UNDEF")
3125 LROffset = 0;
3126 else
3127 report_fatal_error("Unsupported interrupt attribute. If present, value "
3128 "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
3129
3130 RetOps.insert(RetOps.begin() + 1,
3131 DAG.getConstant(LROffset, DL, MVT::i32, false));
3132
3133 return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps);
3134}
3135
3136SDValue
3137ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
3138 bool isVarArg,
3139 const SmallVectorImpl<ISD::OutputArg> &Outs,
3140 const SmallVectorImpl<SDValue> &OutVals,
3141 const SDLoc &dl, SelectionDAG &DAG) const {
3142 // CCValAssign - represent the assignment of the return value to a location.
3143 SmallVector<CCValAssign, 16> RVLocs;
3144
3145 // CCState - Info about the registers and stack slots.
3146 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3147 *DAG.getContext());
3148
3149 // Analyze outgoing return values.
3150 CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
3151
3152 SDValue Flag;
3153 SmallVector<SDValue, 4> RetOps;
3154 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
3155 bool isLittleEndian = Subtarget->isLittle();
3156
3157 MachineFunction &MF = DAG.getMachineFunction();
3158 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3159 AFI->setReturnRegsCount(RVLocs.size());
3160
3161 // Report error if cmse entry function returns structure through first ptr arg.
3162 if (AFI->isCmseNSEntryFunction() && MF.getFunction().hasStructRetAttr()) {
3163 // Note: using an empty SDLoc(), as the first line of the function is a
3164 // better place to report than the last line.
3165 DiagnosticInfoUnsupported Diag(
3166 DAG.getMachineFunction().getFunction(),
3167 "secure entry function would return value through pointer",
3168 SDLoc().getDebugLoc());
3169 DAG.getContext()->diagnose(Diag);
3170 }
3171
3172 // Copy the result values into the output registers.
3173 for (unsigned i = 0, realRVLocIdx = 0;
3174 i != RVLocs.size();
3175 ++i, ++realRVLocIdx) {
3176 CCValAssign &VA = RVLocs[i];
3177 assert(VA.isRegLoc() && "Can only return in registers!");
3178
3179 SDValue Arg = OutVals[realRVLocIdx];
3180 bool ReturnF16 = false;
3181
3182 if (Subtarget->hasFullFP16() && Subtarget->isTargetHardFloat()) {
3183 // Half-precision return values can be returned like this:
3184 //
3185 // t11 f16 = fadd ...
3186 // t12: i16 = bitcast t11
3187 // t13: i32 = zero_extend t12
3188 // t14: f32 = bitcast t13 <~~~~~~~ Arg
3189 //
3190 // to avoid code generation for bitcasts, we simply set Arg to the node
3191 // that produces the f16 value, t11 in this case.
3192 //
3193 if (Arg.getValueType() == MVT::f32 && Arg.getOpcode() == ISD::BITCAST) {
3194 SDValue ZE = Arg.getOperand(0);
3195 if (ZE.getOpcode() == ISD::ZERO_EXTEND && ZE.getValueType() == MVT::i32) {
3196 SDValue BC = ZE.getOperand(0);
3197 if (BC.getOpcode() == ISD::BITCAST && BC.getValueType() == MVT::i16) {
3198 Arg = BC.getOperand(0);
3199 ReturnF16 = true;
3200 }
3201 }
3202 }
3203 }
3204
3205 switch (VA.getLocInfo()) {
3206 default: llvm_unreachable("Unknown loc info!");
3207 case CCValAssign::Full: break;
3208 case CCValAssign::BCvt:
3209 if (!ReturnF16)
3210 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
3211 break;
3212 }
3213
3214 // Mask f16 arguments if this is a CMSE nonsecure entry.
3215 auto RetVT = Outs[realRVLocIdx].ArgVT;
3216 if (AFI->isCmseNSEntryFunction() && (RetVT == MVT::f16)) {
3217 if (VA.needsCustom() && VA.getValVT() == MVT::f16) {
3218 Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
3219 } else {
3220 auto LocBits = VA.getLocVT().getSizeInBits();
3221 auto MaskValue = APInt::getLowBitsSet(LocBits, RetVT.getSizeInBits());
3222 SDValue Mask =
3223 DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
3224 Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
3225 Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
3226 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
3227 }
3228 }
3229
3230 if (VA.needsCustom() &&
3231 (VA.getLocVT() == MVT::v2f64 || VA.getLocVT() == MVT::f64)) {
3232 if (VA.getLocVT() == MVT::v2f64) {
3233 // Extract the first half and return it in two registers.
3234 SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
3235 DAG.getConstant(0, dl, MVT::i32));
3236 SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
3237 DAG.getVTList(MVT::i32, MVT::i32), Half);
3238
3239 Chain =
3240 DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3241 HalfGPRs.getValue(isLittleEndian ? 0 : 1), Flag);
3242 Flag = Chain.getValue(1);
3243 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3244 VA = RVLocs[++i]; // skip ahead to next loc
3245 Chain =
3246 DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3247 HalfGPRs.getValue(isLittleEndian ? 1 : 0), Flag);
3248 Flag = Chain.getValue(1);
3249 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3250 VA = RVLocs[++i]; // skip ahead to next loc
3251
3252 // Extract the 2nd half and fall through to handle it as an f64 value.
3253 Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
3254 DAG.getConstant(1, dl, MVT::i32));
3255 }
3256 // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
3257 // available.
3258 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
3259 DAG.getVTList(MVT::i32, MVT::i32), Arg);
3260 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3261 fmrrd.getValue(isLittleEndian ? 0 : 1), Flag);
3262 Flag = Chain.getValue(1);
3263 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3264 VA = RVLocs[++i]; // skip ahead to next loc
3265 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3266 fmrrd.getValue(isLittleEndian ? 1 : 0), Flag);
3267 } else
3268 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
3269
3270 // Guarantee that all emitted copies are
3271 // stuck together, avoiding something bad.
3272 Flag = Chain.getValue(1);
3273 RetOps.push_back(DAG.getRegister(
3274 VA.getLocReg(), ReturnF16 ? Arg.getValueType() : VA.getLocVT()));
3275 }
3276 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
3277 const MCPhysReg *I =
3278 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
3279 if (I) {
3280 for (; *I; ++I) {
3281 if (ARM::GPRRegClass.contains(*I))
3282 RetOps.push_back(DAG.getRegister(*I, MVT::i32));
3283 else if (ARM::DPRRegClass.contains(*I))
3284 RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
3285 else
3286 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
3287 }
3288 }
3289
3290 // Update chain and glue.
3291 RetOps[0] = Chain;
3292 if (Flag.getNode())
3293 RetOps.push_back(Flag);
3294
3295 // CPUs which aren't M-class use a special sequence to return from
3296 // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
3297 // though we use "subs pc, lr, #N").
3298 //
3299 // M-class CPUs actually use a normal return sequence with a special
3300 // (hardware-provided) value in LR, so the normal code path works.
3301 if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt") &&
3302 !Subtarget->isMClass()) {
3303 if (Subtarget->isThumb1Only())
3304 report_fatal_error("interrupt attribute is not supported in Thumb1");
3305 return LowerInterruptReturn(RetOps, dl, DAG);
3306 }
3307
3308 ARMISD::NodeType RetNode = AFI->isCmseNSEntryFunction() ? ARMISD::SERET_FLAG :
3309 ARMISD::RET_FLAG;
3310 return DAG.getNode(RetNode, dl, MVT::Other, RetOps);
3311}
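The hard-float f16 return handling in LowerReturn above walks back through a bitcast/zero_extend/bitcast chain to the original f16 producer. A minimal sketch of that wrapping on plain scalars (editor's illustration, not part of the analyzed source; it only shuffles bit patterns):

#include <cstdint>
#include <cstring>

// Wrap f16 bits the way the lowered DAG does: i16 -> zero_extend to i32 ->
// bitcast to f32 (bit pattern only, no numeric conversion).
static float wrapHalfBits(std::uint16_t HalfBits) {
  std::uint32_t Extended = HalfBits;
  float AsF32;
  std::memcpy(&AsF32, &Extended, sizeof(AsF32));
  return AsF32;
}

// Undo the wrapping, which is what LowerReturn effectively does when it walks
// back to the f16 producer instead of emitting the bitcast/zext nodes.
static std::uint16_t unwrapHalfBits(float Arg) {
  std::uint32_t Bits;
  std::memcpy(&Bits, &Arg, sizeof(Bits));
  return static_cast<std::uint16_t>(Bits); // drop the zero-extended upper half
}

int main() {
  float Wrapped = wrapHalfBits(0x3C00); // 1.0 in IEEE half precision
  return unwrapHalfBits(Wrapped) == 0x3C00 ? 0 : 1;
}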
3312
3313bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
3314 if (N->getNumValues() != 1)
3315 return false;
3316 if (!N->hasNUsesOfValue(1, 0))
3317 return false;
3318
3319 SDValue TCChain = Chain;
3320 SDNode *Copy = *N->use_begin();
3321 if (Copy->getOpcode() == ISD::CopyToReg) {
3322 // If the copy has a glue operand, we conservatively assume it isn't safe to
3323 // perform a tail call.
3324 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3325 return false;
3326 TCChain = Copy->getOperand(0);
3327 } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
3328 SDNode *VMov = Copy;
3329 // f64 returned in a pair of GPRs.
3330 SmallPtrSet<SDNode*, 2> Copies;
3331 for (SDNode *U : VMov->uses()) {
3332 if (U->getOpcode() != ISD::CopyToReg)
3333 return false;
3334 Copies.insert(U);
3335 }
3336 if (Copies.size() > 2)
3337 return false;
3338
3339 for (SDNode *U : VMov->uses()) {
3340 SDValue UseChain = U->getOperand(0);
3341 if (Copies.count(UseChain.getNode()))
3342 // Second CopyToReg
3343 Copy = U;
3344 else {
3345 // We are at the top of this chain.
3346 // If the copy has a glue operand, we conservatively assume it
3347 // isn't safe to perform a tail call.
3348 if (U->getOperand(U->getNumOperands() - 1).getValueType() == MVT::Glue)
3349 return false;
3350 // First CopyToReg
3351 TCChain = UseChain;
3352 }
3353 }
3354 } else if (Copy->getOpcode() == ISD::BITCAST) {
3355 // f32 returned in a single GPR.
3356 if (!Copy->hasOneUse())
3357 return false;
3358 Copy = *Copy->use_begin();
3359 if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
3360 return false;
3361 // If the copy has a glue operand, we conservatively assume it isn't safe to
3362 // perform a tail call.
3363 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3364 return false;
3365 TCChain = Copy->getOperand(0);
3366 } else {
3367 return false;
3368 }
3369
3370 bool HasRet = false;
3371 for (const SDNode *U : Copy->uses()) {
3372 if (U->getOpcode() != ARMISD::RET_FLAG &&
3373 U->getOpcode() != ARMISD::INTRET_FLAG)
3374 return false;
3375 HasRet = true;
3376 }
3377
3378 if (!HasRet)
3379 return false;
3380
3381 Chain = TCChain;
3382 return true;
3383}
3384
3385bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
3386 if (!Subtarget->supportsTailCall())
3387 return false;
3388
3389 if (!CI->isTailCall())
3390 return false;
3391
3392 return true;
3393}
3394
3395 // Trying to write a 64-bit value, so we need to split it into two 32-bit
3396 // values first and pass the low and high parts through.
3397static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) {
3398 SDLoc DL(Op);
3399 SDValue WriteValue = Op->getOperand(2);
3400
3401 // This function is only supposed to be called for i64 type argument.
3402 assert(WriteValue.getValueType() == MVT::i64
3403        && "LowerWRITE_REGISTER called for non-i64 type argument.");
3404
3405 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
3406 DAG.getConstant(0, DL, MVT::i32));
3407 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
3408 DAG.getConstant(1, DL, MVT::i32));
3409 SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
3410 return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
3411}
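The i64 split in LowerWRITE_REGISTER corresponds to taking the low and high 32-bit halves of the value (ISD::EXTRACT_ELEMENT indices 0 and 1). A trivial sketch, editor's illustration rather than part of the analyzed source:

#include <cstdint>
#include <cstdio>

int main() {
  std::uint64_t WriteValue = 0x1122334455667788ull;
  std::uint32_t Lo = static_cast<std::uint32_t>(WriteValue);       // EXTRACT_ELEMENT 0
  std::uint32_t Hi = static_cast<std::uint32_t>(WriteValue >> 32); // EXTRACT_ELEMENT 1
  std::printf("Lo=0x%08X Hi=0x%08X\n", Lo, Hi); // Lo=0x55667788 Hi=0x11223344
  return 0;
}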
3412
3413// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
3414// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
3415// one of the above mentioned nodes. It has to be wrapped because otherwise
3416// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
3417// be used to form addressing mode. These wrapped nodes will be selected
3418// into MOVi.
3419SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
3420 SelectionDAG &DAG) const {
3421 EVT PtrVT = Op.getValueType();
3422 // FIXME there is no actual debug info here
3423 SDLoc dl(Op);
3424 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3425 SDValue Res;
3426
3427 // When generating execute-only code Constant Pools must be promoted to the
3428 // global data section. It's a bit ugly that we can't share them across basic
3429 // blocks, but this way we guarantee that execute-only behaves correctly with
3430 // position-independent addressing modes.
3431 if (Subtarget->genExecuteOnly()) {
3432 auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
3433 auto T = const_cast<Type*>(CP->getType());
3434 auto C = const_cast<Constant*>(CP->getConstVal());
3435 auto M = const_cast<Module*>(DAG.getMachineFunction().
3436 getFunction().getParent());
3437 auto GV = new GlobalVariable(
3438 *M, T, /*isConstant=*/true, GlobalVariable::InternalLinkage, C,
3439 Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" +
3440 Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" +
3441 Twine(AFI->createPICLabelUId())
3442 );
3443 SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV),
3444 dl, PtrVT);
3445 return LowerGlobalAddress(GA, DAG);
3446 }
3447
3448 if (CP->isMachineConstantPoolEntry())
3449 Res =
3450 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign());
3451 else
3452 Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign());
3453 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
3454}
3455
3456unsigned ARMTargetLowering::getJumpTableEncoding() const {
3457 return MachineJumpTableInfo::EK_Inline;
3458}
3459
3460SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
3461 SelectionDAG &DAG) const {
3462 MachineFunction &MF = DAG.getMachineFunction();
3463 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3464 unsigned ARMPCLabelIndex = 0;
3465 SDLoc DL(Op);
3466 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3467 const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
3468 SDValue CPAddr;
3469 bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
3470 if (!IsPositionIndependent) {
3471 CPAddr = DAG.getTargetConstantPool(BA, PtrVT, Align(4));
3472 } else {
3473 unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
3474 ARMPCLabelIndex = AFI->createPICLabelUId();
3475 ARMConstantPoolValue *CPV =
3476 ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
3477 ARMCP::CPBlockAddress, PCAdj);
3478 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3479 }
3480 CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
3481 SDValue Result = DAG.getLoad(
3482 PtrVT, DL, DAG.getEntryNode(), CPAddr,
3483 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3484 if (!IsPositionIndependent)
3485 return Result;
3486 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
3487 return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
3488}
3489
3490/// Convert a TLS address reference into the correct sequence of loads
3491/// and calls to compute the variable's address for Darwin, and return an
3492/// SDValue containing the final node.
3493
3494/// Darwin only has one TLS scheme which must be capable of dealing with the
3495/// fully general situation, in the worst case. This means:
3496/// + "extern __thread" declaration.
3497/// + Defined in a possibly unknown dynamic library.
3498///
3499/// The general system is that each __thread variable has a [3 x i32] descriptor
3500/// which contains information used by the runtime to calculate the address. The
3501/// only part of this the compiler needs to know about is the first word, which
3502/// contains a function pointer that must be called with the address of the
3503/// entire descriptor in "r0".
3504///
3505/// Since this descriptor may be in a different unit, in general access must
3506/// proceed along the usual ARM rules. A common sequence to produce is:
3507///
3508/// movw rT1, :lower16:_var$non_lazy_ptr
3509/// movt rT1, :upper16:_var$non_lazy_ptr
3510/// ldr r0, [rT1]
3511/// ldr rT2, [r0]
3512/// blx rT2
3513/// [...address now in r0...]
3514SDValue
3515ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
3516 SelectionDAG &DAG) const {
3517 assert(Subtarget->isTargetDarwin() &&
3518        "This function expects a Darwin target");
3519 SDLoc DL(Op);
3520
3521 // First step is to get the address of the actual global symbol. This is where
3522 // the TLS descriptor lives.
3523 SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
3524
3525 // The first entry in the descriptor is a function pointer that we must call
3526 // to obtain the address of the variable.
3527 SDValue Chain = DAG.getEntryNode();
3528 SDValue FuncTLVGet = DAG.getLoad(
3529 MVT::i32, DL, Chain, DescAddr,
3530 MachinePointerInfo::getGOT(DAG.getMachineFunction()), Align(4),
3531 MachineMemOperand::MONonTemporal | MachineMemOperand::MODereferenceable |
3532 MachineMemOperand::MOInvariant);
3533 Chain = FuncTLVGet.getValue(1);
3534
3535 MachineFunction &F = DAG.getMachineFunction();
3536 MachineFrameInfo &MFI = F.getFrameInfo();
3537 MFI.setAdjustsStack(true);
3538
3539 // TLS calls preserve all registers except those that absolutely must be
3540 // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
3541 // silly).
3542 auto TRI =
3543 getTargetMachine().getSubtargetImpl(F.getFunction())->getRegisterInfo();
3544 auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
3545 const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
3546
3547 // Finally, we can make the call. This is just a degenerate version of a
3548 // normal ARM call node: r0 takes the address of the descriptor, and
3549 // returns the address of the variable in this thread.
3550 Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
3551 Chain =
3552 DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
3553 Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
3554 DAG.getRegisterMask(Mask), Chain.getValue(1));
3555 return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
3556}
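In C++ terms, the Darwin TLS access described above calls through the first word of a per-variable descriptor. The following rough model is an editor's illustration, not part of the analyzed source; the struct layout and field names are assumptions drawn from the comment, not a documented API.

#include <cstdint>

// Rough model of the [3 x i32] descriptor: only the first word (a resolver
// function pointer that is called with the descriptor's own address in r0)
// matters to the compiler; the other words are runtime-private.
struct DarwinTLVDescriptor {
  void *(*Resolver)(DarwinTLVDescriptor *);
  std::uintptr_t Opaque1;
  std::uintptr_t Opaque2;
};

// What the emitted "ldr rT2, [r0]; blx rT2" sequence boils down to.
static void *resolveTLVAddress(DarwinTLVDescriptor *Desc) {
  return Desc->Resolver(Desc); // returns the address of the __thread variable
}

// A stand-in resolver so the sketch can run; a real one lives in the runtime.
static void *fakeResolver(DarwinTLVDescriptor *Desc) { return &Desc->Opaque2; }

int main() {
  DarwinTLVDescriptor Desc{fakeResolver, 0, 0};
  return resolveTLVAddress(&Desc) == &Desc.Opaque2 ? 0 : 1;
}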
3557
3558SDValue
3559ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
3560 SelectionDAG &DAG) const {
3561 assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
3562
3563 SDValue Chain = DAG.getEntryNode();
3564 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3565 SDLoc DL(Op);
3566
3567 // Load the current TEB (thread environment block)
3568 SDValue Ops[] = {Chain,
3569 DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),
3570 DAG.getTargetConstant(15, DL, MVT::i32),
3571 DAG.getTargetConstant(0, DL, MVT::i32),
3572 DAG.getTargetConstant(13, DL, MVT::i32),
3573 DAG.getTargetConstant(0, DL, MVT::i32),
3574 DAG.getTargetConstant(2, DL, MVT::i32)};
3575 SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
3576 DAG.getVTList(MVT::i32, MVT::Other), Ops);
3577
3578 SDValue TEB = CurrentTEB.getValue(0);
3579 Chain = CurrentTEB.getValue(1);
3580
3581 // Load the ThreadLocalStoragePointer from the TEB
3582 // A pointer to the TLS array is located at offset 0x2c from the TEB.
3583 SDValue TLSArray =
3584 DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));
3585 TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
3586
3587 // The pointer to the thread's TLS data area is at an offset of
3588 // (TLS index * 4) into the TLSArray.
3589
3590 // Load the TLS index from the C runtime
3591 SDValue TLSIndex =
3592 DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);
3593 TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
3594 TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());
3595
3596 SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
3597 DAG.getConstant(2, DL, MVT::i32));
3598 SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
3599 DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
3600 MachinePointerInfo());
3601
3602 // Get the offset of the start of the .tls section (section base)
3603 const auto *GA = cast<GlobalAddressSDNode>(Op);
3604 auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
3605 SDValue Offset = DAG.getLoad(
3606 PtrVT, DL, Chain,
3607 DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,
3608 DAG.getTargetConstantPool(CPV, PtrVT, Align(4))),
3609 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3610
3611 return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
3612}
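Written as ordinary pointer arithmetic, the Windows TLS lowering above is three loads plus an add. The sketch below is an editor's illustration, not part of the analyzed source; it fabricates a tiny TEB/TLS-array layout in host memory so it can run, and uses the host pointer size where the generated 32-bit ARM code uses a stride of 4.

#include <cstdint>
#include <cstring>

// The address computation emitted above: TEB -> TLS array (at TEB+0x2c) ->
// per-thread TLS data (indexed by _tls_index) -> plus the SECREL offset.
static void *windowsTLSAddress(const unsigned char *TEB, std::uint32_t TlsIndex,
                               std::size_t SecrelOffset) {
  unsigned char **TLSArray;
  std::memcpy(&TLSArray, TEB + 0x2c, sizeof(TLSArray)); // load from TEB+0x2c
  unsigned char *TLSData = TLSArray[TlsIndex];          // load the thread's slot
  return TLSData + SecrelOffset;                        // add the .tls offset
}

int main() {
  unsigned char Slot[16] = {};                          // this thread's TLS data
  unsigned char *TLSArray[3] = {nullptr, nullptr, Slot};
  unsigned char TEB[0x40] = {};
  unsigned char **ArrayPtr = TLSArray;
  std::memcpy(TEB + 0x2c, &ArrayPtr, sizeof(ArrayPtr));
  void *Addr = windowsTLSAddress(TEB, /*TlsIndex=*/2, /*SecrelOffset=*/8);
  return Addr == Slot + 8 ? 0 : 1;
}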
3613
3614// Lower ISD::GlobalTLSAddress using the "general dynamic" model
3615SDValue
3616ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
3617 SelectionDAG &DAG) const {
3618 SDLoc dl(GA);
3619 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3620 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3621 MachineFunction &MF = DAG.getMachineFunction();
3622 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3623 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3624 ARMConstantPoolValue *CPV =
3625 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3626 ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
3627 SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3628 Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
3629 Argument = DAG.getLoad(
3630 PtrVT, dl, DAG.getEntryNode(), Argument,
3631 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3632 SDValue Chain = Argument.getValue(1);
3633
3634 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3635 Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
3636
3637 // call __tls_get_addr.
3638 ArgListTy Args;
3639 ArgListEntry Entry;
3640 Entry.Node = Argument;
3641 Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
3642 Args.push_back(Entry);
3643
3644 // FIXME: is there useful debug info available here?
3645 TargetLowering::CallLoweringInfo CLI(DAG);
3646 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3647 CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
3648 DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
3649
3650 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3651 return CallResult.first;
3652}
3653
3654// Lower ISD::GlobalTLSAddress using the "initial exec" or
3655// "local exec" model.
3656SDValue
3657ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
3658 SelectionDAG &DAG,
3659 TLSModel::Model model) const {
3660 const GlobalValue *GV = GA->getGlobal();
3661 SDLoc dl(GA);
3662 SDValue Offset;
3663 SDValue Chain = DAG.getEntryNode();
3664 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3665 // Get the Thread Pointer
3666 SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
3667
3668 if (model == TLSModel::InitialExec) {
3669 MachineFunction &MF = DAG.getMachineFunction();
3670 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3671 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3672 // Initial exec model.
3673 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3674 ARMConstantPoolValue *CPV =
3675 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3676 ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
3677 true);
3678 Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3679 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3680 Offset = DAG.getLoad(
3681 PtrVT, dl, Chain, Offset,
3682 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3683 Chain = Offset.getValue(1);
3684
3685 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3686 Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
3687
3688 Offset = DAG.getLoad(
3689 PtrVT, dl, Chain, Offset,
3690 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3691 } else {
3692 // local exec model
3693 assert(model == TLSModel::LocalExec);
3694 ARMConstantPoolValue *CPV =
3695 ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
3696 Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3697 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3698 Offset = DAG.getLoad(
3699 PtrVT, dl, Chain, Offset,
3700 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3701 }
3702
3703 // The address of the thread local variable is the add of the thread
3704 // pointer with the offset of the variable.
3705 return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
3706}
3707
3708SDValue
3709ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
3710 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3711 if (DAG.getTarget().useEmulatedTLS())
3712 return LowerToTLSEmulatedModel(GA, DAG);
3713
3714 if (Subtarget->isTargetDarwin())
3715 return LowerGlobalTLSAddressDarwin(Op, DAG);
3716
3717 if (Subtarget->isTargetWindows())
3718 return LowerGlobalTLSAddressWindows(Op, DAG);
3719
3720 // TODO: implement the "local dynamic" model
3721 assert(Subtarget->isTargetELF() && "Only ELF implemented here");
3722 TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
3723
3724 switch (model) {
3725 case TLSModel::GeneralDynamic:
3726 case TLSModel::LocalDynamic:
3727 return LowerToTLSGeneralDynamicModel(GA, DAG);
3728 case TLSModel::InitialExec:
3729 case TLSModel::LocalExec:
3730 return LowerToTLSExecModels(GA, DAG, model);
3731 }
3732 llvm_unreachable("bogus TLS model");
3733}
3734
3735/// Return true if all users of V are within function F, looking through
3736/// ConstantExprs.
3737static bool allUsersAreInFunction(const Value *V, const Function *F) {
3738 SmallVector<const User*,4> Worklist(V->users());
3739 while (!Worklist.empty()) {
3740 auto *U = Worklist.pop_back_val();
3741 if (isa<ConstantExpr>(U)) {
3742 append_range(Worklist, U->users());
3743 continue;
3744 }
3745
3746 auto *I = dyn_cast<Instruction>(U);
3747 if (!I || I->getParent()->getParent() != F)
3748 return false;
3749 }
3750 return true;
3751}
3752
3753static SDValue promoteToConstantPool(const ARMTargetLowering *TLI,
3754 const GlobalValue *GV, SelectionDAG &DAG,
3755 EVT PtrVT, const SDLoc &dl) {
3756 // If we're creating a pool entry for a constant global with unnamed address,
3757 // and the global is small enough, we can emit it inline into the constant pool
3758 // to save ourselves an indirection.
3759 //
3760 // This is a win if the constant is only used in one function (so it doesn't
3761 // need to be duplicated) or duplicating the constant wouldn't increase code
3762 // size (implying the constant is no larger than 4 bytes).
3763 const Function &F = DAG.getMachineFunction().getFunction();
3764
3765 // We rely on this decision to inline being idempotent and unrelated to the
3766 // use-site. We know that if we inline a variable at one use site, we'll
3767 // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
3768 // doesn't know about this optimization, so bail out if it's enabled; otherwise
3769 // we could decide to inline here (and thus never emit the GV) but still
3770 // require the GV from fast-isel generated code.
3771 if (!EnableConstpoolPromotion ||
3772 DAG.getMachineFunction().getTarget().Options.EnableFastISel)
3773 return SDValue();
3774
3775 auto *GVar = dyn_cast<GlobalVariable>(GV);
3776 if (!GVar || !GVar->hasInitializer() ||
3777 !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
3778 !GVar->hasLocalLinkage())
3779 return SDValue();
3780
3781 // If we inline a value that contains relocations, we move the relocations
3782 // from .data to .text. This is not allowed in position-independent code.
3783 auto *Init = GVar->getInitializer();
3784 if ((TLI->isPositionIndependent() || TLI->getSubtarget()->isROPI()) &&
3785 Init->needsDynamicRelocation())
3786 return SDValue();
3787
3788 // The constant islands pass can only really deal with alignment requests
3789 // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote
3790 // any type with an alignment requirement greater than 4 bytes. We also
3791 // can only promote constants that are multiples of 4 bytes in size or
3792 // are paddable to a multiple of 4. Currently we only try to pad constants
3793 // that are strings, for simplicity.
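// For example, a 6-byte string gets 2 bytes of zero padding (PaddedSize == 8
// below), while a size that is already a multiple of 4 computes
// RequiredPadding == 4, which the code below treats as "no padding needed".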
3794 auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
3795 unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
3796 Align PrefAlign = DAG.getDataLayout().getPreferredAlign(GVar);
3797 unsigned RequiredPadding = 4 - (Size % 4);
3798 bool PaddingPossible =
3799 RequiredPadding == 4 || (CDAInit && CDAInit->isString());
3800 if (!PaddingPossible || PrefAlign > 4 || Size > ConstpoolPromotionMaxSize ||
3801 Size == 0)
3802 return SDValue();
3803
3804 unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
3805 MachineFunction &MF = DAG.getMachineFunction();
3806 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3807
3808 // We can't bloat the constant pool too much, else the ConstantIslands pass
3809 // may fail to converge. If we haven't promoted this global yet (it may have
3810 // multiple uses), and promoting it would increase the constant pool size (Sz
3811 // > 4), ensure we have space to do so up to MaxTotal.
3812 if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4)
3813 if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >=
3814 ConstpoolPromotionMaxTotal)
3815 return SDValue();
3816
3817 // This is only valid if all users are in a single function; we can't clone
3818 // the constant in general. The LLVM IR unnamed_addr allows merging
3819 // constants, but not cloning them.
3820 //
3821 // We could potentially allow cloning if we could prove all uses of the
3822 // constant in the current function don't care about the address, like
3823 // printf format strings. But that isn't implemented for now.
3824 if (!allUsersAreInFunction(GVar, &F))
3825 return SDValue();
3826
3827 // We're going to inline this global. Pad it out if needed.
3828 if (RequiredPadding != 4) {
3829 StringRef S = CDAInit->getAsString();
3830
3831 SmallVector<uint8_t,16> V(S.size());
3832 std::copy(S.bytes_begin(), S.bytes_end(), V.begin());
3833 while (RequiredPadding--)
3834 V.push_back(0);
3835 Init = ConstantDataArray::get(*DAG.getContext(), V);
3836 }
3837
3838 auto CPVal = ARMConstantPoolConstant::Create(GVar, Init);
3839 SDValue CPAddr = DAG.getTargetConstantPool(CPVal, PtrVT, Align(4));
3840 if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) {
3841 AFI->markGlobalAsPromotedToConstantPool(GVar);
3842 AFI->setPromotedConstpoolIncrease(AFI->getPromotedConstpoolIncrease() +
3843 PaddedSize - 4);
3844 }
3845 ++NumConstpoolPromoted;
3846 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3847}
3848
3849bool ARMTargetLowering::isReadOnly(const GlobalValue *GV) const {
3850 if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
3851 if (!(GV = GA->getAliaseeObject()))
3852 return false;
3853 if (const auto *V = dyn_cast<GlobalVariable>(GV))
3854 return V->isConstant();
3855 return isa<Function>(GV);
3856}
3857
3858SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op,
3859 SelectionDAG &DAG) const {
3860 switch (Subtarget->getTargetTriple().getObjectFormat()) {
3861 default: llvm_unreachable("unknown object format");
3862 case Triple::COFF:
3863 return LowerGlobalAddressWindows(Op, DAG);
3864 case Triple::ELF:
3865 return LowerGlobalAddressELF(Op, DAG);
3866 case Triple::MachO:
3867 return LowerGlobalAddressDarwin(Op, DAG);
3868 }
3869}
3870
3871SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
3872 SelectionDAG &DAG) const {
3873 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3874 SDLoc dl(Op);
3875 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3876 const TargetMachine &TM = getTargetMachine();
3877 bool IsRO = isReadOnly(GV);
3878
3879 // Call promoteToConstantPool only if we are not generating an execute-only (XO) text section.
3880 if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly())
3881 if (SDValue V = promoteToConstantPool(this, GV, DAG, PtrVT, dl))
3882 return V;
3883
3884 if (isPositionIndependent()) {
3885 bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
3886 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3887 UseGOT_PREL ? ARMII::MO_GOT : 0);
3888 SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3889 if (UseGOT_PREL)
3890 Result =
3891 DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3892 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3893 return Result;
3894 } else if (Subtarget->isROPI() && IsRO) {
3895 // PC-relative.
3896 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
3897 SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3898 return Result;
3899 } else if (Subtarget->isRWPI() && !IsRO) {
3900 // SB-relative.
3901 SDValue RelAddr;
3902 if (Subtarget->useMovt()) {
3903 ++NumMovwMovt;
3904 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
3905 RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
3906 } else { // use literal pool for address constant
3907 ARMConstantPoolValue *CPV =
3908 ARMConstantPoolConstant::Create(GV, ARMCP::SBREL);
3909 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3910 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3911 RelAddr = DAG.getLoad(
3912 PtrVT, dl, DAG.getEntryNode(), CPAddr,
3913 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3914 }
3915 SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT);
3916 SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr);
3917 return Result;
3918 }
3919
3920 // If we have T2 ops, we can materialize the address directly via movt/movw
3921 // pair. This is always cheaper.
3922 if (Subtarget->useMovt()) {
3923 ++NumMovwMovt;
3924 // FIXME: Once remat is capable of dealing with instructions with register
3925 // operands, expand this into two nodes.
3926 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
3927 DAG.getTargetGlobalAddress(GV, dl, PtrVT));
3928 } else {
3929 SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, Align(4));
3930 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3931 return DAG.getLoad(
3932 PtrVT, dl, DAG.getEntryNode(), CPAddr,
3933 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3934 }
3935}
3936
3937SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
3938 SelectionDAG &DAG) const {
3939 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3940        "ROPI/RWPI not currently supported for Darwin");
3941 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3942 SDLoc dl(Op);
3943 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3944
3945 if (Subtarget->useMovt())
3946 ++NumMovwMovt;
3947
3948 // FIXME: Once remat is capable of dealing with instructions with register
3949 // operands, expand this into multiple nodes
3950 unsigned Wrapper =
3951 isPositionIndependent() ? ARMISD::WrapperPIC : ARMISD::Wrapper;
3952
3953 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
3954 SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
3955
3956 if (Subtarget->isGVIndirectSymbol(GV))
3957 Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3958 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3959 return Result;
3960}
3961
3962SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
3963 SelectionDAG &DAG) const {
3964 assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
3965 assert(Subtarget->useMovt() &&
3966        "Windows on ARM expects to use movw/movt");
3967 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3968        "ROPI/RWPI not currently supported for Windows");
3969
3970 const TargetMachine &TM = getTargetMachine();
3971 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3972 ARMII::TOF TargetFlags = ARMII::MO_NO_FLAG;
3973 if (GV->hasDLLImportStorageClass())
3974 TargetFlags = ARMII::MO_DLLIMPORT;
3975 else if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
3976 TargetFlags = ARMII::MO_COFFSTUB;
3977 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3978 SDValue Result;
3979 SDLoc DL(Op);
3980
3981 ++NumMovwMovt;
3982
3983 // FIXME: Once remat is capable of dealing with instructions with register
3984 // operands, expand this into two nodes.
3985 Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
3986 DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*offset=*/0,
3987 TargetFlags));
3988 if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
3989 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3990 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3991 return Result;
3992}
3993
3994SDValue
3995ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
3996 SDLoc dl(Op);
3997 SDValue Val = DAG.getConstant(0, dl, MVT::i32);
3998 return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
3999 DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
4000 Op.getOperand(1), Val);
4001}
4002
4003SDValue
4004ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
4005 SDLoc dl(Op);
4006 return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
4007 Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
4008}
4009
4010SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
4011 SelectionDAG &DAG) const {
4012 SDLoc dl(Op);
4013 return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other,
4014 Op.getOperand(0));
4015}
4016
4017SDValue ARMTargetLowering::LowerINTRINSIC_VOID(
4018 SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const {
4019 unsigned IntNo =
4020 cast<ConstantSDNode>(
4021 Op.getOperand(Op.getOperand(0).getValueType() == MVT::Other))
4022 ->getZExtValue();
4023 switch (IntNo) {
4024 default:
4025 return SDValue(); // Don't custom lower most intrinsics.
4026 case Intrinsic::arm_gnu_eabi_mcount: {
4027 MachineFunction &MF = DAG.getMachineFunction();
4028 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4029 SDLoc dl(Op);
4030 SDValue Chain = Op.getOperand(0);
4031 // call "\01__gnu_mcount_nc"
4032 const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
4033 const uint32_t *Mask =
4034 ARI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
4035 assert(Mask && "Missing call preserved mask for calling convention");
4036 // Mark LR an implicit live-in.
4037 Register Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
4038 SDValue ReturnAddress =
4039 DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, PtrVT);
4040 constexpr EVT ResultTys[] = {MVT::Other, MVT::Glue};
4041 SDValue Callee =
4042 DAG.getTargetExternalSymbol("\01__gnu_mcount_nc", PtrVT, 0);
4043 SDValue RegisterMask = DAG.getRegisterMask(Mask);
4044 if (Subtarget->isThumb())
4045 return SDValue(
4046 DAG.getMachineNode(
4047 ARM::tBL_PUSHLR, dl, ResultTys,
4048 {ReturnAddress, DAG.getTargetConstant(ARMCC::AL, dl, PtrVT),
4049 DAG.getRegister(0, PtrVT), Callee, RegisterMask, Chain}),
4050 0);
4051 return SDValue(
4052 DAG.getMachineNode(ARM::BL_PUSHLR, dl, ResultTys,
4053 {ReturnAddress, Callee, RegisterMask, Chain}),
4054 0);
4055 }
4056 }
4057}
4058
4059SDValue
4060ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
4061 const ARMSubtarget *Subtarget) const {
4062 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
4063 SDLoc dl(Op);
4064 switch (IntNo) {
4065 default: return SDValue(); // Don't custom lower most intrinsics.
4066 case Intrinsic::thread_pointer: {
4067 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4068 return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
4069 }
4070 case Intrinsic::arm_cls: {
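    // Expanded below as ctlz(((x ^ (x >> 31)) << 1) | 1): the xor maps the value
    // onto the non-negative range, the shift drops the (now zero) sign bit, and
    // the "| 1" keeps ctlz away from a zero input. For example, x == 0x0000ffff
    // gives ctlz(0x0001ffff) == 15 == cls(x).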
4071 const SDValue &Operand = Op.getOperand(1);
4072 const EVT VTy = Op.getValueType();
4073 SDValue SRA =
4074 DAG.getNode(ISD::SRA, dl, VTy, Operand, DAG.getConstant(31, dl, VTy));
4075 SDValue XOR = DAG.getNode(ISD::XOR, dl, VTy, SRA, Operand);
4076 SDValue SHL =
4077 DAG.getNode(ISD::SHL, dl, VTy, XOR, DAG.getConstant(1, dl, VTy));
4078 SDValue OR =
4079 DAG.getNode(ISD::OR, dl, VTy, SHL, DAG.getConstant(1, dl, VTy));
4080 SDValue Result = DAG.getNode(ISD::CTLZ, dl, VTy, OR);
4081 return Result;
4082 }
4083 case Intrinsic::arm_cls64: {
4084 // cls(x) = if cls(hi(x)) != 31 then cls(hi(x))
4085 // else 31 + clz(if hi(x) == 0 then lo(x) else not(lo(x)))
4086 const SDValue &Operand = Op.getOperand(1);
4087 const EVT VTy = Op.getValueType();
4088
4089 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VTy, Operand,
4090 DAG.getConstant(1, dl, VTy));
4091 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VTy, Operand,
4092 DAG.getConstant(0, dl, VTy));
4093 SDValue Constant0 = DAG.getConstant(0, dl, VTy);
4094 SDValue Constant1 = DAG.getConstant(1, dl, VTy);
4095 SDValue Constant31 = DAG.getConstant(31, dl, VTy);
4096 SDValue SRAHi = DAG.getNode(ISD::SRA, dl, VTy, Hi, Constant31);
4097 SDValue XORHi = DAG.getNode(ISD::XOR, dl, VTy, SRAHi, Hi);
4098 SDValue SHLHi = DAG.getNode(ISD::SHL, dl, VTy, XORHi, Constant1);
4099 SDValue ORHi = DAG.getNode(ISD::OR, dl, VTy, SHLHi, Constant1);
4100 SDValue CLSHi = DAG.getNode(ISD::CTLZ, dl, VTy, ORHi);
4101 SDValue CheckLo =
4102 DAG.getSetCC(dl, MVT::i1, CLSHi, Constant31, ISD::CondCode::SETEQ);
4103 SDValue HiIsZero =
4104 DAG.getSetCC(dl, MVT::i1, Hi, Constant0, ISD::CondCode::SETEQ);
4105 SDValue AdjustedLo =
4106 DAG.getSelect(dl, VTy, HiIsZero, Lo, DAG.getNOT(dl, Lo, VTy));
4107 SDValue CLZAdjustedLo = DAG.getNode(ISD::CTLZ, dl, VTy, AdjustedLo);
4108 SDValue Result =
4109 DAG.getSelect(dl, VTy, CheckLo,
4110 DAG.getNode(ISD::ADD, dl, VTy, CLZAdjustedLo, Constant31), CLSHi);
4111 return Result;
4112 }
4113 case Intrinsic::eh_sjlj_lsda: {
4114 MachineFunction &MF = DAG.getMachineFunction();
4115 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
4116 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
4117 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4118 SDValue CPAddr;
4119 bool IsPositionIndependent = isPositionIndependent();
4120 unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
4121 ARMConstantPoolValue *CPV =
4122 ARMConstantPoolConstant::Create(&MF.getFunction(), ARMPCLabelIndex,
4123 ARMCP::CPLSDA, PCAdj);
4124 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
4125 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
4126 SDValue Result = DAG.getLoad(
4127 PtrVT, dl, DAG.getEntryNode(), CPAddr,
4128 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
4129
4130 if (IsPositionIndependent) {
4131 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
4132 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
4133 }
4134 return Result;
4135 }
4136 case Intrinsic::arm_neon_vabs:
4137 return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(),
4138 Op.getOperand(1));
4139 case Intrinsic::arm_neon_vmulls:
4140 case Intrinsic::arm_neon_vmullu: {
4141 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
4142 ? ARMISD::VMULLs : ARMISD::VMULLu;
4143 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4144 Op.getOperand(1), Op.getOperand(2));
4145 }
4146 case Intrinsic::arm_neon_vminnm:
4147 case Intrinsic::arm_neon_vmaxnm: {
4148 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)
4149 ? ISD::FMINNUM : ISD::FMAXNUM;
4150 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4151 Op.getOperand(1), Op.getOperand(2));
4152 }
4153 case Intrinsic::arm_neon_vminu:
4154 case Intrinsic::arm_neon_vmaxu: {
4155 if (Op.getValueType().isFloatingPoint())
4156 return SDValue();
4157 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)
4158 ? ISD::UMIN : ISD::UMAX;
4159 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4160 Op.getOperand(1), Op.getOperand(2));
4161 }
4162 case Intrinsic::arm_neon_vmins:
4163 case Intrinsic::arm_neon_vmaxs: {
4164 // v{min,max}s is overloaded between signed integers and floats.
4165 if (!Op.getValueType().isFloatingPoint()) {
4166 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
4167 ? ISD::SMIN : ISD::SMAX;
4168 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4169 Op.getOperand(1), Op.getOperand(2));
4170 }
4171 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
4172 ? ISD::FMINIMUM : ISD::FMAXIMUM;
4173 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4174 Op.getOperand(1), Op.getOperand(2));
4175 }
4176 case Intrinsic::arm_neon_vtbl1:
4177 return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(),
4178 Op.getOperand(1), Op.getOperand(2));
4179 case Intrinsic::arm_neon_vtbl2:
4180 return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(),
4181 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4182 case Intrinsic::arm_mve_pred_i2v:
4183 case Intrinsic::arm_mve_pred_v2i:
4184 return DAG.getNode(ARMISD::PREDICATE_CAST, SDLoc(Op), Op.getValueType(),
4185 Op.getOperand(1));
4186 case Intrinsic::arm_mve_vreinterpretq:
4187 return DAG.getNode(ARMISD::VECTOR_REG_CAST, SDLoc(Op), Op.getValueType(),
4188 Op.getOperand(1));
4189 case Intrinsic::arm_mve_lsll:
4190 return DAG.getNode(ARMISD::LSLL, SDLoc(Op), Op->getVTList(),
4191 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4192 case Intrinsic::arm_mve_asrl:
4193 return DAG.getNode(ARMISD::ASRL, SDLoc(Op), Op->getVTList(),
4194 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4195 }
4196}
4197
4198static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
4199 const ARMSubtarget *Subtarget) {
4200 SDLoc dl(Op);
4201 ConstantSDNode *SSIDNode = cast<ConstantSDNode>(Op.getOperand(2));
4202 auto SSID = static_cast<SyncScope::ID>(SSIDNode->getZExtValue());
4203 if (SSID == SyncScope::SingleThread)
4204 return Op;
4205
4206 if (!Subtarget->hasDataBarrier()) {
4207 // Some ARMv6 cpus can support data barriers with an mcr instruction.
4208 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
4209 // here.
4210 assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
4211        "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
4212 return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
4213 DAG.getConstant(0, dl, MVT::i32));
4214 }
4215
4216 ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
4217 AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
4218 ARM_MB::MemBOpt Domain = ARM_MB::ISH;
4219 if (Subtarget->isMClass()) {
4220 // Only a full system barrier exists in the M-class architectures.
4221 Domain = ARM_MB::SY;
4222 } else if (Subtarget->preferISHSTBarriers() &&
4223 Ord == AtomicOrdering::Release) {
4224 // Swift happens to implement ISHST barriers in a way that's compatible with
4225 // Release semantics but weaker than ISH so we'd be fools not to use
4226 // it. Beware: other processors probably don't!
4227 Domain = ARM_MB::ISHST;
4228 }
4229
4230 return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
4231 DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32),
4232 DAG.getConstant(Domain, dl, MVT::i32));
4233}
4234
4235static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
4236 const ARMSubtarget *Subtarget) {
4237 // ARM pre v5TE and Thumb1 do not have preload instructions.
4238 if (!(Subtarget->isThumb2() ||
4239 (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
4240 // Just preserve the chain.
4241 return Op.getOperand(0);
4242
4243 SDLoc dl(Op);
4244 unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
4245 if (!isRead &&
4246 (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
4247 // ARMv7 with MP extension has PLDW.
4248 return Op.getOperand(0);
4249
4250 unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
4251 if (Subtarget->isThumb()) {
4252 // Invert the bits.
4253 isRead = ~isRead & 1;
4254 isData = ~isData & 1;
4255 }
4256
4257 return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
4258 Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32),
4259 DAG.getConstant(isData, dl, MVT::i32));
4260}
4261
4262static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
4263 MachineFunction &MF = DAG.getMachineFunction();
4264 ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
4265
4266 // vastart just stores the address of the VarArgsFrameIndex slot into the
4267 // memory location argument.
4268 SDLoc dl(Op);
4269 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
4270 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4271 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4272 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
4273 MachinePointerInfo(SV));
4274}
4275
4276SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA,
4277 CCValAssign &NextVA,
4278 SDValue &Root,
4279 SelectionDAG &DAG,
4280 const SDLoc &dl) const {
4281 MachineFunction &MF = DAG.getMachineFunction();
4282 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
4283
4284 const TargetRegisterClass *RC;
4285 if (AFI->isThumb1OnlyFunction())
4286 RC = &ARM::tGPRRegClass;
4287 else
4288 RC = &ARM::GPRRegClass;
4289
4290 // Transform the arguments stored in physical registers into virtual ones.
4291 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4292 SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
4293
4294 SDValue ArgValue2;
4295 if (NextVA.isMemLoc()) {
4296 MachineFrameInfo &MFI = MF.getFrameInfo();
4297 int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true);
4298
4299 // Create load node to retrieve arguments from the stack.
4300 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4301 ArgValue2 = DAG.getLoad(
4302 MVT::i32, dl, Root, FIN,
4303 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
4304 } else {
4305 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
4306 ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
4307 }
4308 if (!Subtarget->isLittle())
4309 std::swap (ArgValue, ArgValue2);
4310 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
4311}
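A rough, standalone sketch of what the VMOVDRR node above assembles (PairToF64 is an illustrative helper name, not part of the backend; the real lowering manipulates DAG nodes, not host doubles):

#include <cstdint>
#include <cstring>

// VMOVDRR pairs two core registers into a D register, with the first operand
// landing in the low half; that is why the halves are swapped above for
// big-endian subtargets.
static double PairToF64(uint32_t Lo, uint32_t Hi) {
  uint64_t Bits = (uint64_t(Hi) << 32) | Lo;
  double D;
  std::memcpy(&D, &Bits, sizeof D);  // bit-exact reinterpretation
  return D;
}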
4312
4313// The remaining GPRs hold either the beginning of variable-argument
4314// data, or the beginning of an aggregate passed by value (usually
4315// byval). Either way, we allocate stack slots adjacent to the data
4316// provided by our caller, and store the unallocated registers there.
4317// If this is a variadic function, the va_list pointer will begin with
4318// these values; otherwise, this reassembles a (byval) structure that
4319// was split between registers and memory.
4320 // Return: The frame index that the registers were stored into.
4321int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
4322 const SDLoc &dl, SDValue &Chain,
4323 const Value *OrigArg,
4324 unsigned InRegsParamRecordIdx,
4325 int ArgOffset, unsigned ArgSize) const {
4326 // Currently, two use-cases are possible:
4327 // Case #1. Non-var-args function, and we meet the first byval parameter.
4328 // Set up the first unallocated register as the first byval register;
4329 // eat all remaining registers
4330 // (these two actions are performed by the HandleByVal method).
4331 // Then, here, we initialize the stack frame with
4332 // "store-reg" instructions.
4333 // Case #2. Var-args function that doesn't contain byval parameters.
4334 // The same: eat all remaining unallocated registers and
4335 // initialize the stack frame.
4336
4337 MachineFunction &MF = DAG.getMachineFunction();
4338 MachineFrameInfo &MFI = MF.getFrameInfo();
4339 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
4340 unsigned RBegin, REnd;
4341 if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
4342 CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
4343 } else {
4344 unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
4345 RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];
4346 REnd = ARM::R4;
4347 }
4348
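    // e.g. if RBegin is r1, three registers (r1-r3) still need to be stored,
    // so the fixed object below starts 12 bytes below the incoming stack arguments.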
4349 if (REnd != RBegin)
4350 ArgOffset = -4 * (ARM::R4 - RBegin);
4351
4352 auto PtrVT = getPointerTy(DAG.getDataLayout());
4353 int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false);
4354 SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);
4355
4356 SmallVector<SDValue, 4> MemOps;
4357 const TargetRegisterClass *RC =
4358 AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
4359
4360 for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
4361 Register VReg = MF.addLiveIn(Reg, RC);
4362 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
4363 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4364 MachinePointerInfo(OrigArg, 4 * i));
4365 MemOps.push_back(Store);
4366 FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));
4367 }
4368
4369 if (!MemOps.empty())
4370 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4371 return FrameIndex;
4372}
4373
4374 // Set up the stack frame that the va_list pointer will start from.
4375void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
4376 const SDLoc &dl, SDValue &Chain,
4377 unsigned ArgOffset,
4378 unsigned TotalArgRegsSaveSize,
4379 bool ForceMutable) const {
4380 MachineFunction &MF = DAG.getMachineFunction();
4381 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
4382
4383 // Try to store any remaining integer argument regs
4384 // to their spots on the stack so that they may be loaded by dereferencing
4385 // the result of va_next.
4386 // If there are no regs to be stored, just point the address after the last
4387 // argument passed via the stack.
4388 int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
4389 CCInfo.getInRegsParamsCount(),
4390 CCInfo.getNextStackOffset(),
4391 std::max(4U, TotalArgRegsSaveSize));
4392 AFI->setVarArgsFrameIndex(FrameIndex);
4393}
4394
4395bool ARMTargetLowering::splitValueIntoRegisterParts(
4396 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
4397 unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const {
4398 bool IsABIRegCopy = CC.has_value();
4399 EVT ValueVT = Val.getValueType();
4400 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
4401 PartVT == MVT::f32) {
4402 unsigned ValueBits = ValueVT.getSizeInBits();
4403 unsigned PartBits = PartVT.getSizeInBits();
4404 Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(ValueBits), Val);
4405 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::getIntegerVT(PartBits), Val);
4406 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
4407 Parts[0] = Val;
4408 return true;
4409 }
4410 return false;
4411}
4412
4413SDValue ARMTargetLowering::joinRegisterPartsIntoValue(
4414 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
4415 MVT PartVT, EVT ValueVT, Optional<CallingConv::ID> CC) const {
4416 bool IsABIRegCopy = CC.has_value();
4417 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
4418 PartVT == MVT::f32) {
4419 unsigned ValueBits = ValueVT.getSizeInBits();
4420 unsigned PartBits = PartVT.getSizeInBits();
4421 SDValue Val = Parts[0];
4422
4423 Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(PartBits), Val);
4424 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::getIntegerVT(ValueBits), Val);
4425 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
4426 return Val;
4427 }
4428 return SDValue();
4429}
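A minimal host-side sketch of the half-in-float convention handled above (F16BitsFromF32Container is an assumed, illustrative name; the lowering itself expresses the same idea with BITCAST and TRUNCATE nodes):

#include <cstdint>
#include <cstring>

// An f16/bf16 value passed in an f32 register occupies the low 16 bits of the
// f32 bit pattern; recovering it mirrors the bitcast -> truncate -> bitcast above.
static uint16_t F16BitsFromF32Container(float Container) {
  uint32_t Bits;
  std::memcpy(&Bits, &Container, sizeof Bits);
  return static_cast<uint16_t>(Bits);  // keep only the low half
}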
4430
4431SDValue ARMTargetLowering::LowerFormalArguments(
4432 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4433 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4434 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4435 MachineFunction &MF = DAG.getMachineFunction();
4436 MachineFrameInfo &MFI = MF.getFrameInfo();
4437
4438 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
4439
4440 // Assign locations to all of the incoming arguments.
4441 SmallVector<CCValAssign, 16> ArgLocs;
4442 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4443 *DAG.getContext());
4444 CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
4445
4446 SmallVector<SDValue, 16> ArgValues;
4447 SDValue ArgValue;
4448 Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
4449 unsigned CurArgIdx = 0;
4450
4451 // Initially ArgRegsSaveSize is zero.
4452 // Then we increase this value each time we meet a byval parameter.
4453 // We also increase this value in the case of a varargs function.
4454 AFI->setArgRegsSaveSize(0);
4455
4456 // Calculate the amount of stack space that we need to allocate to store
4457 // byval and variadic arguments that are passed in registers.
4458 // We need to know this before we allocate the first byval or variadic
4459 // argument, as they will be allocated a stack slot below the CFA (Canonical
4460 // Frame Address, the stack pointer at entry to the function).
4461 unsigned ArgRegBegin = ARM::R4;
4462 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4463 if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
4464 break;
4465
4466 CCValAssign &VA = ArgLocs[i];
4467 unsigned Index = VA.getValNo();
4468 ISD::ArgFlagsTy Flags = Ins[Index].Flags;
4469 if (!Flags.isByVal())
4470 continue;
4471
4472 assert(VA.isMemLoc() && "unexpected byval pointer in reg");
4473 unsigned RBegin, REnd;
4474 CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
4475 ArgRegBegin = std::min(ArgRegBegin, RBegin);
4476
4477 CCInfo.nextInRegsParam();
4478 }
4479 CCInfo.rewindByValRegsInfo();
4480
4481 int lastInsIndex = -1;
4482 if (isVarArg && MFI.hasVAStart()) {
4483 unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
4484 if (RegIdx != array_lengthof(GPRArgRegs))
4485 ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
4486 }
4487
4488 unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
4489 AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
4490 auto PtrVT = getPointerTy(DAG.getDataLayout());
4491
4492 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4493 CCValAssign &VA = ArgLocs[i];
4494 if (Ins[VA.getValNo()].isOrigArg()) {
4495 std::advance(CurOrigArg,
4496 Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
4497 CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
4498 }
4499 // Arguments stored in registers.
4500 if (VA.isRegLoc()) {
4501 EVT RegVT = VA.getLocVT();
4502
4503 if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {
4504 // f64 and vector types are split up into multiple registers or
4505 // combinations of registers and stack slots.
4506 SDValue ArgValue1 =
4507 GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4508 VA = ArgLocs[++i]; // skip ahead to next loc
4509 SDValue ArgValue2;
4510 if (VA.isMemLoc()) {
4511 int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);
4512 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4513 ArgValue2 = DAG.getLoad(
4514 MVT::f64, dl, Chain, FIN,
4515 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
4516 } else {
4517 ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4518 }
4519 ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
4520 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue,
4521 ArgValue1, DAG.getIntPtrConstant(0, dl));
4522 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue,
4523 ArgValue2, DAG.getIntPtrConstant(1, dl));
4524 } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) {
4525 ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4526 } else {
4527 const TargetRegisterClass *RC;
4528
4529 if (RegVT == MVT::f16 || RegVT == MVT::bf16)
4530 RC = &ARM::HPRRegClass;
4531 else if (RegVT == MVT::f32)
4532 RC = &ARM::SPRRegClass;
4533 else if (RegVT == MVT::f64 || RegVT == MVT::v4f16 ||
4534 RegVT == MVT::v4bf16)
4535 RC = &ARM::DPRRegClass;
4536 else if (RegVT == MVT::v2f64 || RegVT == MVT::v8f16 ||
4537 RegVT == MVT::v8bf16)
4538 RC = &ARM::QPRRegClass;
4539 else if (RegVT == MVT::i32)
4540 RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
4541 : &ARM::GPRRegClass;
4542 else
4543 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
4544
4545 // Transform the arguments in physical registers into virtual ones.
4546 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4547 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
4548
4549 // If this value is passed in r0 and has the returned attribute (e.g.
4550 // C++ 'structors), record this fact for later use.
4551 if (VA.getLocReg() == ARM::R0 && Ins[VA.getValNo()].Flags.isReturned()) {
4552 AFI->setPreservesR0();
4553 }
4554 }
4555
4556 // If this is an 8 or 16-bit value, it is really passed promoted
4557 // to 32 bits. Insert an assert[sz]ext to capture this, then
4558 // truncate to the right size.
4559 switch (VA.getLocInfo()) {
4560 default: llvm_unreachable("Unknown loc info!");
4561 case CCValAssign::Full: break;
4562 case CCValAssign::BCvt:
4563 ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
4564 break;
4565 case CCValAssign::SExt:
4566 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
4567 DAG.getValueType(VA.getValVT()));
4568 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
4569 break;
4570 case CCValAssign::ZExt:
4571 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
4572 DAG.getValueType(VA.getValVT()));
4573 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
4574 break;
4575 }
4576
4577 // f16 arguments have their size extended to 4 bytes and passed as if they
4578 // had been copied to the LSBs of a 32-bit register.
4579 // To model this, they are passed extended to i32 (soft ABI) or to f32 (hard ABI).
4580 if (VA.needsCustom() &&
4581 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
4582 ArgValue = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), ArgValue);
4583
4584 InVals.push_back(ArgValue);
4585 } else { // VA.isRegLoc()
4586 // Only arguments passed on the stack should make it here.
4587 assert(VA.isMemLoc());
4588 assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
4589
4590 int index = VA.getValNo();
4591
4592 // Some Ins[] entries become multiple ArgLoc[] entries.
4593 // Process them only once.
4594 if (index != lastInsIndex)
4595 {
4596 ISD::ArgFlagsTy Flags = Ins[index].Flags;
4597 // FIXME: For now, all byval parameter objects are marked mutable.
4598 // This can be changed with more analysis.
4599 // In case of tail call optimization, mark all arguments mutable,
4600 // since they could be overwritten by the lowering of arguments in the
4601 // case of a tail call.
4602 if (Flags.isByVal()) {
4603 assert(Ins[index].isOrigArg() &&
4604        "Byval arguments cannot be implicit");
4605 unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
4606
4607 int FrameIndex = StoreByValRegs(
4608 CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex,
4609 VA.getLocMemOffset(), Flags.getByValSize());
4610 InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));
4611 CCInfo.nextInRegsParam();
4612 } else {
4613 unsigned FIOffset = VA.getLocMemOffset();
4614 int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
4615 FIOffset, true);
4616
4617 // Create load nodes to retrieve arguments from the stack.
4618 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4619 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
4620 MachinePointerInfo::getFixedStack(
4621 DAG.getMachineFunction(), FI)));
4622 }
4623 lastInsIndex = index;
4624 }
4625 }
4626 }
4627
4628 // varargs
4629 if (isVarArg && MFI.hasVAStart()) {
4630 VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getNextStackOffset(),
4631 TotalArgRegsSaveSize);
4632 if (AFI->isCmseNSEntryFunction()) {
4633 DiagnosticInfoUnsupported Diag(
4634 DAG.getMachineFunction().getFunction(),
4635 "secure entry function must not be variadic", dl.getDebugLoc());
4636 DAG.getContext()->diagnose(Diag);
4637 }
4638 }
4639
4640 unsigned StackArgSize = CCInfo.getNextStackOffset();
4641 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
4642 if (canGuaranteeTCO(CallConv, TailCallOpt)) {
4643 // The only way to guarantee a tail call is if the callee restores its
4644 // argument area, but it must also keep the stack aligned when doing so.
4645 const DataLayout &DL = DAG.getDataLayout();
4646 StackArgSize = alignTo(StackArgSize, DL.getStackAlignment());
4647
4648 AFI->setArgumentStackToRestore(StackArgSize);
4649 }
4650 AFI->setArgumentStackSize(StackArgSize);
4651
4652 if (CCInfo.getNextStackOffset() > 0 && AFI->isCmseNSEntryFunction()) {
4653 DiagnosticInfoUnsupported Diag(
4654 DAG.getMachineFunction().getFunction(),
4655 "secure entry function requires arguments on stack", dl.getDebugLoc());
4656 DAG.getContext()->diagnose(Diag);
4657 }
4658
4659 return Chain;
4660}
4661
4662/// isFloatingPointZero - Return true if this is +0.0.
4663static bool isFloatingPointZero(SDValue Op) {
4664 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
11: Calling 'dyn_cast<llvm::ConstantFPSDNode, llvm::SDValue>'
19: Returning from 'dyn_cast<llvm::ConstantFPSDNode, llvm::SDValue>'
20: Assuming 'CFP' is null
4665 return CFP->getValueAPF().isPosZero();
4666 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
4667 // Maybe this has already been legalized into the constant pool?
4668 if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
21: Calling 'SDValue::getOperand'
4669 SDValue WrapperOp = Op.getOperand(1).getOperand(0);
4670 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
4671 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
4672 return CFP->getValueAPF().isPosZero();
4673 }
4674 } else if (Op->getOpcode() == ISD::BITCAST &&
4675 Op->getValueType(0) == MVT::f64) {
4676 // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
4677 // created by LowerConstantFP().
4678 SDValue BitcastOp = Op->getOperand(0);
4679 if (BitcastOp->getOpcode() == ARMISD::VMOVIMM &&
4680 isNullConstant(BitcastOp->getOperand(0)))
4681 return true;
4682 }
4683 return false;
4684}
4685
4686/// Returns appropriate ARM CMP (cmp) and corresponding condition code for
4687/// the given operands.
4688SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
4689 SDValue &ARMcc, SelectionDAG &DAG,
4690 const SDLoc &dl) const {
4691 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
4692 unsigned C = RHSC->getZExtValue();
4693 if (!isLegalICmpImmediate((int32_t)C)) {
4694 // Constant does not fit, try adjusting it by one.
4695 switch (CC) {
4696 default: break;
4697 case ISD::SETLT:
4698 case ISD::SETGE:
4699 if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
4700 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
4701 RHS = DAG.getConstant(C - 1, dl, MVT::i32);
4702 }
4703 break;
4704 case ISD::SETULT:
4705 case ISD::SETUGE:
4706 if (C != 0 && isLegalICmpImmediate(C-1)) {
4707 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
4708 RHS = DAG.getConstant(C - 1, dl, MVT::i32);
4709 }
4710 break;
4711 case ISD::SETLE:
4712 case ISD::SETGT:
4713 if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
4714 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
4715 RHS = DAG.getConstant(C + 1, dl, MVT::i32);
4716 }
4717 break;
4718 case ISD::SETULE:
4719 case ISD::SETUGT:
4720 if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
4721 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
4722 RHS = DAG.getConstant(C + 1, dl, MVT::i32);
4723 }
4724 break;
4725 }
4726 }
4727 } else if ((ARM_AM::getShiftOpcForNode(LHS.getOpcode()) != ARM_AM::no_shift) &&
4728 (ARM_AM::getShiftOpcForNode(RHS.getOpcode()) == ARM_AM::no_shift)) {
4729 // In ARM and Thumb-2, the compare instructions can shift their second
4730 // operand.
4731 CC = ISD::getSetCCSwappedOperands(CC);
4732 std::swap(LHS, RHS);
4733 }
4734
4735 // Thumb1 has very limited immediate modes, so turning an "and" into a
4736 // shift can save multiple instructions.
4737 //
4738 // If we have (x & C1), and C1 is an appropriate mask, we can transform it
4739 // into "((x << n) >> n)". But that isn't necessarily profitable on its
4740 // own. If it's the operand to an unsigned comparison with an immediate,
4741 // we can eliminate one of the shifts: we transform
4742 // "((x << n) >> n) == C2" to "(x << n) == (C2 << n)".
4743 //
4744 // We avoid transforming cases which aren't profitable due to encoding
4745 // details:
4746 //
4747 // 1. C2 fits into the immediate field of a cmp, and the transformed version
4748 // would not; in that case, we're essentially trading one immediate load for
4749 // another.
4750 // 2. C1 is 255 or 65535, so we can use uxtb or uxth.
4751 // 3. C2 is zero; we have other code for this special case.
4752 //
4753 // FIXME: Figure out profitability for Thumb2; we usually can't save an
4754 // instruction, since the AND is always one instruction anyway, but we could
4755 // use narrow instructions in some cases.
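  // For example (illustrative constants): "(x & 0x1ffff) == 0x100" is rewritten
  // below as "(x << 15) == (0x100 << 15)".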
4756 if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::AND &&
4757 LHS->hasOneUse() && isa<ConstantSDNode>(LHS.getOperand(1)) &&
4758 LHS.getValueType() == MVT::i32 && isa<ConstantSDNode>(RHS) &&
4759 !isSignedIntSetCC(CC)) {
4760 unsigned Mask = cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue();
4761 auto *RHSC = cast<ConstantSDNode>(RHS.getNode());
4762 uint64_t RHSV = RHSC->getZExtValue();
4763 if (isMask_32(Mask) && (RHSV & ~Mask) == 0 && Mask != 255 && Mask != 65535) {
4764 unsigned ShiftBits = countLeadingZeros(Mask);
4765 if (RHSV && (RHSV > 255 || (RHSV << ShiftBits) <= 255)) {
4766 SDValue ShiftAmt = DAG.getConstant(ShiftBits, dl, MVT::i32);
4767 LHS = DAG.getNode(ISD::SHL, dl, MVT::i32, LHS.getOperand(0), ShiftAmt);
4768 RHS = DAG.getConstant(RHSV << ShiftBits, dl, MVT::i32);
4769 }
4770 }
4771 }
4772
4773 // The specific comparison "(x<<c) > 0x80000000U" can be optimized to a
4774 // single "lsls x, c+1". The shift sets the "C" and "Z" flags the same
4775 // way a cmp would.
4776 // FIXME: Add support for ARM/Thumb2; this would need isel patterns, and
4777 // some tweaks to the heuristics for the previous and->shift transform.
4778 // FIXME: Optimize cases where the LHS isn't a shift.
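  // e.g. with c == 3, "(x << 3) > 0x80000000u" becomes "lsls x, #4" followed by
  // a HI check on the resulting flags.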
4779 if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::SHL &&
4780 isa<ConstantSDNode>(RHS) &&
4781 cast<ConstantSDNode>(RHS)->getZExtValue() == 0x80000000U &&
4782 CC == ISD::SETUGT && isa<ConstantSDNode>(LHS.getOperand(1)) &&
4783 cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() < 31) {
4784 unsigned ShiftAmt =
4785 cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() + 1;
4786 SDValue Shift = DAG.getNode(ARMISD::LSLS, dl,
4787 DAG.getVTList(MVT::i32, MVT::i32),
4788 LHS.getOperand(0),
4789 DAG.getConstant(ShiftAmt, dl, MVT::i32));
4790 SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
4791 Shift.getValue(1), SDValue());
4792 ARMcc = DAG.getConstant(ARMCC::HI, dl, MVT::i32);
4793 return Chain.getValue(1);
4794 }
4795
4796 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
4797
4798 // If the RHS is a constant zero then the V (overflow) flag will never be
4799 // set. This can allow us to simplify GE to PL or LT to MI, which can be
4800 // simpler for other passes (like the peephole optimiser) to deal with.
4801 if (isNullConstant(RHS)) {
4802 switch (CondCode) {
4803 default: break;
4804 case ARMCC::GE:
4805 CondCode = ARMCC::PL;
4806 break;
4807 case ARMCC::LT:
4808 CondCode = ARMCC::MI;
4809 break;
4810 }
4811 }
4812
4813 ARMISD::NodeType CompareType;
4814 switch (CondCode) {
4815 default:
4816 CompareType = ARMISD::CMP;
4817 break;
4818 case ARMCC::EQ:
4819 case ARMCC::NE:
4820 // Uses only Z Flag
4821 CompareType = ARMISD::CMPZ;
4822 break;
4823 }
4824 ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
4825 return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
4826}
4827
4828 /// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
4829SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
4830 SelectionDAG &DAG, const SDLoc &dl,
4831 bool Signaling) const {
4832 assert(Subtarget->hasFP64() || RHS.getValueType() != MVT::f64);
7: Assuming the condition is true
8: '?' condition is true
4833 SDValue Cmp;
4834 if (!isFloatingPointZero(RHS))
9: Value assigned to 'Op.Node'
10: Calling 'isFloatingPointZero'
4835 Cmp = DAG.getNode(Signaling ? ARMISD::CMPFPE : ARMISD::CMPFP,
4836 dl, MVT::Glue, LHS, RHS);
4837 else
4838 Cmp = DAG.getNode(Signaling ? ARMISD::CMPFPEw0 : ARMISD::CMPFPw0,
4839 dl, MVT::Glue, LHS);
4840 return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
4841}
4842
4843/// duplicateCmp - Glue values can have only one use, so this function
4844/// duplicates a comparison node.
4845SDValue
4846ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
4847 unsigned Opc = Cmp.getOpcode();
4848 SDLoc DL(Cmp);
4849 if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
4850 return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
4851
4852 assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
4853 Cmp = Cmp.getOperand(0);
4854 Opc = Cmp.getOpcode();
4855 if (Opc == ARMISD::CMPFP)
4856 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
4857 else {
4858 assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
4859 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));
4860 }
4861 return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
4862}
4863
4864 // This function computes three things: the arithmetic result itself
4865 // (Value), a comparison (OverflowCmp), and a condition code (ARMcc, set via
4866 // the reference argument). The comparison and the condition code define the
4867 // case in which the arithmetic computation *does not* overflow.
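// For example, for ISD::SADDO the pair is (ADD LHS, RHS) together with
// (CMP Value, LHS), checked under the "VC" (no signed overflow) condition.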
4868std::pair<SDValue, SDValue>
4869ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
4870 SDValue &ARMcc) const {
4871 assert(Op.getValueType() == MVT::i32 && "Unsupported value type");
4872
4873 SDValue Value, OverflowCmp;
4874 SDValue LHS = Op.getOperand(0);
4875 SDValue RHS = Op.getOperand(1);
4876 SDLoc dl(Op);
4877
4878 // FIXME: We are currently always generating CMPs because we don't support
4879 // generating CMN through the backend. This is not as good as the natural
4880 // CMP case because it causes a register dependency and cannot be folded
4881 // later.
4882
4883 switch (Op.getOpcode()) {
4884 default:
4885 llvm_unreachable("Unknown overflow instruction!");
4886 case ISD::SADDO:
4887 ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
4888 Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
4889 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
4890 break;
4891 case ISD::UADDO:
4892 ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
4893 // We use ADDC here to correspond to its use in LowerUnsignedALUO.
4894 // We do not use it in the USUBO case as Value may not be used.
4895 Value = DAG.getNode(ARMISD::ADDC, dl,
4896 DAG.getVTList(Op.getValueType(), MVT::i32), LHS, RHS)
4897 .getValue(0);
4898 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
4899 break;
4900 case ISD::SSUBO:
4901 ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
4902 Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
4903 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
4904 break;
4905 case ISD::USUBO:
4906 ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
4907 Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
4908 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
4909 break;
4910 case ISD::UMULO:
4911 // We generate a UMUL_LOHI and then check if the high word is 0.
4912 ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
4913 Value = DAG.getNode(ISD::UMUL_LOHI, dl,
4914 DAG.getVTList(Op.getValueType(), Op.getValueType()),
4915 LHS, RHS);
4916 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
4917 DAG.getConstant(0, dl, MVT::i32));
4918 Value = Value.getValue(0); // We only want the low 32 bits for the result.
4919 break;
4920 case ISD::SMULO:
4921 // We generate a SMUL_LOHI and then check if all the bits of the high word
4922 // are the same as the sign bit of the low word.
4923 ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
4924 Value = DAG.getNode(ISD::SMUL_LOHI, dl,
4925 DAG.getVTList(Op.getValueType(), Op.getValueType()),
4926 LHS, RHS);
4927 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
4928 DAG.getNode(ISD::SRA, dl, Op.getValueType(),
4929 Value.getValue(0),
4930 DAG.getConstant(31, dl, MVT::i32)));
4931 Value = Value.getValue(0); // We only want the low 32 bits for the result.
4932 break;
4933 } // switch (...)
4934
4935 return std::make_pair(Value, OverflowCmp);
4936}
4937
4938SDValue
4939ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const {
4940 // Let legalize expand this if it isn't a legal type yet.
4941 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
4942 return SDValue();
4943
4944 SDValue Value, OverflowCmp;
4945 SDValue ARMcc;
4946 std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
4947 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4948 SDLoc dl(Op);
4949 // We use 0 and 1 as false and true values.
4950 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
4951 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
4952 EVT VT = Op.getValueType();
4953
4954 SDValue Overflow = DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal,
4955 ARMcc, CCR, OverflowCmp);
4956
4957 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
4958 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
4959}
4960
4961static SDValue ConvertBooleanCarryToCarryFlag(SDValue BoolCarry,
4962 SelectionDAG &DAG) {
4963 SDLoc DL(BoolCarry);
4964 EVT CarryVT = BoolCarry.getValueType();
4965
4966 // This converts the boolean value carry into the carry flag by doing
4967 // ARMISD::SUBC Carry, 1
4968 SDValue Carry = DAG.getNode(ARMISD::SUBC, DL,
4969 DAG.getVTList(CarryVT, MVT::i32),
4970 BoolCarry, DAG.getConstant(1, DL, CarryVT));
4971 return Carry.getValue(1);
4972}
4973
4974static SDValue ConvertCarryFlagToBooleanCarry(SDValue Flags, EVT VT,
4975 SelectionDAG &DAG) {
4976 SDLoc DL(Flags);
4977
4978 // Now convert the carry flag into a boolean carry. We do this
4979 // using ARMISD:ADDE 0, 0, Carry
4980 return DAG.getNode(ARMISD::ADDE, DL, DAG.getVTList(VT, MVT::i32),
4981 DAG.getConstant(0, DL, MVT::i32),
4982 DAG.getConstant(0, DL, MVT::i32), Flags);
4983}
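// Editorial sketch (not part of the original source, names are illustrative):
// scalar model of the two conversions above, assuming the ARM convention that
// a subtraction sets the carry flag when no borrow occurs and that ADDE adds
// the incoming carry.
static bool BoolCarryToFlagModel(unsigned BoolCarry /* 0 or 1 */) {
  // "SUBC BoolCarry, 1" borrows exactly when BoolCarry == 0, so the resulting
  // carry flag equals the boolean value.
  return BoolCarry >= 1;
}
static unsigned FlagToBoolCarryModel(bool CarryFlag) {
  // "ADDE 0, 0, C" computes 0 + 0 + C, i.e. the flag as a 0/1 integer.
  return 0u + 0u + (CarryFlag ? 1u : 0u);
}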
4984
4985SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op,
4986 SelectionDAG &DAG) const {
4987 // Let legalize expand this if it isn't a legal type yet.
4988 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
4989 return SDValue();
4990
4991 SDValue LHS = Op.getOperand(0);
4992 SDValue RHS = Op.getOperand(1);
4993 SDLoc dl(Op);
4994
4995 EVT VT = Op.getValueType();
4996 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
4997 SDValue Value;
4998 SDValue Overflow;
4999 switch (Op.getOpcode()) {
5000 default:
5001 llvm_unreachable("Unknown overflow instruction!")::llvm::llvm_unreachable_internal("Unknown overflow instruction!"
, "llvm/lib/Target/ARM/ARMISelLowering.cpp", 5001)
;
5002 case ISD::UADDO:
5003 Value = DAG.getNode(ARMISD::ADDC, dl, VTs, LHS, RHS);
5004 // Convert the carry flag into a boolean value.
5005 Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
5006 break;
5007 case ISD::USUBO: {
5008 Value = DAG.getNode(ARMISD::SUBC, dl, VTs, LHS, RHS);
5009 // Convert the carry flag into a boolean value.
5010 Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
5011 // ARMISD::SUBC returns 0 when we have to borrow, so make it an overflow
5012 // value. So compute 1 - C.
5013 Overflow = DAG.getNode(ISD::SUB, dl, MVT::i32,
5014 DAG.getConstant(1, dl, MVT::i32), Overflow);
5015 break;
5016 }
5017 }
5018
5019 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
5020}
5021
5022static SDValue LowerADDSUBSAT(SDValue Op, SelectionDAG &DAG,
5023 const ARMSubtarget *Subtarget) {
5024 EVT VT = Op.getValueType();
5025 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
5026 return SDValue();
5027 if (!VT.isSimple())
5028 return SDValue();
5029
5030 unsigned NewOpcode;
5031 switch (VT.getSimpleVT().SimpleTy) {
5032 default:
5033 return SDValue();
5034 case MVT::i8:
5035 switch (Op->getOpcode()) {
5036 case ISD::UADDSAT:
5037 NewOpcode = ARMISD::UQADD8b;
5038 break;
5039 case ISD::SADDSAT:
5040 NewOpcode = ARMISD::QADD8b;
5041 break;
5042 case ISD::USUBSAT:
5043 NewOpcode = ARMISD::UQSUB8b;
5044 break;
5045 case ISD::SSUBSAT:
5046 NewOpcode = ARMISD::QSUB8b;
5047 break;
5048 }
5049 break;
5050 case MVT::i16:
5051 switch (Op->getOpcode()) {
5052 case ISD::UADDSAT:
5053 NewOpcode = ARMISD::UQADD16b;
5054 break;
5055 case ISD::SADDSAT:
5056 NewOpcode = ARMISD::QADD16b;
5057 break;
5058 case ISD::USUBSAT:
5059 NewOpcode = ARMISD::UQSUB16b;
5060 break;
5061 case ISD::SSUBSAT:
5062 NewOpcode = ARMISD::QSUB16b;
5063 break;
5064 }
5065 break;
5066 }
5067
5068 SDLoc dl(Op);
5069 SDValue Add =
5070 DAG.getNode(NewOpcode, dl, MVT::i32,
5071 DAG.getSExtOrTrunc(Op->getOperand(0), dl, MVT::i32),
5072 DAG.getSExtOrTrunc(Op->getOperand(1), dl, MVT::i32));
5073 return DAG.getNode(ISD::TRUNCATE, dl, VT, Add);
5074}
5075
5076SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
5077 SDValue Cond = Op.getOperand(0);
5078 SDValue SelectTrue = Op.getOperand(1);
5079 SDValue SelectFalse = Op.getOperand(2);
5080 SDLoc dl(Op);
5081 unsigned Opc = Cond.getOpcode();
5082
5083 if (Cond.getResNo() == 1 &&
5084 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
5085 Opc == ISD::USUBO)) {
5086 if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
5087 return SDValue();
5088
5089 SDValue Value, OverflowCmp;
5090 SDValue ARMcc;
5091 std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
5092 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5093 EVT VT = Op.getValueType();
5094
5095 return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, CCR,
5096 OverflowCmp, DAG);
5097 }
5098
5099 // Convert:
5100 //
5101 // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
5102 // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
5103 //
5104 if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
5105 const ConstantSDNode *CMOVTrue =
5106 dyn_cast<ConstantSDNode>(Cond.getOperand(0));
5107 const ConstantSDNode *CMOVFalse =
5108 dyn_cast<ConstantSDNode>(Cond.getOperand(1));
5109
5110 if (CMOVTrue && CMOVFalse) {
5111 unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
5112 unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
5113
5114 SDValue True;
5115 SDValue False;
5116 if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
5117 True = SelectTrue;
5118 False = SelectFalse;
5119 } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
5120 True = SelectFalse;
5121 False = SelectTrue;
5122 }
5123
5124 if (True.getNode() && False.getNode()) {
5125 EVT VT = Op.getValueType();
5126 SDValue ARMcc = Cond.getOperand(2);
5127 SDValue CCR = Cond.getOperand(3);
5128 SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
5129 assert(True.getValueType() == VT);
5130 return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG);
5131 }
5132 }
5133 }
5134
5135 // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
5136 // undefined bits before doing a full-word comparison with zero.
5137 Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
5138 DAG.getConstant(1, dl, Cond.getValueType()));
5139
5140 return DAG.getSelectCC(dl, Cond,
5141 DAG.getConstant(0, dl, Cond.getValueType()),
5142 SelectTrue, SelectFalse, ISD::SETNE);
5143}
5144
5145static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
5146 bool &swpCmpOps, bool &swpVselOps) {
5147 // Start by selecting the GE condition code for opcodes that return true for
5148 // 'equality'
5149 if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||
5150 CC == ISD::SETULE || CC == ISD::SETGE || CC == ISD::SETLE)
5151 CondCode = ARMCC::GE;
5152
5153 // and GT for opcodes that return false for 'equality'.
5154 else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||
5155 CC == ISD::SETULT || CC == ISD::SETGT || CC == ISD::SETLT)
5156 CondCode = ARMCC::GT;
5157
5158 // Since we are constrained to GE/GT, if the opcode contains 'less', we need
5159 // to swap the compare operands.
5160 if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||
5161 CC == ISD::SETULT || CC == ISD::SETLE || CC == ISD::SETLT)
5162 swpCmpOps = true;
5163
5164 // Both GT and GE are ordered comparisons, and return false for 'unordered'.
5165 // If we have an unordered opcode, we need to swap the operands to the VSEL
5166 // instruction (effectively negating the condition).
5167 //
5168 // This also has the effect of swapping which one of 'less' or 'greater'
5169 // returns true, so we also swap the compare operands. It also switches
5170 // whether we return true for 'equality', so we compensate by picking the
5171 // opposite condition code to our original choice.
5172 if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE ||
5173 CC == ISD::SETUGT) {
5174 swpCmpOps = !swpCmpOps;
5175 swpVselOps = !swpVselOps;
5176 CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT;
5177 }
5178
5179 // 'ordered' is 'anything but unordered', so use the VS condition code and
5180 // swap the VSEL operands.
5181 if (CC == ISD::SETO) {
5182 CondCode = ARMCC::VS;
5183 swpVselOps = true;
5184 }
5185
5186 // 'unordered or not equal' is 'anything but equal', so use the EQ condition
5187 // code and swap the VSEL operands. Also do this if we don't care about the
5188 // unordered case.
5189 if (CC == ISD::SETUNE || CC == ISD::SETNE) {
5190 CondCode = ARMCC::EQ;
5191 swpVselOps = true;
5192 }
5193}
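// Editorial note (illustrative trace, not part of the original source):
// feeding ISD::SETULT through the function above, the "less" group first
// picks GT and requests a compare-operand swap, then the "unordered" group
// undoes the compare swap, requests a VSEL-operand swap, and flips GT back to
// GE. The net effect is roughly (a <u b ? t : f) -> VCMP a, b ; VSELGE f, t,
// which yields t exactly when the compare is not ordered greater-or-equal,
// i.e. when a < b or the operands are unordered.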
5194
5195SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal,
5196 SDValue TrueVal, SDValue ARMcc, SDValue CCR,
5197 SDValue Cmp, SelectionDAG &DAG) const {
5198 if (!Subtarget->hasFP64() && VT == MVT::f64) {
5199 FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl,
5200 DAG.getVTList(MVT::i32, MVT::i32), FalseVal);
5201 TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl,
5202 DAG.getVTList(MVT::i32, MVT::i32), TrueVal);
5203
5204 SDValue TrueLow = TrueVal.getValue(0);
5205 SDValue TrueHigh = TrueVal.getValue(1);
5206 SDValue FalseLow = FalseVal.getValue(0);
5207 SDValue FalseHigh = FalseVal.getValue(1);
5208
5209 SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow,
5210 ARMcc, CCR, Cmp);
5211 SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh,
5212 ARMcc, CCR, duplicateCmp(Cmp, DAG));
5213
5214 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High);
5215 } else {
5216 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
5217 Cmp);
5218 }
5219}
5220
5221static bool isGTorGE(ISD::CondCode CC) {
5222 return CC == ISD::SETGT || CC == ISD::SETGE;
5223}
5224
5225static bool isLTorLE(ISD::CondCode CC) {
5226 return CC == ISD::SETLT || CC == ISD::SETLE;
5227}
5228
5229// See if a conditional (LHS CC RHS ? TrueVal : FalseVal) is lower-saturating.
5230// All of these conditions (and their <= and >= counterparts) will do:
5231// x < k ? k : x
5232// x > k ? x : k
5233// k < x ? x : k
5234// k > x ? k : x
5235static bool isLowerSaturate(const SDValue LHS, const SDValue RHS,
5236 const SDValue TrueVal, const SDValue FalseVal,
5237 const ISD::CondCode CC, const SDValue K) {
5238 return (isGTorGE(CC) &&
5239 ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))) ||
5240 (isLTorLE(CC) &&
5241 ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal)));
5242}
5243
5244// Check if two chained conditionals could be converted into SSAT or USAT.
5245//
5246 // SSAT can replace a set of two conditional selectors that bound a number to the
5247 // interval [~k, k] when k + 1 is a power of 2. Here are some examples:
5248//
5249// x < -k ? -k : (x > k ? k : x)
5250// x < -k ? -k : (x < k ? x : k)
5251// x > -k ? (x > k ? k : x) : -k
5252// x < k ? (x < -k ? -k : x) : k
5253// etc.
5254//
5255// LLVM canonicalizes these to either a min(max()) or a max(min())
5256// pattern. This function tries to match one of these and will return a SSAT
5257// node if successful.
5258//
5259 // USAT works similarly to SSAT, but bounds the value to the interval [0, k], where k + 1
5260// is a power of 2.
5261static SDValue LowerSaturatingConditional(SDValue Op, SelectionDAG &DAG) {
5262 EVT VT = Op.getValueType();
5263 SDValue V1 = Op.getOperand(0);
5264 SDValue K1 = Op.getOperand(1);
5265 SDValue TrueVal1 = Op.getOperand(2);
5266 SDValue FalseVal1 = Op.getOperand(3);
5267 ISD::CondCode CC1 = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5268
5269 const SDValue Op2 = isa<ConstantSDNode>(TrueVal1) ? FalseVal1 : TrueVal1;
5270 if (Op2.getOpcode() != ISD::SELECT_CC)
5271 return SDValue();
5272
5273 SDValue V2 = Op2.getOperand(0);
5274 SDValue K2 = Op2.getOperand(1);
5275 SDValue TrueVal2 = Op2.getOperand(2);
5276 SDValue FalseVal2 = Op2.getOperand(3);
5277 ISD::CondCode CC2 = cast<CondCodeSDNode>(Op2.getOperand(4))->get();
5278
5279 SDValue V1Tmp = V1;
5280 SDValue V2Tmp = V2;
5281
5282 // Check that the registers and the constants match a max(min()) or min(max())
5283 // pattern
5284 if (V1Tmp != TrueVal1 || V2Tmp != TrueVal2 || K1 != FalseVal1 ||
5285 K2 != FalseVal2 ||
5286 !((isGTorGE(CC1) && isLTorLE(CC2)) || (isLTorLE(CC1) && isGTorGE(CC2))))
5287 return SDValue();
5288
5289 // Check that the constant in the lower-bound check is
5290 // the opposite of the constant in the upper-bound check
5291 // in 1's complement.
5292 if (!isa<ConstantSDNode>(K1) || !isa<ConstantSDNode>(K2))
5293 return SDValue();
5294
5295 int64_t Val1 = cast<ConstantSDNode>(K1)->getSExtValue();
5296 int64_t Val2 = cast<ConstantSDNode>(K2)->getSExtValue();
5297 int64_t PosVal = std::max(Val1, Val2);
5298 int64_t NegVal = std::min(Val1, Val2);
5299
5300 if (!((Val1 > Val2 && isLTorLE(CC1)) || (Val1 < Val2 && isLTorLE(CC2))) ||
5301 !isPowerOf2_64(PosVal + 1))
5302 return SDValue();
5303
5304 // Handle the difference between USAT (unsigned) and SSAT (signed)
5305 // saturation
5306 // At this point, PosVal is guaranteed to be positive
5307 uint64_t K = PosVal;
5308 SDLoc dl(Op);
5309 if (Val1 == ~Val2)
5310 return DAG.getNode(ARMISD::SSAT, dl, VT, V2Tmp,
5311 DAG.getConstant(countTrailingOnes(K), dl, VT));
5312 if (NegVal == 0)
5313 return DAG.getNode(ARMISD::USAT, dl, VT, V2Tmp,
5314 DAG.getConstant(countTrailingOnes(K), dl, VT));
5315
5316 return SDValue();
5317}
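// Editorial sketch (not part of the original source, names are illustrative):
// scalar form of the clamp that the SSAT/USAT nodes above implement. For
// K == 127 (so K + 1 is a power of 2) the signed interval is [~K, K], i.e.
// [-128, 127], and countTrailingOnes(127) == 7, which the SSAT pattern uses
// to encode the saturation width.
static int32_t SignedSatModel(int32_t X, int32_t K /* e.g. 127 */) {
  int32_t Lo = ~K;                               // -K - 1
  return X > K ? K : (X < Lo ? Lo : X);
}
static uint32_t UnsignedSatModel(int32_t X, int32_t K /* e.g. 255 */) {
  return X > K ? (uint32_t)K : (X < 0 ? 0u : (uint32_t)X);
}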
5318
5319// Check if a condition of the type x < k ? k : x can be converted into a
5320// bit operation instead of conditional moves.
5321// Currently this is allowed given:
5322// - The conditions and values match up
5323// - k is 0 or -1 (all ones)
5324 // This function will not check the last condition; that's up to the caller.
5325 // It returns true if the transformation can be made, and in that case
5326 // returns x in V and k in SatK.
5327static bool isLowerSaturatingConditional(const SDValue &Op, SDValue &V,
5328 SDValue &SatK)
5329{
5330 SDValue LHS = Op.getOperand(0);
5331 SDValue RHS = Op.getOperand(1);
5332 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5333 SDValue TrueVal = Op.getOperand(2);
5334 SDValue FalseVal = Op.getOperand(3);
5335
5336 SDValue *K = isa<ConstantSDNode>(LHS) ? &LHS : isa<ConstantSDNode>(RHS)
5337 ? &RHS
5338 : nullptr;
5339
5340 // No constant operand in the comparison, early out
5341 if (!K)
5342 return false;
5343
5344 SDValue KTmp = isa<ConstantSDNode>(TrueVal) ? TrueVal : FalseVal;
5345 V = (KTmp == TrueVal) ? FalseVal : TrueVal;
5346 SDValue VTmp = (K && *K == LHS) ? RHS : LHS;
5347
5348 // If the constant on the left and right side, or the variable on the left and
5349 // right, does not match, early out.
5350 if (*K != KTmp || V != VTmp)
5351 return false;
5352
5353 if (isLowerSaturate(LHS, RHS, TrueVal, FalseVal, CC, *K)) {
5354 SatK = *K;
5355 return true;
5356 }
5357
5358 return false;
5359}
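// Editorial sketch (not part of the original source, names are illustrative):
// the scalar bit tricks that LowerSELECT_CC below emits once this helper
// matches, for the two supported constants k == 0 and k == -1 (32-bit
// operands; an arithmetic shift is assumed for the signed right shift).
static int32_t LowerSatAtZeroModel(int32_t X) {     // x < 0 ? 0 : x
  return X & ~(X >> 31);                            // ASR #31, then AND/BIC
}
static int32_t LowerSatAtMinusOneModel(int32_t X) { // x < -1 ? -1 : x
  return X | (X >> 31);                             // ASR #31, then ORR
}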
5360
5361bool ARMTargetLowering::isUnsupportedFloatingType(EVT VT) const {
5362 if (VT == MVT::f32)
5363 return !Subtarget->hasVFP2Base();
5364 if (VT == MVT::f64)
5365 return !Subtarget->hasFP64();
5366 if (VT == MVT::f16)
5367 return !Subtarget->hasFullFP16();
5368 return false;
5369}
5370
5371SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
5372 EVT VT = Op.getValueType();
5373 SDLoc dl(Op);
5374
5375 // Try to convert two saturating conditional selects into a single SSAT
5376 if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2())
5377 if (SDValue SatValue = LowerSaturatingConditional(Op, DAG))
5378 return SatValue;
5379
5380 // Try to convert expressions of the form x < k ? k : x (and similar forms)
5381 // into more efficient bit operations, which is possible when k is 0 or -1.
5382 // On ARM and Thumb-2, which have a flexible operand 2, this will result in
5383 // single instructions. On Thumb the shift and the bit operation will be two
5384 // instructions.
5385 // Only allow this transformation on full-width (32-bit) operations
5386 SDValue LowerSatConstant;
5387 SDValue SatValue;
5388 if (VT == MVT::i32 &&
5389 isLowerSaturatingConditional(Op, SatValue, LowerSatConstant)) {
5390 SDValue ShiftV = DAG.getNode(ISD::SRA, dl, VT, SatValue,
5391 DAG.getConstant(31, dl, VT));
5392 if (isNullConstant(LowerSatConstant)) {
5393 SDValue NotShiftV = DAG.getNode(ISD::XOR, dl, VT, ShiftV,
5394 DAG.getAllOnesConstant(dl, VT));
5395 return DAG.getNode(ISD::AND, dl, VT, SatValue, NotShiftV);
5396 } else if (isAllOnesConstant(LowerSatConstant))
5397 return DAG.getNode(ISD::OR, dl, VT, SatValue, ShiftV);
5398 }
5399
5400 SDValue LHS = Op.getOperand(0);
5401 SDValue RHS = Op.getOperand(1);
5402 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5403 SDValue TrueVal = Op.getOperand(2);
5404 SDValue FalseVal = Op.getOperand(3);
5405 ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FalseVal);
5406 ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TrueVal);
5407
5408 if (Subtarget->hasV8_1MMainlineOps() && CFVal && CTVal &&
5409 LHS.getValueType() == MVT::i32 && RHS.getValueType() == MVT::i32) {
5410 unsigned TVal = CTVal->getZExtValue();
5411 unsigned FVal = CFVal->getZExtValue();
5412 unsigned Opcode = 0;
5413
5414 if (TVal == ~FVal) {
5415 Opcode = ARMISD::CSINV;
5416 } else if (TVal == ~FVal + 1) {
5417 Opcode = ARMISD::CSNEG;
5418 } else if (TVal + 1 == FVal) {
5419 Opcode = ARMISD::CSINC;
5420 } else if (TVal == FVal + 1) {
5421 Opcode = ARMISD::CSINC;
5422 std::swap(TrueVal, FalseVal);
5423 std::swap(TVal, FVal);
5424 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5425 }
5426
5427 if (Opcode) {
5428 // If one of the constants is cheaper than another, materialise the
5429 // cheaper one and let the csel generate the other.
5430 if (Opcode != ARMISD::CSINC &&
5431 HasLowerConstantMaterializationCost(FVal, TVal, Subtarget)) {
5432 std::swap(TrueVal, FalseVal);
5433 std::swap(TVal, FVal);
5434 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5435 }
5436
5437 // Attempt to use ZR, checking whether TVal is 0, possibly inverting the condition
5438 // to get there. CSINC is not invertible like the other two (~(~a) == a,
5439 // -(-a) == a, but (a+1)+1 != a).
5440 if (FVal == 0 && Opcode != ARMISD::CSINC) {
5441 std::swap(TrueVal, FalseVal);
5442 std::swap(TVal, FVal);
5443 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5444 }
5445
5446 // Drops F's value because we can get it by inverting/negating TVal.
5447 FalseVal = TrueVal;
5448
5449 SDValue ARMcc;
5450 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5451 EVT VT = TrueVal.getValueType();
5452 return DAG.getNode(Opcode, dl, VT, TrueVal, FalseVal, ARMcc, Cmp);
5453 }
5454 }
5455
5456 if (isUnsupportedFloatingType(LHS.getValueType())) {
5457 DAG.getTargetLoweringInfo().softenSetCCOperands(
5458 DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS);
5459
5460 // If softenSetCCOperands only returned one value, we should compare it to
5461 // zero.
5462 if (!RHS.getNode()) {
5463 RHS = DAG.getConstant(0, dl, LHS.getValueType());
5464 CC = ISD::SETNE;
5465 }
5466 }
5467
5468 if (LHS.getValueType() == MVT::i32) {
5469 // Try to generate VSEL on ARMv8.
5470 // The VSEL instruction can't use all the usual ARM condition
5471 // codes: it only has two bits to select the condition code, so it's
5472 // constrained to use only GE, GT, VS and EQ.
5473 //
5474 // To implement all the various ISD::SETXXX opcodes, we sometimes need to
5475 // swap the operands of the previous compare instruction (effectively
5476 // inverting the compare condition, swapping 'less' and 'greater') and
5477 // sometimes need to swap the operands to the VSEL (which inverts the
5478 // condition in the sense of firing whenever the previous condition didn't)
5479 if (Subtarget->hasFPARMv8Base() && (TrueVal.getValueType() == MVT::f16 ||
5480 TrueVal.getValueType() == MVT::f32 ||
5481 TrueVal.getValueType() == MVT::f64)) {
5482 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
5483 if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
5484 CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
5485 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5486 std::swap(TrueVal, FalseVal);
5487 }
5488 }
5489
5490 SDValue ARMcc;
5491 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5492 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5493 // Choose GE over PL, which vsel does not support
5494 if (cast<ConstantSDNode>(ARMcc)->getZExtValue() == ARMCC::PL)
5495 ARMcc = DAG.getConstant(ARMCC::GE, dl, MVT::i32);
5496 return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
5497 }
5498
5499 ARMCC::CondCodes CondCode, CondCode2;
5500 FPCCToARMCC(CC, CondCode, CondCode2);
5501
5502 // Normalize the fp compare. If RHS is zero we prefer to keep it there so we
5503 // match CMPFPw0 instead of CMPFP, though we don't do this for f16 because we
5504 // must use VSEL (limited condition codes), due to not having conditional f16
5505 // moves.
5506 if (Subtarget->hasFPARMv8Base() &&
5507 !(isFloatingPointZero(RHS) && TrueVal.getValueType() != MVT::f16) &&
5508 (TrueVal.getValueType() == MVT::f16 ||
5509 TrueVal.getValueType() == MVT::f32 ||
5510 TrueVal.getValueType() == MVT::f64)) {
5511 bool swpCmpOps = false;
5512 bool swpVselOps = false;
5513 checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
5514
5515 if (CondCode == ARMCC::GT || CondCode == ARMCC::GE ||
5516 CondCode == ARMCC::VS || CondCode == ARMCC::EQ) {
5517 if (swpCmpOps)
5518 std::swap(LHS, RHS);
5519 if (swpVselOps)
5520 std::swap(TrueVal, FalseVal);
5521 }
5522 }
5523
5524 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5525 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
5526 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5527 SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
5528 if (CondCode2 != ARMCC::AL) {
5529 SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32);
5530 // FIXME: Needs another CMP because flag can have but one use.
5531 SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
5532 Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG);
5533 }
5534 return Result;
5535}
5536
5537/// canChangeToInt - Given the fp compare operand, return true if it is suitable
5538/// to morph to an integer compare sequence.
5539static bool canChangeToInt(SDValue Op, bool &SeenZero,
5540 const ARMSubtarget *Subtarget) {
5541 SDNode *N = Op.getNode();
5542 if (!N->hasOneUse())
5543 // Otherwise it requires moving the value from fp to integer registers.
5544 return false;
5545 if (!N->getNumValues())
5546 return false;
5547 EVT VT = Op.getValueType();
5548 if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
5549 // f32 case is generally profitable. f64 case only makes sense when vcmpe +
5550 // vmrs are very slow, e.g. cortex-a8.
5551 return false;
5552
5553 if (isFloatingPointZero(Op)) {
5554 SeenZero = true;
5555 return true;
5556 }
5557 return ISD::isNormalLoad(N);
5558}
5559
5560static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
5561 if (isFloatingPointZero(Op))
5562 return DAG.getConstant(0, SDLoc(Op), MVT::i32);
5563
5564 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
5565 return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(),
5566 Ld->getPointerInfo(), Ld->getAlign(),
5567 Ld->getMemOperand()->getFlags());
5568
5569 llvm_unreachable("Unknown VFP cmp argument!")::llvm::llvm_unreachable_internal("Unknown VFP cmp argument!"
, "llvm/lib/Target/ARM/ARMISelLowering.cpp", 5569)
;
5570}
5571
5572static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
5573 SDValue &RetVal1, SDValue &RetVal2) {
5574 SDLoc dl(Op);
5575
5576 if (isFloatingPointZero(Op)) {
5577 RetVal1 = DAG.getConstant(0, dl, MVT::i32);
5578 RetVal2 = DAG.getConstant(0, dl, MVT::i32);
5579 return;
5580 }
5581
5582 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
5583 SDValue Ptr = Ld->getBasePtr();
5584 RetVal1 =
5585 DAG.getLoad(MVT::i32, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
5586 Ld->getAlign(), Ld->getMemOperand()->getFlags());
5587
5588 EVT PtrType = Ptr.getValueType();
5589 SDValue NewPtr = DAG.getNode(ISD::ADD, dl,
5590 PtrType, Ptr, DAG.getConstant(4, dl, PtrType));
5591 RetVal2 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), NewPtr,
5592 Ld->getPointerInfo().getWithOffset(4),
5593 commonAlignment(Ld->getAlign(), 4),
5594 Ld->getMemOperand()->getFlags());
5595 return;
5596 }
5597
5598 llvm_unreachable("Unknown VFP cmp argument!")::llvm::llvm_unreachable_internal("Unknown VFP cmp argument!"
, "llvm/lib/Target/ARM/ARMISelLowering.cpp", 5598)
;
5599}
5600
5601/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
5602/// f32 and even f64 comparisons to integer ones.
5603SDValue
5604ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
5605 SDValue Chain = Op.getOperand(0);
5606 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
5607 SDValue LHS = Op.getOperand(2);
5608 SDValue RHS = Op.getOperand(3);
5609 SDValue Dest = Op.getOperand(4);
5610 SDLoc dl(Op);
5611
5612 bool LHSSeenZero = false;
5613 bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
5614 bool RHSSeenZero = false;
5615 bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);
5616 if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
5617 // If unsafe fp math optimization is enabled and there are no other uses of
5618 // the CMP operands, and the condition code is EQ or NE, we can optimize it
5619 // to an integer comparison.
5620 if (CC == ISD::SETOEQ)
5621 CC = ISD::SETEQ;
5622 else if (CC == ISD::SETUNE)
5623 CC = ISD::SETNE;
5624
5625 SDValue Mask = DAG.getConstant(0x7fffffff, dl, MVT::i32);
5626 SDValue ARMcc;
5627 if (LHS.getValueType() == MVT::f32) {
5628 LHS = DAG.getNode(ISD::AND, dl, MVT::i32,
5629 bitcastf32Toi32(LHS, DAG), Mask);
5630 RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
5631 bitcastf32Toi32(RHS, DAG), Mask);
5632 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5633 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5634 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
5635 Chain, Dest, ARMcc, CCR, Cmp);
5636 }
5637
5638 SDValue LHS1, LHS2;
5639 SDValue RHS1, RHS2;
5640 expandf64Toi32(LHS, DAG, LHS1, LHS2);
5641 expandf64Toi32(RHS, DAG, RHS1, RHS2);
5642 LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);
5643 RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
5644 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
5645 ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5646 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
5647 SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
5648 return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops);
5649 }
5650
5651 return SDValue();
5652}
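// Editorial sketch (not part of the original source, name is illustrative):
// why the integer EQ/NE compare above is valid when one side is +/-0.0 and
// unsafe-fp-math is enabled. Clearing the sign bit (the 0x7fffffff mask)
// makes +0.0 and -0.0 identical, and every other bit pattern then compares
// unequal to zero; NaN handling is relaxed, which is exactly what
// unsafe-fp-math permits.
static bool FloatBitsAreZeroModel(uint32_t FloatBits) {
  return (FloatBits & 0x7fffffffu) == 0;   // true only for +0.0f and -0.0f
}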
5653
5654SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
5655 SDValue Chain = Op.getOperand(0);
5656 SDValue Cond = Op.getOperand(1);
5657 SDValue Dest = Op.getOperand(2);
5658 SDLoc dl(Op);
5659
5660 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
5661 // instruction.
5662 unsigned Opc = Cond.getOpcode();
5663 bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) &&
5664 !Subtarget->isThumb1Only();
5665 if (Cond.getResNo() == 1 &&
5666 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
5667 Opc == ISD::USUBO || OptimizeMul)) {
5668 // Only lower legal XALUO ops.
5669 if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
5670 return SDValue();
5671
5672 // The actual operation with overflow check.
5673 SDValue Value, OverflowCmp;
5674 SDValue ARMcc;
5675 std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
5676
5677 // Reverse the condition code.
5678 ARMCC::CondCodes CondCode =
5679 (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
5680 CondCode = ARMCC::getOppositeCondition(CondCode);
5681 ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
5682 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5683
5684 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR,
5685 OverflowCmp);
5686 }
5687
5688 return SDValue();
5689}
5690
5691SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
5692 SDValue Chain = Op.getOperand(0);
5693 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
5694 SDValue LHS = Op.getOperand(2);
5695 SDValue RHS = Op.getOperand(3);
5696 SDValue Dest = Op.getOperand(4);
5697 SDLoc dl(Op);
5698
5699 if (isUnsupportedFloatingType(LHS.getValueType())) {
5700 DAG.getTargetLoweringInfo().softenSetCCOperands(
5701 DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS);
5702
5703 // If softenSetCCOperands only returned one value, we should compare it to
5704 // zero.
5705 if (!RHS.getNode()) {
5706 RHS = DAG.getConstant(0, dl, LHS.getValueType());
5707 CC = ISD::SETNE;
5708 }
5709 }
5710
5711 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
5712 // instruction.
5713 unsigned Opc = LHS.getOpcode();
5714 bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) &&
5715 !Subtarget->isThumb1Only();
5716 if (LHS.getResNo() == 1 && (isOneConstant(RHS) || isNullConstant(RHS)) &&
5717 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
5718 Opc == ISD::USUBO || OptimizeMul) &&
5719 (CC == ISD::SETEQ || CC == ISD::SETNE)) {
5720 // Only lower legal XALUO ops.
5721 if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
5722 return SDValue();
5723
5724 // The actual operation with overflow check.
5725 SDValue Value, OverflowCmp;
5726 SDValue ARMcc;
5727 std::tie(Value, OverflowCmp) = getARMXALUOOp(LHS.getValue(0), DAG, ARMcc);
5728
5729 if ((CC == ISD::SETNE) != isOneConstant(RHS)) {
5730 // Reverse the condition code.
5731 ARMCC::CondCodes CondCode =
5732 (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
5733 CondCode = ARMCC::getOppositeCondition(CondCode);
5734 ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
5735 }
5736 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5737
5738 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR,
5739 OverflowCmp);
5740 }
5741
5742 if (LHS.getValueType() == MVT::i32) {
5743 SDValue ARMcc;
5744 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5745 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5746 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
5747 Chain, Dest, ARMcc, CCR, Cmp);
5748 }
5749
5750 if (getTargetMachine().Options.UnsafeFPMath &&
5751 (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
5752 CC == ISD::SETNE || CC == ISD::SETUNE)) {
5753 if (SDValue Result = OptimizeVFPBrcond(Op, DAG))
5754 return Result;
5755 }
5756
5757 ARMCC::CondCodes CondCode, CondCode2;
5758 FPCCToARMCC(CC, CondCode, CondCode2);
5759
5760 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5761 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
5762 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5763 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
5764 SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
5765 SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
5766 if (CondCode2 != ARMCC::AL) {
5767 ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
5768 SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
5769 Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
5770 }
5771 return Res;
5772}
5773
5774SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
5775 SDValue Chain = Op.getOperand(0);
5776 SDValue Table = Op.getOperand(1);
5777 SDValue Index = Op.getOperand(2);
5778 SDLoc dl(Op);
5779
5780 EVT PTy = getPointerTy(DAG.getDataLayout());
5781 JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
5782 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
5783 Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI);
5784 Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, dl, PTy));
5785 SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Index);
5786 if (Subtarget->isThumb2() || (Subtarget->hasV8MBaselineOps() && Subtarget->isThumb())) {
5787 // Thumb2 and ARMv8-M use a two-level jump. That is, they jump into the jump table
5788 // which does another jump to the destination. This also makes it easier
5789 // to translate it to TBB / TBH later (Thumb2 only).
5790 // FIXME: This might not work if the function is extremely large.
5791 return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
5792 Addr, Op.getOperand(2), JTI);
5793 }
5794 if (isPositionIndependent() || Subtarget->isROPI()) {
5795 Addr =
5796 DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
5797 MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
5798 Chain = Addr.getValue(1);
5799 Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Addr);
5800 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
5801 } else {
5802 Addr =
5803 DAG.getLoad(PTy, dl, Chain, Addr,
5804 MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
5805 Chain = Addr.getValue(1);
5806 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
5807 }
5808}
5809
5810static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
5811 EVT VT = Op.getValueType();
5812 SDLoc dl(Op);
5813
5814 if (Op.getValueType().getVectorElementType() == MVT::i32) {
5815 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
5816 return Op;
5817 return DAG.UnrollVectorOp(Op.getNode());
5818 }
5819
5820 const bool HasFullFP16 = DAG.getSubtarget<ARMSubtarget>().hasFullFP16();
5821
5822 EVT NewTy;
5823 const EVT OpTy = Op.getOperand(0).getValueType();
5824 if (OpTy == MVT::v4f32)
5825 NewTy = MVT::v4i32;
5826 else if (OpTy == MVT::v4f16 && HasFullFP16)
5827 NewTy = MVT::v4i16;
5828 else if (OpTy == MVT::v8f16 && HasFullFP16)
5829 NewTy = MVT::v8i16;
5830 else
5831 llvm_unreachable("Invalid type for custom lowering!")::llvm::llvm_unreachable_internal("Invalid type for custom lowering!"
, "llvm/lib/Target/ARM/ARMISelLowering.cpp", 5831)
;
5832
5833 if (VT != MVT::v4i16 && VT != MVT::v8i16)
5834 return DAG.UnrollVectorOp(Op.getNode());
5835
5836 Op = DAG.getNode(Op.getOpcode(), dl, NewTy, Op.getOperand(0));
5837 return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
5838}
5839
5840SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
5841 EVT VT = Op.getValueType();
5842 if (VT.isVector())
5843 return LowerVectorFP_TO_INT(Op, DAG);
5844
5845 bool IsStrict = Op->isStrictFPOpcode();
5846 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
5847
5848 if (isUnsupportedFloatingType(SrcVal.getValueType())) {
5849 RTLIB::Libcall LC;
5850 if (Op.getOpcode() == ISD::FP_TO_SINT ||
5851 Op.getOpcode() == ISD::STRICT_FP_TO_SINT)
5852 LC = RTLIB::getFPTOSINT(SrcVal.getValueType(),
5853 Op.getValueType());
5854 else
5855 LC = RTLIB::getFPTOUINT(SrcVal.getValueType(),
5856 Op.getValueType());
5857 SDLoc Loc(Op);
5858 MakeLibCallOptions CallOptions;
5859 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
5860 SDValue Result;
5861 std::tie(Result, Chain) = makeLibCall(DAG, LC, Op.getValueType(), SrcVal,
5862 CallOptions, Loc, Chain);
5863 return IsStrict ? DAG.getMergeValues({Result, Chain}, Loc) : Result;
5864 }
5865
5866 // FIXME: Remove this when we have strict fp instruction selection patterns
5867 if (IsStrict) {
5868 SDLoc Loc(Op);
5869 SDValue Result =
5870 DAG.getNode(Op.getOpcode() == ISD::STRICT_FP_TO_SINT ? ISD::FP_TO_SINT
5871 : ISD::FP_TO_UINT,
5872 Loc, Op.getValueType(), SrcVal);
5873 return DAG.getMergeValues({Result, Op.getOperand(0)}, Loc);
5874 }
5875
5876 return Op;
5877}
5878
5879static SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
5880 const ARMSubtarget *Subtarget) {
5881 EVT VT = Op.getValueType();
5882 EVT ToVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
5883 EVT FromVT = Op.getOperand(0).getValueType();
5884
5885 if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f32)
5886 return Op;
5887 if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f64 &&
5888 Subtarget->hasFP64())
5889 return Op;
5890 if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f16 &&
5891 Subtarget->hasFullFP16())
5892 return Op;
5893 if (VT == MVT::v4i32 && ToVT == MVT::i32 && FromVT == MVT::v4f32 &&
5894 Subtarget->hasMVEFloatOps())
5895 return Op;
5896 if (VT == MVT::v8i16 && ToVT == MVT::i16 && FromVT == MVT::v8f16 &&
5897 Subtarget->hasMVEFloatOps())
5898 return Op;
5899
5900 if (FromVT != MVT::v4f32 && FromVT != MVT::v8f16)
5901 return SDValue();
5902
5903 SDLoc DL(Op);
5904 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
5905 unsigned BW = ToVT.getScalarSizeInBits() - IsSigned;
5906 SDValue CVT = DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
5907 DAG.getValueType(VT.getScalarType()));
5908 SDValue Max = DAG.getNode(IsSigned ? ISD::SMIN : ISD::UMIN, DL, VT, CVT,
5909 DAG.getConstant((1 << BW) - 1, DL, VT));
5910 if (IsSigned)
5911 Max = DAG.getNode(ISD::SMAX, DL, VT, Max,
5912 DAG.getConstant(-(1 << BW), DL, VT));
5913 return Max;
5914}
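// Editorial sketch (not part of the original source, name is illustrative):
// the scalar clamp that the SMIN/SMAX pair above builds for the signed case.
// For a signed i16 saturation target, BW == 16 - 1 == 15 and the result is
// clamped to [-(1 << 15), (1 << 15) - 1], i.e. [-32768, 32767].
static int32_t ClampSignedModel(int32_t V, unsigned BW /* e.g. 15 */) {
  int32_t Hi = (1 << BW) - 1;
  int32_t Lo = -(1 << BW);
  return V > Hi ? Hi : (V < Lo ? Lo : V);
}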
5915
5916static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
5917 EVT VT = Op.getValueType();
5918 SDLoc dl(Op);
5919
5920 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
5921 if (VT.getVectorElementType() == MVT::f32)
5922 return Op;
5923 return DAG.UnrollVectorOp(Op.getNode());
5924 }
5925
5926 assert((Op.getOperand(0).getValueType() == MVT::v4i16 ||
5927 Op.getOperand(0).getValueType() == MVT::v8i16) &&
5928 "Invalid type for custom lowering!");
5929
5930 const bool HasFullFP16 = DAG.getSubtarget<ARMSubtarget>().hasFullFP16();
5931
5932 EVT DestVecType;
5933 if (VT == MVT::v4f32)
5934 DestVecType = MVT::v4i32;
5935 else if (VT == MVT::v4f16 && HasFullFP16)
5936 DestVecType = MVT::v4i16;
5937 else if (VT == MVT::v8f16 && HasFullFP16)
5938 DestVecType = MVT::v8i16;
5939 else
5940 return DAG.UnrollVectorOp(Op.getNode());
5941
5942 unsigned CastOpc;
5943 unsigned Opc;
5944 switch (Op.getOpcode()) {
5945 default: llvm_unreachable("Invalid opcode!");
5946 case ISD::SINT_TO_FP:
5947 CastOpc = ISD::SIGN_EXTEND;
5948 Opc = ISD::SINT_TO_FP;
5949 break;
5950 case ISD::UINT_TO_FP:
5951 CastOpc = ISD::ZERO_EXTEND;
5952 Opc = ISD::UINT_TO_FP;
5953 break;
5954 }
5955
5956 Op = DAG.getNode(CastOpc, dl, DestVecType, Op.getOperand(0));
5957 return DAG.getNode(Opc, dl, VT, Op);
5958}
5959
5960SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
5961 EVT VT = Op.getValueType();
5962 if (VT.isVector())
5963 return LowerVectorINT_TO_FP(Op, DAG);
5964 if (isUnsupportedFloatingType(VT)) {
5965 RTLIB::Libcall LC;
5966 if (Op.getOpcode() == ISD::SINT_TO_FP)
5967 LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(),
5968 Op.getValueType());
5969 else
5970 LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(),
5971 Op.getValueType());
5972 MakeLibCallOptions CallOptions;
5973 return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
5974 CallOptions, SDLoc(Op)).first;
5975 }
5976
5977 return Op;
5978}
5979
5980SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
5981 // Implement fcopysign with a fabs and a conditional fneg.
5982 SDValue Tmp0 = Op.getOperand(0);
5983 SDValue Tmp1 = Op.getOperand(1);
5984 SDLoc dl(Op);
5985 EVT VT = Op.getValueType();
5986 EVT SrcVT = Tmp1.getValueType();
5987 bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
5988 Tmp0.getOpcode() == ARMISD::VMOVDRR;
5989 bool UseNEON = !InGPR && Subtarget->hasNEON();
5990
5991 if (UseNEON) {
5992 // Use VBSL to copy the sign bit.
5993 unsigned EncodedVal = ARM_AM::createVMOVModImm(0x6, 0x80);
5994 SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
5995 DAG.getTargetConstant(EncodedVal, dl, MVT::i32));
5996 EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
5997 if (VT == MVT::f64)
5998 Mask = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT,
5999 DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
6000 DAG.getConstant(32, dl, MVT::i32));
6001 else /*if (VT == MVT::f32)*/
6002 Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
6003 if (SrcVT == MVT::f32) {
6004 Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
6005 if (VT == MVT::f64)
6006 Tmp1 = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT,
6007 DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
6008 DAG.getConstant(32, dl, MVT::i32));
6009 } else if (VT == MVT::f32)
6010 Tmp1 = DAG.getNode(ARMISD::VSHRuIMM, dl, MVT::v1i64,
6011 DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
6012 DAG.getConstant(32, dl, MVT::i32));
6013 Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
6014 Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
6015
6016 SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0xff),
6017 dl, MVT::i32);
6018 AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
6019 SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
6020 DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
6021
6022 SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
6023 DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
6024 DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
6025 if (VT == MVT::f32) {
6026 Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
6027 Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
6028 DAG.getConstant(0, dl, MVT::i32));
6029 } else {
6030 Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
6031 }
6032
6033 return Res;
6034 }
6035
6036 // Bitcast operand 1 to i32.
6037 if (SrcVT == MVT::f64)
6038 Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
6039 Tmp1).getValue(1);
6040 Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
6041
6042 // Or in the signbit with integer operations.
6043 SDValue Mask1 = DAG.getConstant(0x80000000, dl, MVT::i32);
6044 SDValue Mask2 = DAG.getConstant(0x7fffffff, dl, MVT::i32);
6045 Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
6046 if (VT == MVT::f32) {
6047 Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
6048 DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
6049 return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
6050 DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
6051 }
6052
6053 // f64: Or the high part with signbit and then combine two parts.
6054 Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
6055 Tmp0);
6056 SDValue Lo = Tmp0.getValue(0);
6057 SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
6058 Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
6059 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
6060}
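// Editorial sketch (not part of the original source, name is illustrative):
// the integer (non-NEON) f32 path above in scalar form, operating on the raw
// IEEE-754 bits: keep the magnitude bits of operand 0 and OR in the sign bit
// of operand 1.
static uint32_t CopySignBitsModel(uint32_t MagBits, uint32_t SignBits) {
  return (MagBits & 0x7fffffffu) | (SignBits & 0x80000000u);
}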
6061
6062SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
6063 MachineFunction &MF = DAG.getMachineFunction();
6064 MachineFrameInfo &MFI = MF.getFrameInfo();
6065 MFI.setReturnAddressIsTaken(true);
6066
6067 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
6068 return SDValue();
6069
6070 EVT VT = Op.getValueType();
6071 SDLoc dl(Op);
6072 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
6073 if (Depth) {
6074 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
6075 SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
6076 return DAG.getLoad(VT, dl, DAG.getEntryNode(),
6077 DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
6078 MachinePointerInfo());
6079 }
6080
6081 // Return LR, which contains the return address. Mark it an implicit live-in.
6082 Register Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
6083 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
6084}
6085
6086SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
6087 const ARMBaseRegisterInfo &ARI =
6088 *static_cast<const ARMBaseRegisterInfo*>(RegInfo);
6089 MachineFunction &MF = DAG.getMachineFunction();
6090 MachineFrameInfo &MFI = MF.getFrameInfo();
6091 MFI.setFrameAddressIsTaken(true);
6092
6093 EVT VT = Op.getValueType();
6094 SDLoc dl(Op); // FIXME probably not meaningful
6095 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
6096 Register FrameReg = ARI.getFrameRegister(MF);
6097 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
6098 while (Depth--)
6099 FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
6100 MachinePointerInfo());
6101 return FrameAddr;
6102}
6103
6104// FIXME? Maybe this could be a TableGen attribute on some registers and
6105// this table could be generated automatically from RegInfo.
6106Register ARMTargetLowering::getRegisterByName(const char* RegName, LLT VT,
6107 const MachineFunction &MF) const {
6108 Register Reg = StringSwitch<unsigned>(RegName)
6109 .Case("sp", ARM::SP)
6110 .Default(0);
6111 if (Reg)
6112 return Reg;
6113 report_fatal_error(Twine("Invalid register name \""
6114 + StringRef(RegName) + "\"."));
6115}
6116
6117 // The result is a 64-bit value, so split it into two 32-bit values and return
6118 // them as a pair of values.
6119static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl<SDValue> &Results,
6120 SelectionDAG &DAG) {
6121 SDLoc DL(N);
6122
6123 // This function is only supposed to be called for i64 type destination.
6124 assert(N->getValueType(0) == MVT::i64
6125 && "ExpandREAD_REGISTER called for non-i64 type result.");
6126
6127 SDValue Read = DAG.getNode(ISD::READ_REGISTER, DL,
6128 DAG.getVTList(MVT::i32, MVT::i32, MVT::Other),
6129 N->getOperand(0),
6130 N->getOperand(1));
6131
6132 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Read.getValue(0),
6133 Read.getValue(1)));
6134 Results.push_back(Read.getOperand(0));
6135}
6136
6137/// \p BC is a bitcast that is about to be turned into a VMOVDRR.
6138/// When \p DstVT, the destination type of \p BC, is on the vector
6139/// register bank and the source of bitcast, \p Op, operates on the same bank,
6140/// it might be possible to combine them, such that everything stays on the
6141/// vector register bank.
6142 /// \return The node that would replace \p BC, if the combine
6143/// is possible.
6144static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC,
6145 SelectionDAG &DAG) {
6146 SDValue Op = BC->getOperand(0);
6147 EVT DstVT = BC->getValueType(0);
6148
6149 // The only vector instruction that can produce a scalar (remember,
6150 // since the bitcast was about to be turned into VMOVDRR, the source
6151 // type is i64) from a vector is EXTRACT_VECTOR_ELT.
6152 // Moreover, we can do this combine only if there is one use.
6153 // Finally, if the destination type is not a vector, there is not
6154 // much point in forcing everything onto the vector bank.
6155 if (!DstVT.isVector() || Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
6156 !Op.hasOneUse())
6157 return SDValue();
6158
6159 // If the index is not constant, we will introduce an additional
6160 // multiply that will stick.
6161 // Give up in that case.
6162 ConstantSDNode *Index = dyn_cast<ConstantSDNode>(Op.getOperand(1));
6163 if (!Index)
6164 return SDValue();
6165 unsigned DstNumElt = DstVT.getVectorNumElements();
6166
6167 // Compute the new index.
6168 const APInt &APIntIndex = Index->getAPIntValue();
6169 APInt NewIndex(APIntIndex.getBitWidth(), DstNumElt);
6170 NewIndex *= APIntIndex;
6171 // Check if the new constant index fits into i32.
6172 if (NewIndex.getBitWidth() > 32)
6173 return SDValue();
6174
6175 // vMTy bitcast(i64 extractelt vNi64 src, i32 index) ->
6176 // vMTy extractsubvector vNxMTy (bitcast vNi64 src), i32 index*M)
6177 SDLoc dl(Op);
6178 SDValue ExtractSrc = Op.getOperand(0);
6179 EVT VecVT = EVT::getVectorVT(
6180 *DAG.getContext(), DstVT.getScalarType(),
6181 ExtractSrc.getValueType().getVectorNumElements() * DstNumElt);
6182 SDValue BitCast = DAG.getNode(ISD::BITCAST, dl, VecVT, ExtractSrc);
6183 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, BitCast,
6184 DAG.getConstant(NewIndex.getZExtValue(), dl, MVT::i32));
6185}
6186
6187/// ExpandBITCAST - If the target supports VFP, this function is called to
6188/// expand a bit convert where either the source or destination type is i64 to
6189/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
6190/// operand type is illegal (e.g., v2f32 for a target that doesn't support
6191/// vectors), since the legalizer won't know what to do with that.
6192SDValue ARMTargetLowering::ExpandBITCAST(SDNode *N, SelectionDAG &DAG,
6193 const ARMSubtarget *Subtarget) const {
6194 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6195 SDLoc dl(N);
6196 SDValue Op = N->getOperand(0);
6197
6198 // This function is only supposed to be called for i16 and i64 types, either
6199 // as the source or destination of the bit convert.
6200 EVT SrcVT = Op.getValueType();
6201 EVT DstVT = N->getValueType(0);
6202
6203 if ((SrcVT == MVT::i16 || SrcVT == MVT::i32) &&
6204 (DstVT == MVT::f16 || DstVT == MVT::bf16))
6205 return MoveToHPR(SDLoc(N), DAG, MVT::i32, DstVT.getSimpleVT(),
6206 DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), MVT::i32, Op));
6207
6208 if ((DstVT == MVT::i16 || DstVT == MVT::i32) &&
6209 (SrcVT == MVT::f16 || SrcVT == MVT::bf16))
6210 return DAG.getNode(
6211 ISD::TRUNCATE, SDLoc(N), DstVT,
6212 MoveFromHPR(SDLoc(N), DAG, MVT::i32, SrcVT.getSimpleVT(), Op));
6213
6214 if (!(SrcVT == MVT::i64 || DstVT == MVT::i64))
6215 return SDValue();
6216
6217 // Turn i64->f64 into VMOVDRR.
6218 if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
6219 // Do not force values to GPRs (this is what VMOVDRR does for the inputs)
6220 // if we can combine the bitcast with its source.
6221 if (SDValue Val = CombineVMOVDRRCandidateWithVecOp(N, DAG))
6222 return Val;
6223
6224 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
6225 DAG.getConstant(0, dl, MVT::i32));
6226 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
6227 DAG.getConstant(1, dl, MVT::i32));
6228 return DAG.getNode(ISD::BITCAST, dl, DstVT,
6229 DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
6230 }
6231
6232 // Turn f64->i64 into VMOVRRD.
6233 if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
6234 SDValue Cvt;
6235 if (DAG.getDataLayout().isBigEndian() && SrcVT.isVector() &&
6236 SrcVT.getVectorNumElements() > 1)
6237 Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
6238 DAG.getVTList(MVT::i32, MVT::i32),
6239 DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op));
6240 else
6241 Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
6242 DAG.getVTList(MVT::i32, MVT::i32), Op);
6243 // Merge the pieces into a single i64 value.
6244 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
6245 }
6246
6247 return SDValue();
6248}
6249
6250/// getZeroVector - Returns a vector of specified type with all zero elements.
6251/// Zero vectors are used to represent vector negation and in those cases
6252/// will be implemented with the NEON VNEG instruction. However, VNEG does
6253/// not support i64 elements, so sometimes the zero vectors will need to be
6254/// explicitly constructed. Regardless, use a canonical VMOV to create the
6255/// zero vector.
6256static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
6257 assert(VT.isVector() && "Expected a vector type");
6258 // The canonical modified immediate encoding of a zero vector is....0!
6259 SDValue EncodedVal = DAG.getTargetConstant(0, dl, MVT::i32);
6260 EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
6261 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
6262 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
6263}
6264
6265/// LowerShiftRightParts - Lower SRA_PARTS, which returns two
6266 /// i32 values and takes a 2 x i32 value to shift plus a shift amount.
6267SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
6268 SelectionDAG &DAG) const {
6269 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
6270 EVT VT = Op.getValueType();
6271 unsigned VTBits = VT.getSizeInBits();
6272 SDLoc dl(Op);
6273 SDValue ShOpLo = Op.getOperand(0);
6274 SDValue ShOpHi = Op.getOperand(1);
6275 SDValue ShAmt = Op.getOperand(2);
6276 SDValue ARMcc;
6277 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
6278 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
6279
6280 assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
6281
6282 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
6283 DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
6284 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
6285 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
6286 DAG.getConstant(VTBits, dl, MVT::i32));
6287 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
6288 SDValue LoSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
6289 SDValue LoBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
6290 SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6291 ISD::SETGE, ARMcc, DAG, dl);
6292 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift,
6293 ARMcc, CCR, CmpLo);
6294
6295 SDValue HiSmallShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
6296 SDValue HiBigShift = Opc == ISD::SRA
6297 ? DAG.getNode(Opc, dl, VT, ShOpHi,
6298 DAG.getConstant(VTBits - 1, dl, VT))
6299 : DAG.getConstant(0, dl, VT);
6300 SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6301 ISD::SETGE, ARMcc, DAG, dl);
6302 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
6303 ARMcc, CCR, CmpHi);
6304
6305 SDValue Ops[2] = { Lo, Hi };
6306 return DAG.getMergeValues(Ops, dl);
6307}
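// Editorial sketch (not part of the original source, name is illustrative):
// scalar form of the two-register right shift built above, shown for a
// logical shift (SRL_PARTS) and 0 < Amt < 64. The CMOVs pick between the
// "small shift" (Amt < 32) and "big shift" (Amt >= 32) results based on the
// sign of Amt - 32.
static void ShiftRightPartsModel(uint32_t Lo, uint32_t Hi, unsigned Amt,
                                 uint32_t &OutLo, uint32_t &OutHi) {
  if (Amt < 32) {
    OutLo = (Lo >> Amt) | (Hi << (32 - Amt));
    OutHi = Hi >> Amt;
  } else {
    OutLo = Hi >> (Amt - 32);
    OutHi = 0;                 // for SRA_PARTS this would be Hi >> 31 instead
  }
}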
6308
6309/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
6310 /// i32 values and takes a 2 x i32 value to shift plus a shift amount.
6311SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
6312 SelectionDAG &DAG) const {
6313 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
6314 EVT VT = Op.getValueType();
6315 unsigned VTBits = VT.getSizeInBits();
6316 SDLoc dl(Op);
6317 SDValue ShOpLo = Op.getOperand(0);
6318 SDValue ShOpHi = Op.getOperand(1);
6319 SDValue ShAmt = Op.getOperand(2);
6320 SDValue ARMcc;
6321 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
6322
6323 assert(Op.getOpcode() == ISD::SHL_PARTS);
6324 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
6325 DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
6326 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
6327 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
6328 SDValue HiSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
6329
6330 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
6331 DAG.getConstant(VTBits, dl, MVT::i32));
6332 SDValue HiBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
6333 SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6334 ISD::SETGE, ARMcc, DAG, dl);
6335 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
6336 ARMcc, CCR, CmpHi);
6337
6338 SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6339 ISD::SETGE, ARMcc, DAG, dl);
6340 SDValue LoSmallShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
6341 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift,
6342 DAG.getConstant(0, dl, VT), ARMcc, CCR, CmpLo);
6343
6344 SDValue Ops[2] = { Lo, Hi };
6345 return DAG.getMergeValues(Ops, dl);
6346}
6347
6348SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
6349 SelectionDAG &DAG) const {
6350 // The rounding mode is in bits 23:22 of the FPSCR.
6351 // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
6352 // The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3)
6353 // so that the shift + and get folded into a bitfield extract.
6354 SDLoc dl(Op);
6355 SDValue Chain = Op.getOperand(0);
6356 SDValue Ops[] = {Chain,
6357 DAG.getConstant(Intrinsic::arm_get_fpscr, dl, MVT::i32)};
6358
6359 SDValue FPSCR =
6360 DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, {MVT::i32, MVT::Other}, Ops);
6361 Chain = FPSCR.getValue(1);
6362 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
6363 DAG.getConstant(1U << 22, dl, MVT::i32));
6364 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
6365 DAG.getConstant(22, dl, MVT::i32));
6366 SDValue And = DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
6367 DAG.getConstant(3, dl, MVT::i32));
6368 return DAG.getMergeValues({And, Chain}, dl);
6369}
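// ---------------------------------------------------------------------------
// Illustration (not part of ARMISelLowering.cpp): a minimal standalone sketch
// checking the FLT_ROUNDS formula used in LowerFLT_ROUNDS_ above,
// ((FPSCR + (1 << 22)) >> 22) & 3, against the documented mapping
// 0->1, 1->2, 2->3, 3->0. The FPSCR value below is synthetic.
#include <cassert>
#include <cstdint>

static unsigned fltRoundsFromFPSCR(uint32_t FPSCR) {
  // Same arithmetic the lowering emits as ADD/SRL/AND nodes.
  return ((FPSCR + (1u << 22)) >> 22) & 3u;
}

int main() {
  const unsigned Expected[4] = {1, 2, 3, 0}; // ARM rounding mode 0..3 -> FLT_ROUNDS
  for (uint32_t RM = 0; RM < 4; ++RM) {
    uint32_t FPSCR = (RM << 22) | 0x0000001fu; // bits outside 23:22 do not matter
    assert(fltRoundsFromFPSCR(FPSCR) == Expected[RM]);
  }
  return 0;
}
// ---------------------------------------------------------------------------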
6370
6371SDValue ARMTargetLowering::LowerSET_ROUNDING(SDValue Op,
6372 SelectionDAG &DAG) const {
6373 SDLoc DL(Op);
6374 SDValue Chain = Op->getOperand(0);
6375 SDValue RMValue = Op->getOperand(1);
6376
6377 // The rounding mode is in bits 23:22 of the FPSCR.
6378 // The llvm.set.rounding argument value to ARM rounding mode value mapping
6379 // is 0->3, 1->0, 2->1, 3->2. The formula we use to implement this is
6380 // ((arg - 1) & 3) << 22).
6381 //
6382 // It is expected that the argument of llvm.set.rounding is within the
6383 // segment [0, 3], so NearestTiesToAway (4) is not handled here. It is the
6384 // responsibility of the code that generated llvm.set.rounding to ensure
6385 // this condition.
6386
6387 // Calculate new value of FPSCR[23:22].
6388 RMValue = DAG.getNode(ISD::SUB, DL, MVT::i32, RMValue,
6389 DAG.getConstant(1, DL, MVT::i32));
6390 RMValue = DAG.getNode(ISD::AND, DL, MVT::i32, RMValue,
6391 DAG.getConstant(0x3, DL, MVT::i32));
6392 RMValue = DAG.getNode(ISD::SHL, DL, MVT::i32, RMValue,
6393 DAG.getConstant(ARM::RoundingBitsPos, DL, MVT::i32));
6394
6395 // Get current value of FPSCR.
6396 SDValue Ops[] = {Chain,
6397 DAG.getConstant(Intrinsic::arm_get_fpscr, DL, MVT::i32)};
6398 SDValue FPSCR =
6399 DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i32, MVT::Other}, Ops);
6400 Chain = FPSCR.getValue(1);
6401 FPSCR = FPSCR.getValue(0);
6402
6403 // Put new rounding mode into FPSCR[23:22].
6404 const unsigned RMMask = ~(ARM::Rounding::rmMask << ARM::RoundingBitsPos);
6405 FPSCR = DAG.getNode(ISD::AND, DL, MVT::i32, FPSCR,
6406 DAG.getConstant(RMMask, DL, MVT::i32));
6407 FPSCR = DAG.getNode(ISD::OR, DL, MVT::i32, FPSCR, RMValue);
6408 SDValue Ops2[] = {
6409 Chain, DAG.getConstant(Intrinsic::arm_set_fpscr, DL, MVT::i32), FPSCR};
6410 return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
6411}
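// ---------------------------------------------------------------------------
// Illustration (not part of ARMISelLowering.cpp): a standalone sketch of the
// llvm.set.rounding arithmetic in LowerSET_ROUNDING above; the argument is
// mapped to the ARM encoding with ((arg - 1) & 3) and inserted into
// FPSCR[23:22]. ARM::RoundingBitsPos is written out as the literal 22 here.
#include <cassert>
#include <cstdint>

static uint32_t setRoundingBits(uint32_t FPSCR, uint32_t Arg) {
  const unsigned RoundingBitsPos = 22;
  uint32_t RMValue = ((Arg - 1) & 3u) << RoundingBitsPos;
  uint32_t Mask = ~(3u << RoundingBitsPos);     // clear FPSCR[23:22]
  return (FPSCR & Mask) | RMValue;
}

int main() {
  const uint32_t ExpectedARM[4] = {3, 0, 1, 2}; // llvm.set.rounding arg -> ARM mode
  for (uint32_t Arg = 0; Arg < 4; ++Arg) {
    uint32_t FPSCR = setRoundingBits(0xffffffffu, Arg);
    assert(((FPSCR >> 22) & 3u) == ExpectedARM[Arg]);
  }
  return 0;
}
// ---------------------------------------------------------------------------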
6412
6413static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
6414 const ARMSubtarget *ST) {
6415 SDLoc dl(N);
6416 EVT VT = N->getValueType(0);
6417 if (VT.isVector() && ST->hasNEON()) {
6418
6419 // Compute the least significant set bit: LSB = X & -X
6420 SDValue X = N->getOperand(0);
6421 SDValue NX = DAG.getNode(ISD::SUB, dl, VT, getZeroVector(VT, DAG, dl), X);
6422 SDValue LSB = DAG.getNode(ISD::AND, dl, VT, X, NX);
6423
6424 EVT ElemTy = VT.getVectorElementType();
6425
6426 if (ElemTy == MVT::i8) {
6427 // Compute with: cttz(x) = ctpop(lsb - 1)
6428 SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6429 DAG.getTargetConstant(1, dl, ElemTy));
6430 SDValue Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
6431 return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
6432 }
6433
6434 if ((ElemTy == MVT::i16 || ElemTy == MVT::i32) &&
6435 (N->getOpcode() == ISD::CTTZ_ZERO_UNDEF)) {
6436 // Compute with: cttz(x) = (width - 1) - ctlz(lsb), if x != 0
6437 unsigned NumBits = ElemTy.getSizeInBits();
6438 SDValue WidthMinus1 =
6439 DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6440 DAG.getTargetConstant(NumBits - 1, dl, ElemTy));
6441 SDValue CTLZ = DAG.getNode(ISD::CTLZ, dl, VT, LSB);
6442 return DAG.getNode(ISD::SUB, dl, VT, WidthMinus1, CTLZ);
6443 }
6444
6445 // Compute with: cttz(x) = ctpop(lsb - 1)
6446
6447 // Compute LSB - 1.
6448 SDValue Bits;
6449 if (ElemTy == MVT::i64) {
6450 // Load constant 0xffff'ffff'ffff'ffff to register.
6451 SDValue FF = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6452 DAG.getTargetConstant(0x1eff, dl, MVT::i32));
6453 Bits = DAG.getNode(ISD::ADD, dl, VT, LSB, FF);
6454 } else {
6455 SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6456 DAG.getTargetConstant(1, dl, ElemTy));
6457 Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
6458 }
6459 return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
6460 }
6461
6462 if (!ST->hasV6T2Ops())
6463 return SDValue();
6464
6465 SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, VT, N->getOperand(0));
6466 return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
6467}
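// ---------------------------------------------------------------------------
// Illustration (not part of ARMISelLowering.cpp): a scalar check of the two
// identities the vector LowerCTTZ above relies on:
//   cttz(x) = ctpop((x & -x) - 1)           (for x == 0 this yields the width)
//   cttz(x) = (width - 1) - ctlz(x & -x)    (only valid for x != 0)
#include <cassert>
#include <cstdint>

static unsigned popcount32(uint32_t V) {
  unsigned N = 0;
  for (; V; V &= V - 1) ++N;   // clear the lowest set bit each iteration
  return N;
}

static unsigned clz32(uint32_t V) {   // caller guarantees V != 0
  unsigned N = 0;
  for (uint32_t Bit = 0x80000000u; !(V & Bit); Bit >>= 1) ++N;
  return N;
}

int main() {
  for (uint32_t X : {1u, 2u, 0x40u, 0xff00u, 0x80000000u, 12345678u}) {
    uint32_t LSB = X & (0u - X);      // isolate the lowest set bit
    unsigned ViaPop = popcount32(LSB - 1);
    unsigned ViaClz = 31 - clz32(LSB);
    assert(ViaPop == ViaClz);         // both equal cttz(X)
  }
  return 0;
}
// ---------------------------------------------------------------------------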
6468
6469static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
6470 const ARMSubtarget *ST) {
6471 EVT VT = N->getValueType(0);
6472 SDLoc DL(N);
6473
6474 assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
6475 assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
6476 VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
6477 "Unexpected type for custom ctpop lowering");
6478
6479 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6480 EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
6481 SDValue Res = DAG.getBitcast(VT8Bit, N->getOperand(0));
6482 Res = DAG.getNode(ISD::CTPOP, DL, VT8Bit, Res);
6483
6484 // Widen v8i8/v16i8 CTPOP result to VT by repeatedly widening pairwise adds.
6485 unsigned EltSize = 8;
6486 unsigned NumElts = VT.is64BitVector() ? 8 : 16;
6487 while (EltSize != VT.getScalarSizeInBits()) {
6488 SmallVector<SDValue, 8> Ops;
6489 Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddlu, DL,
6490 TLI.getPointerTy(DAG.getDataLayout())));
6491 Ops.push_back(Res);
6492
6493 EltSize *= 2;
6494 NumElts /= 2;
6495 MVT WidenVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize), NumElts);
6496 Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, WidenVT, Ops);
6497 }
6498
6499 return Res;
6500}
6501
6502/// getVShiftImm - Check if this is a valid build_vector for the immediate
6503/// operand of a vector shift operation, where all the elements of the
6504/// build_vector must have the same constant integer value.
6505static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
6506 // Ignore bit_converts.
6507 while (Op.getOpcode() == ISD::BITCAST)
6508 Op = Op.getOperand(0);
6509 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
6510 APInt SplatBits, SplatUndef;
6511 unsigned SplatBitSize;
6512 bool HasAnyUndefs;
6513 if (!BVN ||
6514 !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
6515 ElementBits) ||
6516 SplatBitSize > ElementBits)
6517 return false;
6518 Cnt = SplatBits.getSExtValue();
6519 return true;
6520}
6521
6522/// isVShiftLImm - Check if this is a valid build_vector for the immediate
6523/// operand of a vector shift left operation. That value must be in the range:
6524/// 0 <= Value < ElementBits for a left shift; or
6525/// 0 <= Value <= ElementBits for a long left shift.
6526static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
6527 assert(VT.isVector() && "vector shift count is not a vector type");
6528 int64_t ElementBits = VT.getScalarSizeInBits();
6529 if (!getVShiftImm(Op, ElementBits, Cnt))
6530 return false;
6531 return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
6532}
6533
6534/// isVShiftRImm - Check if this is a valid build_vector for the immediate
6535/// operand of a vector shift right operation. For a shift opcode, the value
6536/// is positive, but for an intrinsic the value count must be negative. The
6537/// absolute value must be in the range:
6538/// 1 <= |Value| <= ElementBits for a right shift; or
6539/// 1 <= |Value| <= ElementBits/2 for a narrow right shift.
6540static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
6541 int64_t &Cnt) {
6542 assert(VT.isVector() && "vector shift count is not a vector type");
6543 int64_t ElementBits = VT.getScalarSizeInBits();
6544 if (!getVShiftImm(Op, ElementBits, Cnt))
6545 return false;
6546 if (!isIntrinsic)
6547 return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
6548 if (Cnt >= -(isNarrow ? ElementBits / 2 : ElementBits) && Cnt <= -1) {
6549 Cnt = -Cnt;
6550 return true;
6551 }
6552 return false;
6553}
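// ---------------------------------------------------------------------------
// Illustration (not part of ARMISelLowering.cpp): the two range checks in
// isVShiftLImm/isVShiftRImm above reduce to simple interval tests on the
// splatted constant. For 8-bit elements:
//   left shift:   0 <= Cnt < 8   (or Cnt <= 8 for a "long" shift)
//   right shift:  1 <= Cnt <= 8  (or Cnt <= 4 for a narrowing shift)
#include <cassert>
#include <cstdint>

static bool vshlImmOK(int64_t Cnt, int64_t ElementBits, bool isLong) {
  return Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits;
}

static bool vshrImmOK(int64_t Cnt, int64_t ElementBits, bool isNarrow) {
  return Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits);
}

int main() {
  assert(vshlImmOK(7, 8, /*isLong=*/false) && !vshlImmOK(8, 8, false));
  assert(vshlImmOK(8, 8, /*isLong=*/true));
  assert(vshrImmOK(8, 8, /*isNarrow=*/false) && !vshrImmOK(0, 8, false));
  assert(vshrImmOK(4, 8, /*isNarrow=*/true) && !vshrImmOK(5, 8, true));
  return 0;
}
// ---------------------------------------------------------------------------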
6554
6555static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
6556 const ARMSubtarget *ST) {
6557 EVT VT = N->getValueType(0);
6558 SDLoc dl(N);
6559 int64_t Cnt;
6560
6561 if (!VT.isVector())
6562 return SDValue();
6563
6564 // We essentially have two forms here. Shift by an immediate and shift by a
6565 // vector register (there is also a shift by a GPR, but that is just handled
6566 // with a tablegen pattern). We cannot easily match shift by an immediate in
6567 // tablegen so we do that here and generate a VSHLIMM/VSHRsIMM/VSHRuIMM.
6568 // For shifting by a vector, we don't have VSHR, only VSHL (which can be
6569 // signed or unsigned, and a negative shift indicates a shift right).
6570 if (N->getOpcode() == ISD::SHL) {
6571 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
6572 return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),
6573 DAG.getConstant(Cnt, dl, MVT::i32));
6574 return DAG.getNode(ARMISD::VSHLu, dl, VT, N->getOperand(0),
6575 N->getOperand(1));
6576 }
6577
6578 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
6579 "unexpected vector shift opcode");
6580
6581 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
6582 unsigned VShiftOpc =
6583 (N->getOpcode() == ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);
6584 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
6585 DAG.getConstant(Cnt, dl, MVT::i32));
6586 }
6587
6588 // Other right shifts we don't have operations for (we use a shift left by a
6589 // negative number).
6590 EVT ShiftVT = N->getOperand(1).getValueType();
6591 SDValue NegatedCount = DAG.getNode(
6592 ISD::SUB, dl, ShiftVT, getZeroVector(ShiftVT, DAG, dl), N->getOperand(1));
6593 unsigned VShiftOpc =
6594 (N->getOpcode() == ISD::SRA ? ARMISD::VSHLs : ARMISD::VSHLu);
6595 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0), NegatedCount);
6596}
6597
6598static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
6599 const ARMSubtarget *ST) {
6600 EVT VT = N->getValueType(0);
6601 SDLoc dl(N);
6602
6603 // We can get here for a node like i32 = ISD::SHL i32, i64
6604 if (VT != MVT::i64)
6605 return SDValue();
6606
6607 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA ||
6608 N->getOpcode() == ISD::SHL) &&
6609 "Unknown shift to lower!");
6610
6611 unsigned ShOpc = N->getOpcode();
6612 if (ST->hasMVEIntegerOps()) {
6613 SDValue ShAmt = N->getOperand(1);
6614 unsigned ShPartsOpc = ARMISD::LSLL;
6615 ConstantSDNode *Con = dyn_cast<ConstantSDNode>(ShAmt);
6616
6617 // If the shift amount is zero, is 32 or more, or is wider than 64 bits,
6618 // then fall back to the default expansion.
6619 if (ShAmt->getValueType(0).getSizeInBits() > 64 ||
6620 (Con && (Con->getZExtValue() == 0 || Con->getZExtValue() >= 32)))
6621 return SDValue();
6622
6623 // Extract the lower 32 bits of the shift amount if it's not an i32
6624 if (ShAmt->getValueType(0) != MVT::i32)
6625 ShAmt = DAG.getZExtOrTrunc(ShAmt, dl, MVT::i32);
6626
6627 if (ShOpc == ISD::SRL) {
6628 if (!Con)
6629 // There is no t2LSRLr instruction so negate and perform an lsll if the
6630 // shift amount is in a register, emulating a right shift.
6631 ShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
6632 DAG.getConstant(0, dl, MVT::i32), ShAmt);
6633 else
6634 // Else generate an lsrl on the immediate shift amount
6635 ShPartsOpc = ARMISD::LSRL;
6636 } else if (ShOpc == ISD::SRA)
6637 ShPartsOpc = ARMISD::ASRL;
6638
6639 // Lower 32 bits of the destination/source
6640 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
6641 DAG.getConstant(0, dl, MVT::i32));
6642 // Upper 32 bits of the destination/source
6643 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
6644 DAG.getConstant(1, dl, MVT::i32));
6645
6646 // Generate the shift operation as computed above
6647 Lo = DAG.getNode(ShPartsOpc, dl, DAG.getVTList(MVT::i32, MVT::i32), Lo, Hi,
6648 ShAmt);
6649 // The upper 32 bits come from the second return value of lsll
6650 Hi = SDValue(Lo.getNode(), 1);
6651 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6652 }
6653
6654 // We only lower SRA, SRL of 1 here, all others use generic lowering.
6655 if (!isOneConstant(N->getOperand(1)) || N->getOpcode() == ISD::SHL)
6656 return SDValue();
6657
6658 // If we are in thumb mode, we don't have RRX.
6659 if (ST->isThumb1Only())
6660 return SDValue();
6661
6662 // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
6663 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
6664 DAG.getConstant(0, dl, MVT::i32));
6665 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
6666 DAG.getConstant(1, dl, MVT::i32));
6667
6668 // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
6669 // captures the result into a carry flag.
6670 unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
6671 Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi);
6672
6673 // The low part is an ARMISD::RRX operand, which shifts the carry in.
6674 Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
6675
6676 // Merge the pieces into a single i64 value.
6677 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6678}
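// ---------------------------------------------------------------------------
// Illustration (not part of ARMISelLowering.cpp): a scalar model of the
// SRL/SRA-by-1 expansion in Expand64BitShift above. The high word is shifted
// right by one and the bit it shifts out is rotated into the top of the low
// word, which is what the SRL_FLAG/SRA_FLAG + RRX pair computes via the
// carry flag.
#include <cassert>
#include <cstdint>

static uint64_t lshr64By1(uint32_t Lo, uint32_t Hi) {
  uint32_t Carry = Hi & 1u;                       // bit shifted out of Hi
  uint32_t NewHi = Hi >> 1;                       // SRL_FLAG
  uint32_t NewLo = (Lo >> 1) | (Carry << 31);     // RRX: carry enters bit 31
  return ((uint64_t)NewHi << 32) | NewLo;
}

int main() {
  uint64_t X = 0x0123456789abcdefULL;
  uint32_t Lo = (uint32_t)X, Hi = (uint32_t)(X >> 32);
  assert(lshr64By1(Lo, Hi) == (X >> 1));
  return 0;
}
// ---------------------------------------------------------------------------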
6679
6680static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG,
6681 const ARMSubtarget *ST) {
6682 bool Invert = false;
6683 bool Swap = false;
6684 unsigned Opc = ARMCC::AL;
6685
6686 SDValue Op0 = Op.getOperand(0);
6687 SDValue Op1 = Op.getOperand(1);
6688 SDValue CC = Op.getOperand(2);
6689 EVT VT = Op.getValueType();
6690 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
6691 SDLoc dl(Op);
6692
6693 EVT CmpVT;
6694 if (ST->hasNEON())
6695 CmpVT = Op0.getValueType().changeVectorElementTypeToInteger();
6696 else {
6697 assert(ST->hasMVEIntegerOps() &&
6698 "No hardware support for integer vector comparison!");
6699
6700 if (Op.getValueType().getVectorElementType() != MVT::i1)
6701 return SDValue();
6702
6703 // Make sure we expand floating point setcc to scalar if we do not have
6704 // mve.fp, so that we can handle them from there.
6705 if (Op0.getValueType().isFloatingPoint() && !ST->hasMVEFloatOps())
6706 return SDValue();
6707
6708 CmpVT = VT;
6709 }
6710
6711 if (Op0.getValueType().getVectorElementType() == MVT::i64 &&
6712 (SetCCOpcode == ISD::SETEQ || SetCCOpcode == ISD::SETNE)) {
6713 // Special-case integer 64-bit equality comparisons. They aren't legal,
6714 // but they can be lowered with a few vector instructions.
6715 unsigned CmpElements = CmpVT.getVectorNumElements() * 2;
6716 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, CmpElements);
6717 SDValue CastOp0 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op0);
6718 SDValue CastOp1 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op1);
6719 SDValue Cmp = DAG.getNode(ISD::SETCC, dl, SplitVT, CastOp0, CastOp1,
6720 DAG.getCondCode(ISD::SETEQ));
6721 SDValue Reversed = DAG.getNode(ARMISD::VREV64, dl, SplitVT, Cmp);
6722 SDValue Merged = DAG.getNode(ISD::AND, dl, SplitVT, Cmp, Reversed);
6723 Merged = DAG.getNode(ISD::BITCAST, dl, CmpVT, Merged);
6724 if (SetCCOpcode == ISD::SETNE)
6725 Merged = DAG.getNOT(dl, Merged, CmpVT);
6726 Merged = DAG.getSExtOrTrunc(Merged, dl, VT);
6727 return Merged;
6728 }
6729
6730 if (CmpVT.getVectorElementType() == MVT::i64)
6731 // 64-bit comparisons are not legal in general.
6732 return SDValue();
6733
6734 if (Op1.getValueType().isFloatingPoint()) {
6735 switch (SetCCOpcode) {
6736 default: llvm_unreachable("Illegal FP comparison");
6737 case ISD::SETUNE:
6738 case ISD::SETNE:
6739 if (ST->hasMVEFloatOps()) {
6740 Opc = ARMCC::NE; break;
6741 } else {
6742 Invert = true; [[fallthrough]];
6743 }
6744 case ISD::SETOEQ:
6745 case ISD::SETEQ: Opc = ARMCC::EQ; break;
6746 case ISD::SETOLT:
6747 case ISD::SETLT: Swap = true; [[fallthrough]];
6748 case ISD::SETOGT:
6749 case ISD::SETGT: Opc = ARMCC::GT; break;
6750 case ISD::SETOLE:
6751 case ISD::SETLE: Swap = true; [[fallthrough]];
6752 case ISD::SETOGE:
6753 case ISD::SETGE: Opc = ARMCC::GE; break;
6754 case ISD::SETUGE: Swap = true; [[fallthrough]];
6755 case ISD::SETULE: Invert = true; Opc = ARMCC::GT; break;
6756 case ISD::SETUGT: Swap = true; [[fallthrough]];
6757 case ISD::SETULT: Invert = true; Opc = ARMCC::GE; break;
6758 case ISD::SETUEQ: Invert = true; [[fallthrough]];
6759 case ISD::SETONE: {
6760 // Expand this to (OLT | OGT).
6761 SDValue TmpOp0 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op1, Op0,
6762 DAG.getConstant(ARMCC::GT, dl, MVT::i32));
6763 SDValue TmpOp1 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
6764 DAG.getConstant(ARMCC::GT, dl, MVT::i32));
6765 SDValue Result = DAG.getNode(ISD::OR, dl, CmpVT, TmpOp0, TmpOp1);
6766 if (Invert)
6767 Result = DAG.getNOT(dl, Result, VT);
6768 return Result;
6769 }
6770 case ISD::SETUO: Invert = true; [[fallthrough]];
6771 case ISD::SETO: {
6772 // Expand this to (OLT | OGE).
6773 SDValue TmpOp0 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op1, Op0,
6774 DAG.getConstant(ARMCC::GT, dl, MVT::i32));
6775 SDValue TmpOp1 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
6776 DAG.getConstant(ARMCC::GE, dl, MVT::i32));
6777 SDValue Result = DAG.getNode(ISD::OR, dl, CmpVT, TmpOp0, TmpOp1);
6778 if (Invert)
6779 Result = DAG.getNOT(dl, Result, VT);
6780 return Result;
6781 }
6782 }
6783 } else {
6784 // Integer comparisons.
6785 switch (SetCCOpcode) {
6786 default: llvm_unreachable("Illegal integer comparison");
6787 case ISD::SETNE:
6788 if (ST->hasMVEIntegerOps()) {
6789 Opc = ARMCC::NE; break;
6790 } else {
6791 Invert = true; [[fallthrough]];
6792 }
6793 case ISD::SETEQ: Opc = ARMCC::EQ; break;
6794 case ISD::SETLT: Swap = true; [[fallthrough]];
6795 case ISD::SETGT: Opc = ARMCC::GT; break;
6796 case ISD::SETLE: Swap = true; [[fallthrough]];
6797 case ISD::SETGE: Opc = ARMCC::GE; break;
6798 case ISD::SETULT: Swap = true; [[fallthrough]];
6799 case ISD::SETUGT: Opc = ARMCC::HI; break;
6800 case ISD::SETULE: Swap = true; [[fallthrough]];
6801 case ISD::SETUGE: Opc = ARMCC::HS; break;
6802 }
6803
6804 // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
6805 if (ST->hasNEON() && Opc == ARMCC::EQ) {
6806 SDValue AndOp;
6807 if (ISD::isBuildVectorAllZeros(Op1.getNode()))
6808 AndOp = Op0;
6809 else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
6810 AndOp = Op1;
6811
6812 // Ignore bitconvert.
6813 if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
6814 AndOp = AndOp.getOperand(0);
6815
6816 if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
6817 Op0 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(0));
6818 Op1 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(1));
6819 SDValue Result = DAG.getNode(ARMISD::VTST, dl, CmpVT, Op0, Op1);
6820 if (!Invert)
6821 Result = DAG.getNOT(dl, Result, VT);
6822 return Result;
6823 }
6824 }
6825 }
6826
6827 if (Swap)
6828 std::swap(Op0, Op1);
6829
6830 // If one of the operands is a constant vector zero, attempt to fold the
6831 // comparison to a specialized compare-against-zero form.
6832 SDValue SingleOp;
6833 if (ISD::isBuildVectorAllZeros(Op1.getNode()))
6834 SingleOp = Op0;
6835 else if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
6836 if (Opc == ARMCC::GE)
6837 Opc = ARMCC::LE;
6838 else if (Opc == ARMCC::GT)
6839 Opc = ARMCC::LT;
6840 SingleOp = Op1;
6841 }
6842
6843 SDValue Result;
6844 if (SingleOp.getNode()) {
6845 Result = DAG.getNode(ARMISD::VCMPZ, dl, CmpVT, SingleOp,
6846 DAG.getConstant(Opc, dl, MVT::i32));
6847 } else {
6848 Result = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
6849 DAG.getConstant(Opc, dl, MVT::i32));
6850 }
6851
6852 Result = DAG.getSExtOrTrunc(Result, dl, VT);
6853
6854 if (Invert)
6855 Result = DAG.getNOT(dl, Result, VT);
6856
6857 return Result;
6858}
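// ---------------------------------------------------------------------------
// Illustration (not part of ARMISelLowering.cpp): a scalar check of the
// ordered-compare identities behind the two expansions in LowerVSETCC above:
//   SETONE(a,b) == (a < b) || (a > b)     ("ordered and not equal")
//   SETO(a,b)   == (a < b) || (a >= b)    ("neither operand is NaN")
#include <cassert>
#include <cmath>

int main() {
  const double NaN = std::nan("");
  const double Vals[] = {1.0, 2.0, 2.0, NaN};
  for (double A : Vals)
    for (double B : Vals) {
      bool Ordered = !std::isnan(A) && !std::isnan(B);
      assert(((A < B) || (A > B)) == (Ordered && A != B));
      assert(((A < B) || (A >= B)) == Ordered);
    }
  return 0;
}
// ---------------------------------------------------------------------------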
6859
6860static SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) {
6861 SDValue LHS = Op.getOperand(0);
6862 SDValue RHS = Op.getOperand(1);
6863 SDValue Carry = Op.getOperand(2);
6864 SDValue Cond = Op.getOperand(3);
6865 SDLoc DL(Op);
6866
6867 assert(LHS.getSimpleValueType().isInteger() && "SETCCCARRY is integer only.");
6868
6869 // ARMISD::SUBE expects a carry not a borrow like ISD::SUBCARRY so we
6870 // have to invert the carry first.
6871 Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
6872 DAG.getConstant(1, DL, MVT::i32), Carry);
6873 // This converts the boolean value carry into the carry flag.
6874 Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
6875
6876 SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
6877 SDValue Cmp = DAG.getNode(ARMISD::SUBE, DL, VTs, LHS, RHS, Carry);
6878
6879 SDValue FVal = DAG.getConstant(0, DL, MVT::i32);
6880 SDValue TVal = DAG.getConstant(1, DL, MVT::i32);
6881 SDValue ARMcc = DAG.getConstant(
6882 IntCCToARMCC(cast<CondCodeSDNode>(Cond)->get()), DL, MVT::i32);
6883 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
6884 SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, ARM::CPSR,
6885 Cmp.getValue(1), SDValue());
6886 return DAG.getNode(ARMISD::CMOV, DL, Op.getValueType(), FVal, TVal, ARMcc,
6887 CCR, Chain.getValue(1));
6888}
6889
6890/// isVMOVModifiedImm - Check if the specified splat value corresponds to a
6891/// valid vector constant for a NEON or MVE instruction with a "modified
6892/// immediate" operand (e.g., VMOV). If so, return the encoded value.
6893static SDValue isVMOVModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
6894 unsigned SplatBitSize, SelectionDAG &DAG,
6895 const SDLoc &dl, EVT &VT, EVT VectorVT,
6896 VMOVModImmType type) {
6897 unsigned OpCmode, Imm;
6898 bool is128Bits = VectorVT.is128BitVector();
6899
6900 // SplatBitSize is set to the smallest size that splats the vector, so a
6901 // zero vector will always have SplatBitSize == 8. However, NEON modified
6902// immediate instructions other than VMOV do not support the 8-bit encoding
6903 // of a zero vector, and the default encoding of zero is supposed to be the
6904 // 32-bit version.
6905 if (SplatBits == 0)
6906 SplatBitSize = 32;
6907
6908 switch (SplatBitSize) {
6909 case 8:
6910 if (type != VMOVModImm)
6911 return SDValue();
6912 // Any 1-byte value is OK. Op=0, Cmode=1110.
6913 assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
6914 OpCmode = 0xe;
6915 Imm = SplatBits;
6916 VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
6917 break;
6918
6919 case 16:
6920 // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
6921 VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
6922 if ((SplatBits & ~0xff) == 0) {
6923 // Value = 0x00nn: Op=x, Cmode=100x.
6924 OpCmode = 0x8;
6925 Imm = SplatBits;
6926 break;
6927 }
6928 if ((SplatBits & ~0xff00) == 0) {
6929 // Value = 0xnn00: Op=x, Cmode=101x.
6930 OpCmode = 0xa;
6931 Imm = SplatBits >> 8;
6932 break;
6933 }
6934 return SDValue();
6935
6936 case 32:
6937 // NEON's 32-bit VMOV supports splat values where:
6938 // * only one byte is nonzero, or
6939 // * the least significant byte is 0xff and the second byte is nonzero, or
6940 // * the least significant 2 bytes are 0xff and the third is nonzero.
6941 VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
6942 if ((SplatBits & ~0xff) == 0) {
6943 // Value = 0x000000nn: Op=x, Cmode=000x.
6944 OpCmode = 0;
6945 Imm = SplatBits;
6946 break;
6947 }
6948 if ((SplatBits & ~0xff00) == 0) {
6949 // Value = 0x0000nn00: Op=x, Cmode=001x.
6950 OpCmode = 0x2;
6951 Imm = SplatBits >> 8;
6952 break;
6953 }
6954 if ((SplatBits & ~0xff0000) == 0) {
6955 // Value = 0x00nn0000: Op=x, Cmode=010x.
6956 OpCmode = 0x4;
6957 Imm = SplatBits >> 16;
6958 break;
6959 }
6960 if ((SplatBits & ~0xff000000) == 0) {
6961 // Value = 0xnn000000: Op=x, Cmode=011x.
6962 OpCmode = 0x6;
6963 Imm = SplatBits >> 24;
6964 break;
6965 }
6966
6967 // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC
6968 if (type == OtherModImm) return SDValue();
6969
6970 if ((SplatBits & ~0xffff) == 0 &&
6971 ((SplatBits | SplatUndef) & 0xff) == 0xff) {
6972 // Value = 0x0000nnff: Op=x, Cmode=1100.
6973 OpCmode = 0xc;
6974 Imm = SplatBits >> 8;
6975 break;
6976 }
6977
6978 // cmode == 0b1101 is not supported for MVE VMVN
6979 if (type == MVEVMVNModImm)
6980 return SDValue();
6981
6982 if ((SplatBits & ~0xffffff) == 0 &&
6983 ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
6984 // Value = 0x00nnffff: Op=x, Cmode=1101.
6985 OpCmode = 0xd;
6986 Imm = SplatBits >> 16;
6987 break;
6988 }
6989
6990 // Note: there are a few 32-bit splat values (specifically: 00ffff00,
6991 // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
6992 // VMOV.I32. A (very) minor optimization would be to replicate the value
6993 // and fall through here to test for a valid 64-bit splat. But, then the
6994 // caller would also need to check and handle the change in size.
6995 return SDValue();
6996
6997 case 64: {
6998 if (type != VMOVModImm)
6999 return SDValue();
7000 // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
7001 uint64_t BitMask = 0xff;
7002 unsigned ImmMask = 1;
7003 Imm = 0;
7004 for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
7005 if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
7006 Imm |= ImmMask;
7007 } else if ((SplatBits & BitMask) != 0) {
7008 return SDValue();
7009 }
7010 BitMask <<= 8;
7011 ImmMask <<= 1;
7012 }
7013
7014 if (DAG.getDataLayout().isBigEndian()) {
7015 // Reverse the order of elements within the vector.
7016 unsigned BytesPerElem = VectorVT.getScalarSizeInBits() / 8;
7017 unsigned Mask = (1 << BytesPerElem) - 1;
7018 unsigned NumElems = 8 / BytesPerElem;
7019 unsigned NewImm = 0;
7020 for (unsigned ElemNum = 0; ElemNum < NumElems; ++ElemNum) {
7021 unsigned Elem = ((Imm >> ElemNum * BytesPerElem) & Mask);
7022 NewImm |= Elem << (NumElems - ElemNum - 1) * BytesPerElem;
7023 }
7024 Imm = NewImm;
7025 }
7026
7027 // Op=1, Cmode=1110.
7028 OpCmode = 0x1e;
7029 VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
7030 break;
7031 }
7032
7033 default:
7034 llvm_unreachable("unexpected size for isVMOVModifiedImm");
7035 }
7036
7037 unsigned EncodedVal = ARM_AM::createVMOVModImm(OpCmode, Imm);
7038 return DAG.getTargetConstant(EncodedVal, dl, MVT::i32);
7039}
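// ---------------------------------------------------------------------------
// Illustration (not part of ARMISelLowering.cpp): a standalone sketch of just
// the 32-bit "one nonzero byte" classification performed by isVMOVModifiedImm
// above, returning the (OpCmode, Imm) pair it would pick; the real routine
// also handles 8-, 16- and 64-bit splats and the 0x..ff cmode 1100/1101 forms.
#include <cassert>
#include <cstdint>
#include <utility>

static std::pair<unsigned, unsigned> classify32BitSplat(uint32_t SplatBits) {
  if ((SplatBits & ~0xffu) == 0)       return {0x0, SplatBits};        // 0x000000nn
  if ((SplatBits & ~0xff00u) == 0)     return {0x2, SplatBits >> 8};   // 0x0000nn00
  if ((SplatBits & ~0xff0000u) == 0)   return {0x4, SplatBits >> 16};  // 0x00nn0000
  if ((SplatBits & ~0xff000000u) == 0) return {0x6, SplatBits >> 24};  // 0xnn000000
  return {~0u, 0};                                                     // not encodable here
}

int main() {
  assert(classify32BitSplat(0x0000004du) == std::make_pair(0x0u, 0x4du));
  assert(classify32BitSplat(0x003a0000u) == std::make_pair(0x4u, 0x3au));
  assert(classify32BitSplat(0x12340000u).first == ~0u);
  return 0;
}
// ---------------------------------------------------------------------------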
7040
7041SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
7042 const ARMSubtarget *ST) const {
7043 EVT VT = Op.getValueType();
7044 bool IsDouble = (VT == MVT::f64);
7045 ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
7046 const APFloat &FPVal = CFP->getValueAPF();
7047
7048 // Prevent floating-point constants from using literal loads
7049 // when execute-only is enabled.
7050 if (ST->genExecuteOnly()) {
7051 // If we can represent the constant as an immediate, don't lower it
7052 if (isFPImmLegal(FPVal, VT))
7053 return Op;
7054 // Otherwise, construct as integer, and move to float register
7055 APInt INTVal = FPVal.bitcastToAPInt();
7056 SDLoc DL(CFP);
7057 switch (VT.getSimpleVT().SimpleTy) {
7058 default:
7059 llvm_unreachable("Unknown floating point type!");
7060 break;
7061 case MVT::f64: {
7062 SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
7063 SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
7064 return DAG.getNode(ARMISD::VMOVDRR, DL, MVT::f64, Lo, Hi);
7065 }
7066 case MVT::f32:
7067 return DAG.getNode(ARMISD::VMOVSR, DL, VT,
7068 DAG.getConstant(INTVal, DL, MVT::i32));
7069 }
7070 }
7071
7072 if (!ST->hasVFP3Base())
7073 return SDValue();
7074
7075 // Use the default (constant pool) lowering for double constants when we have
7076 // an SP-only FPU
7077 if (IsDouble && !Subtarget->hasFP64())
7078 return SDValue();
7079
7080 // Try splatting with a VMOV.f32...
7081 int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
7082
7083 if (ImmVal != -1) {
7084 if (IsDouble || !ST->useNEONForSinglePrecisionFP()) {
7085 // We have code in place to select a valid ConstantFP already, no need to
7086 // do any mangling.
7087 return Op;
7088 }
7089
7090 // It's a float and we are trying to use NEON operations where
7091 // possible. Lower it to a splat followed by an extract.
7092 SDLoc DL(Op);
7093 SDValue NewVal = DAG.getTargetConstant(ImmVal, DL, MVT::i32);
7094 SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
7095 NewVal);
7096 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant,
7097 DAG.getConstant(0, DL, MVT::i32));
7098 }
7099
7100 // The rest of our options are NEON only, make sure that's allowed before
7101 // proceeding..
7102 if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP()))
7103 return SDValue();
7104
7105 EVT VMovVT;
7106 uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue();
7107
7108 // It wouldn't really be worth bothering for doubles except for one very
7109 // important value, which does happen to match: 0.0. So make sure we don't do
7110 // anything stupid.
7111 if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32))
7112 return SDValue();
7113
7114 // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
7115 SDValue NewVal = isVMOVModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op),
7116 VMovVT, VT, VMOVModImm);
7117 if (NewVal != SDValue()) {
7118 SDLoc DL(Op);
7119 SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
7120 NewVal);
7121 if (IsDouble)
7122 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
7123
7124 // It's a float: cast and extract a vector element.
7125 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
7126 VecConstant);
7127 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
7128 DAG.getConstant(0, DL, MVT::i32));
7129 }
7130
7131 // Finally, try a VMVN.i32
7132 NewVal = isVMOVModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT,
7133 VT, VMVNModImm);
7134 if (NewVal != SDValue()) {
7135 SDLoc DL(Op);
7136 SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
7137
7138 if (IsDouble)
7139 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
7140
7141 // It's a float: cast and extract a vector element.
7142 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
7143 VecConstant);
7144 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
7145 DAG.getConstant(0, DL, MVT::i32));
7146 }
7147
7148 return SDValue();
7149}
7150
7151// Check if a VEXT instruction can handle the shuffle mask when the
7152// vector sources of the shuffle are the same.
7153static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
7154 unsigned NumElts = VT.getVectorNumElements();
7155
7156 // Assume that the first shuffle index is not UNDEF. Fail if it is.
7157 if (M[0] < 0)
7158 return false;
7159
7160 Imm = M[0];
7161
7162 // If this is a VEXT shuffle, the immediate value is the index of the first
7163 // element. The other shuffle indices must be the successive elements after
7164 // the first one.
7165 unsigned ExpectedElt = Imm;
7166 for (unsigned i = 1; i < NumElts; ++i) {
7167 // Increment the expected index. If it wraps around, just follow it
7168 // back to index zero and keep going.
7169 ++ExpectedElt;
7170 if (ExpectedElt == NumElts)
7171 ExpectedElt = 0;
7172
7173 if (M[i] < 0) continue; // ignore UNDEF indices
7174 if (ExpectedElt != static_cast<unsigned>(M[i]))
7175 return false;
7176 }
7177
7178 return true;
7179}
7180
7181static bool isVEXTMask(ArrayRef<int> M, EVT VT,
7182 bool &ReverseVEXT, unsigned &Imm) {
7183 unsigned NumElts = VT.getVectorNumElements();
7184 ReverseVEXT = false;
7185
7186 // Assume that the first shuffle index is not UNDEF. Fail if it is.
7187 if (M[0] < 0)
7188 return false;
7189
7190 Imm = M[0];
7191
7192 // If this is a VEXT shuffle, the immediate value is the index of the first
7193 // element. The other shuffle indices must be the successive elements after
7194 // the first one.
7195 unsigned ExpectedElt = Imm;
7196 for (unsigned i = 1; i < NumElts; ++i) {
7197 // Increment the expected index. If it wraps around, it may still be
7198 // a VEXT but the source vectors must be swapped.
7199 ExpectedElt += 1;
7200 if (ExpectedElt == NumElts * 2) {
7201 ExpectedElt = 0;
7202 ReverseVEXT = true;
7203 }
7204
7205 if (M[i] < 0) continue; // ignore UNDEF indices
7206 if (ExpectedElt != static_cast<unsigned>(M[i]))
7207 return false;
7208 }
7209
7210 // Adjust the index value if the source operands will be swapped.
7211 if (ReverseVEXT)
7212 Imm -= NumElts;
7213
7214 return true;
7215}
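// ---------------------------------------------------------------------------
// Illustration (not part of ARMISelLowering.cpp): concrete v8i8 masks of the
// shape isSingletonVEXTMask/isVEXTMask above accept: consecutive indices
// starting at Imm, wrapping within one source (singleton case) or running
// across both sources (two-source case; wrapping past 2*NumElts would
// additionally set ReverseVEXT, which is not modelled here).
#include <cassert>
#include <vector>

static std::vector<int> vextMask(unsigned NumElts, unsigned Imm,
                                 unsigned WrapAt) {
  std::vector<int> M(NumElts);
  unsigned Elt = Imm;
  for (unsigned i = 0; i < NumElts; ++i) {
    M[i] = Elt++;
    if (Elt == WrapAt)
      Elt = 0;
  }
  return M;
}

int main() {
  // Singleton VEXT on one v8i8 source, Imm = 3: <3,4,5,6,7,0,1,2>.
  assert((vextMask(8, 3, 8) == std::vector<int>{3, 4, 5, 6, 7, 0, 1, 2}));
  // Two-source VEXT, Imm = 3: <3,4,5,6,7,8,9,10> (no wrap, ReverseVEXT stays false).
  assert((vextMask(8, 3, 16) == std::vector<int>{3, 4, 5, 6, 7, 8, 9, 10}));
  return 0;
}
// ---------------------------------------------------------------------------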
7216
7217static bool isVTBLMask(ArrayRef<int> M, EVT VT) {
7218 // We can handle <8 x i8> vector shuffles. If the index in the mask is out of
7219 // range, then 0 is placed into the resulting vector. So pretty much any mask
7220 // of 8 elements can work here.
7221 return VT == MVT::v8i8 && M.size() == 8;
7222}
7223
7224static unsigned SelectPairHalf(unsigned Elements, ArrayRef<int> Mask,
7225 unsigned Index) {
7226 if (Mask.size() == Elements * 2)
7227 return Index / Elements;
7228 return Mask[Index] == 0 ? 0 : 1;
7229}
7230
7231// Checks whether the shuffle mask represents a vector transpose (VTRN) by
7232// checking that pairs of elements in the shuffle mask represent the same index
7233// in each vector, incrementing the expected index by 2 at each step.
7234// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 2, 6]
7235// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,c,g}
7236// v2={e,f,g,h}
7237// WhichResult gives the offset for each element in the mask based on which
7238// of the two results it belongs to.
7239//
7240// The transpose can be represented either as:
7241// result1 = shufflevector v1, v2, result1_shuffle_mask
7242// result2 = shufflevector v1, v2, result2_shuffle_mask
7243// where v1/v2 and the shuffle masks have the same number of elements
7244// (here WhichResult (see below) indicates which result is being checked)
7245//
7246// or as:
7247// results = shufflevector v1, v2, shuffle_mask
7248// where both results are returned in one vector and the shuffle mask has twice
7249// as many elements as v1/v2 (here WhichResult will always be 0 if true) here we
7250// want to check the low half and high half of the shuffle mask as if it were
7251// the other case
7252static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
7253 unsigned EltSz = VT.getScalarSizeInBits();
7254 if (EltSz == 64)
7255 return false;
7256
7257 unsigned NumElts = VT.getVectorNumElements();
7258 if (M.size() != NumElts && M.size() != NumElts*2)
7259 return false;
7260
7261 // If the mask is twice as long as the input vector then we need to check the
7262 // upper and lower parts of the mask with a matching value for WhichResult
7263 // FIXME: A mask with only even values will be rejected in case the first
7264 // element is undefined, e.g. [-1, 4, 2, 6] will be rejected, because only
7265 // M[0] is used to determine WhichResult
7266 for (unsigned i = 0; i < M.size(); i += NumElts) {
7267 WhichResult = SelectPairHalf(NumElts, M, i);
7268 for (unsigned j = 0; j < NumElts; j += 2) {
7269 if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
7270 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + NumElts + WhichResult))
7271 return false;
7272 }
7273 }
7274
7275 if (M.size() == NumElts*2)
7276 WhichResult = 0;
7277
7278 return true;
7279}
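// ---------------------------------------------------------------------------
// Illustration (not part of ARMISelLowering.cpp): a plain-array restatement of
// the single-result VTRN check above (mask size equal to the vector width,
// -1 meaning undef), exercised on the v4i32 example masks from the comment:
// [0,4,2,6] selects result 0 and [1,5,3,7] selects result 1. SelectPairHalf
// is simplified to the single-result case.
#include <cassert>
#include <vector>

static bool isVTRNMaskSimple(const std::vector<int> &M, unsigned NumElts,
                             unsigned &WhichResult) {
  WhichResult = (M[0] == 0) ? 0 : 1;
  for (unsigned j = 0; j < NumElts; j += 2) {
    if (M[j] >= 0 && (unsigned)M[j] != j + WhichResult)
      return false;
    if (M[j + 1] >= 0 && (unsigned)M[j + 1] != j + NumElts + WhichResult)
      return false;
  }
  return true;
}

int main() {
  unsigned Which;
  assert(isVTRNMaskSimple({0, 4, 2, 6}, 4, Which) && Which == 0);
  assert(isVTRNMaskSimple({1, 5, 3, 7}, 4, Which) && Which == 1);
  assert(!isVTRNMaskSimple({0, 4, 1, 5}, 4, Which)); // that one is a VZIP mask
  return 0;
}
// ---------------------------------------------------------------------------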
7280
7281/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
7282/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
7283/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
7284static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
7285 unsigned EltSz = VT.getScalarSizeInBits();
7286 if (EltSz == 64)
7287 return false;
7288
7289 unsigned NumElts = VT.getVectorNumElements();
7290 if (M.size() != NumElts && M.size() != NumElts*2)
7291 return false;
7292
7293 for (unsigned i = 0; i < M.size(); i += NumElts) {
7294 WhichResult = SelectPairHalf(NumElts, M, i);
7295 for (unsigned j = 0; j < NumElts; j += 2) {
7296 if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
7297 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + WhichResult))
7298 return false;
7299 }
7300 }
7301
7302 if (M.size() == NumElts*2)
7303 WhichResult = 0;
7304
7305 return true;
7306}
7307
7308// Checks whether the shuffle mask represents a vector unzip (VUZP) by checking
7309// that the mask elements are either all even and in steps of size 2 or all odd
7310// and in steps of size 2.
7311// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 2, 4, 6]
7312// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,c,e,g}
7313// v2={e,f,g,h}
7314// Requires similar checks to that of isVTRNMask with
7315// respect to how the results are returned.
7316static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
7317 unsigned EltSz = VT.getScalarSizeInBits();
7318 if (EltSz == 64)
7319 return false;
7320
7321 unsigned NumElts = VT.getVectorNumElements();
7322 if (M.size() != NumElts && M.size() != NumElts*2)
7323 return false;
7324
7325 for (unsigned i = 0; i < M.size(); i += NumElts) {
7326 WhichResult = SelectPairHalf(NumElts, M, i);
7327 for (unsigned j = 0; j < NumElts; ++j) {
7328 if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult)
7329 return false;
7330 }
7331 }
7332
7333 if (M.size() == NumElts*2)
7334 WhichResult = 0;
7335
7336 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7337 if (VT.is64BitVector() && EltSz == 32)
7338 return false;
7339
7340 return true;
7341}
7342
7343/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
7344/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
7345/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
7346static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
7347 unsigned EltSz = VT.getScalarSizeInBits();
7348 if (EltSz == 64)
7349 return false;
7350
7351 unsigned NumElts = VT.getVectorNumElements();
7352 if (M.size() != NumElts && M.size() != NumElts*2)
7353 return false;
7354
7355 unsigned Half = NumElts / 2;
7356 for (unsigned i = 0; i < M.size(); i += NumElts) {
7357 WhichResult = SelectPairHalf(NumElts, M, i);
7358 for (unsigned j = 0; j < NumElts; j += Half) {
7359 unsigned Idx = WhichResult;
7360 for (unsigned k = 0; k < Half; ++k) {
7361 int MIdx = M[i + j + k];
7362 if (MIdx >= 0 && (unsigned) MIdx != Idx)
7363 return false;
7364 Idx += 2;
7365 }
7366 }
7367 }
7368
7369 if (M.size() == NumElts*2)
7370 WhichResult = 0;
7371
7372 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7373 if (VT.is64BitVector() && EltSz == 32)
7374 return false;
7375
7376 return true;
7377}
7378
7379// Checks whether the shuffle mask represents a vector zip (VZIP) by checking
7380// that pairs of elements of the shufflemask represent the same index in each
7381// vector incrementing sequentially through the vectors.
7382// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 1, 5]
7383// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,b,f}
7384// v2={e,f,g,h}
7385// Requires similar checks to that of isVTRNMask with respect to how the results
7386// are returned.
7387static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
7388 unsigned EltSz = VT.getScalarSizeInBits();
7389 if (EltSz == 64)
7390 return false;
7391
7392 unsigned NumElts = VT.getVectorNumElements();
7393 if (M.size() != NumElts && M.size() != NumElts*2)
7394 return false;
7395
7396 for (unsigned i = 0; i < M.size(); i += NumElts) {
7397 WhichResult = SelectPairHalf(NumElts, M, i);
7398 unsigned Idx = WhichResult * NumElts / 2;
7399 for (unsigned j = 0; j < NumElts; j += 2) {
7400 if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
7401 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx + NumElts))
7402 return false;
7403 Idx += 1;
7404 }
7405 }
7406
7407 if (M.size() == NumElts*2)
7408 WhichResult = 0;
7409
7410 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7411 if (VT.is64BitVector() && EltSz == 32)
7412 return false;
7413
7414 return true;
7415}
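// ---------------------------------------------------------------------------
// Illustration (not part of ARMISelLowering.cpp): generates the canonical
// v4i32 masks that the VUZP and VZIP checks above accept, so the two patterns
// can be compared side by side:
//   VUZP result 0 takes every even element across both inputs: [0,2,4,6];
//   VZIP result 0 interleaves the low halves of both inputs:    [0,4,1,5].
#include <cassert>
#include <vector>

static std::vector<int> vuzpMask(unsigned NumElts, unsigned WhichResult) {
  std::vector<int> M(NumElts);
  for (unsigned j = 0; j < NumElts; ++j)
    M[j] = 2 * j + WhichResult;
  return M;
}

static std::vector<int> vzipMask(unsigned NumElts, unsigned WhichResult) {
  std::vector<int> M(NumElts);
  unsigned Idx = WhichResult * NumElts / 2;
  for (unsigned j = 0; j < NumElts; j += 2, ++Idx) {
    M[j] = Idx;
    M[j + 1] = Idx + NumElts;
  }
  return M;
}

int main() {
  assert((vuzpMask(4, 0) == std::vector<int>{0, 2, 4, 6}));
  assert((vuzpMask(4, 1) == std::vector<int>{1, 3, 5, 7}));
  assert((vzipMask(4, 0) == std::vector<int>{0, 4, 1, 5}));
  assert((vzipMask(4, 1) == std::vector<int>{2, 6, 3, 7}));
  return 0;
}
// ---------------------------------------------------------------------------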
7416
7417/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
7418/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
7419/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
7420static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
7421 unsigned EltSz = VT.getScalarSizeInBits();
7422 if (EltSz == 64)
7423 return false;
7424
7425 unsigned NumElts = VT.getVectorNumElements();
7426 if (M.size() != NumElts && M.size() != NumElts*2)
7427 return false;
7428
7429 for (unsigned i = 0; i < M.size(); i += NumElts) {
7430 WhichResult = SelectPairHalf(NumElts, M, i);
7431 unsigned Idx = WhichResult * NumElts / 2;
7432 for (unsigned j = 0; j < NumElts; j += 2) {
7433 if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
7434 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx))
7435 return false;
7436 Idx += 1;
7437 }
7438 }
7439
7440 if (M.size() == NumElts*2)
7441 WhichResult = 0;
7442
7443 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7444 if (VT.is64BitVector() && EltSz == 32)
7445 return false;
7446
7447 return true;
7448}
7449
7450/// Check if \p ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN),
7451/// and return the corresponding ARMISD opcode if it is, or 0 if it isn't.
7452static unsigned isNEONTwoResultShuffleMask(ArrayRef<int> ShuffleMask, EVT VT,
7453 unsigned &WhichResult,
7454 bool &isV_UNDEF) {
7455 isV_UNDEF = false;
7456 if (isVTRNMask(ShuffleMask, VT, WhichResult))
7457 return ARMISD::VTRN;
7458 if (isVUZPMask(ShuffleMask, VT, WhichResult))
7459 return ARMISD::VUZP;
7460 if (isVZIPMask(ShuffleMask, VT, WhichResult))
7461 return ARMISD::VZIP;
7462
7463 isV_UNDEF = true;
7464 if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
7465 return ARMISD::VTRN;
7466 if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
7467 return ARMISD::VUZP;
7468 if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
7469 return ARMISD::VZIP;
7470
7471 return 0;
7472}
7473
7474/// \return true if this is a reverse operation on a vector.
7475static bool isReverseMask(ArrayRef<int> M, EVT VT) {
7476 unsigned NumElts = VT.getVectorNumElements();
7477 // Make sure the mask has the right size.
7478 if (NumElts != M.size())
7479 return false;
7480
7481 // Look for <15, ..., 3, -1, 1, 0>.
7482 for (unsigned i = 0; i != NumElts; ++i)
7483 if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))
7484 return false;
7485
7486 return true;
7487}
7488
7489static bool isVMOVNMask(ArrayRef<int> M, EVT VT, bool Top, bool SingleSource) {
7490 unsigned NumElts = VT.getVectorNumElements();
7491 // Make sure the mask has the right size.
7492 if (NumElts != M.size() || (VT != MVT::v8i16 && VT != MVT::v16i8))
7493 return false;
7494
7495 // If Top
7496 // Look for <0, N, 2, N+2, 4, N+4, ..>.
7497 // This inserts Input2 into Input1
7498 // else if not Top
7499 // Look for <0, N+1, 2, N+3, 4, N+5, ..>
7500 // This inserts Input1 into Input2
7501 unsigned Offset = Top ? 0 : 1;
7502 unsigned N = SingleSource ? 0 : NumElts;
7503 for (unsigned i = 0; i < NumElts; i += 2) {
7504 if (M[i] >= 0 && M[i] != (int)i)
7505 return false;
7506 if (M[i + 1] >= 0 && M[i + 1] != (int)(N + i + Offset))
7507 return false;
7508 }
7509
7510 return true;
7511}
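// ---------------------------------------------------------------------------
// Illustration (not part of ARMISelLowering.cpp): builds the two v8i16 masks
// the isVMOVNMask check above describes for a two-source shuffle (N = 8):
//   Top:      <0, N,   2, N+2, 4, N+4, ...>  (insert Input2 into Input1)
//   not Top:  <0, N+1, 2, N+3, 4, N+5, ...>  (insert Input1 into Input2)
#include <cassert>
#include <vector>

static std::vector<int> vmovnMask(unsigned NumElts, bool Top) {
  unsigned Offset = Top ? 0 : 1;
  std::vector<int> M(NumElts);
  for (unsigned i = 0; i < NumElts; i += 2) {
    M[i] = i;                          // even lanes stay in place
    M[i + 1] = NumElts + i + Offset;   // odd lanes come from the other input
  }
  return M;
}

int main() {
  assert((vmovnMask(8, /*Top=*/true) ==
          std::vector<int>{0, 8, 2, 10, 4, 12, 6, 14}));
  assert((vmovnMask(8, /*Top=*/false) ==
          std::vector<int>{0, 9, 2, 11, 4, 13, 6, 15}));
  return 0;
}
// ---------------------------------------------------------------------------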
7512
7513static bool isVMOVNTruncMask(ArrayRef<int> M, EVT ToVT, bool rev) {
7514 unsigned NumElts = ToVT.getVectorNumElements();
7515 if (NumElts != M.size())
7516 return false;
7517
7518 // Test if the Trunc can be converted to a VMOVN with this shuffle. We are
7519 // looking for patterns of:
7520 // !rev: 0 N/2 1 N/2+1 2 N/2+2 ...
7521 // rev: N/2 0 N/2+1 1 N/2+2 2 ...
7522
7523 unsigned Off0 = rev ? NumElts / 2 : 0;
7524 unsigned Off1 = rev ? 0 : NumElts / 2;
7525 for (unsigned i = 0; i < NumElts; i += 2) {
7526 if (M[i] >= 0 && M[i] != (int)(Off0 + i / 2))
7527 return false;
7528 if (M[i + 1] >= 0 && M[i + 1] != (int)(Off1 + i / 2))
7529 return false;
7530 }
7531
7532 return true;
7533}
7534
7535// Reconstruct an MVE VCVT from a BuildVector of scalar fptrunc, all extracted
7536// from a pair of inputs. For example:
7537// BUILDVECTOR(FP_ROUND(EXTRACT_ELT(X, 0),
7538// FP_ROUND(EXTRACT_ELT(Y, 0),
7539// FP_ROUND(EXTRACT_ELT(X, 1),
7540// FP_ROUND(EXTRACT_ELT(Y, 1), ...)
7541static SDValue LowerBuildVectorOfFPTrunc(SDValue BV, SelectionDAG &DAG,
7542 const ARMSubtarget *ST) {
7543 assert(BV.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
7544 if (!ST->hasMVEFloatOps())
7545 return SDValue();
7546
7547 SDLoc dl(BV);
7548 EVT VT = BV.getValueType();
7549 if (VT != MVT::v8f16)
7550 return SDValue();
7551
7552 // We are looking for a buildvector of fptrunc elements, where all the
7553 // elements are interleavingly extracted from two sources. Check the first two
7554 // items are valid enough and extract some info from them (they are checked
7555 // properly in the loop below).
7556 if (BV.getOperand(0).getOpcode() != ISD::FP_ROUND ||
7557 BV.getOperand(0).getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
7558 BV.getOperand(0).getOperand(0).getConstantOperandVal(1) != 0)
7559 return SDValue();
7560 if (BV.getOperand(1).getOpcode() != ISD::FP_ROUND ||
7561 BV.getOperand(1).getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
7562 BV.getOperand(1).getOperand(0).getConstantOperandVal(1) != 0)
7563 return SDValue();
7564 SDValue Op0 = BV.getOperand(0).getOperand(0).getOperand(0);
7565 SDValue Op1 = BV.getOperand(1).getOperand(0).getOperand(0);
7566 if (Op0.getValueType() != MVT::v4f32 || Op1.getValueType() != MVT::v4f32)
7567 return SDValue();
7568
7569 // Check all the values in the BuildVector line up with our expectations.
7570 for (unsigned i = 1; i < 4; i++) {
7571 auto Check = [](SDValue Trunc, SDValue Op, unsigned Idx) {
7572 return Trunc.getOpcode() == ISD::FP_ROUND &&
7573 Trunc.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7574 Trunc.getOperand(0).getOperand(0) == Op &&
7575 Trunc.getOperand(0).getConstantOperandVal(1) == Idx;
7576 };
7577 if (!Check(BV.getOperand(i * 2 + 0), Op0, i))
7578 return SDValue();
7579 if (!Check(BV.getOperand(i * 2 + 1), Op1, i))
7580 return SDValue();
7581 }
7582
7583 SDValue N1 = DAG.getNode(ARMISD::VCVTN, dl, VT, DAG.getUNDEF(VT), Op0,
7584 DAG.getConstant(0, dl, MVT::i32));
7585 return DAG.getNode(ARMISD::VCVTN, dl, VT, N1, Op1,
7586 DAG.getConstant(1, dl, MVT::i32));
7587}
7588
7589// Reconstruct an MVE VCVT from a BuildVector of scalar fpext, all extracted
7590// from a single input on alternating lanes. For example:
7591// BUILDVECTOR(FP_ROUND(EXTRACT_ELT(X, 0),
7592// FP_ROUND(EXTRACT_ELT(X, 2),
7593// FP_ROUND(EXTRACT_ELT(X, 4), ...)
7594static SDValue LowerBuildVectorOfFPExt(SDValue BV, SelectionDAG &DAG,
7595 const ARMSubtarget *ST) {
7596 assert(BV.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
7597 if (!ST->hasMVEFloatOps())
7598 return SDValue();
7599
7600 SDLoc dl(BV);
7601 EVT VT = BV.getValueType();
7602 if (VT != MVT::v4f32)
7603 return SDValue();
7604
7605 // We are looking for a buildvector of fpext elements, where all the
7606 // elements are alternating lanes from a single source. For example <0,2,4,6>
7607 // or <1,3,5,7>. Check the first two items are valid enough and extract some
7608 // info from them (they are checked properly in the loop below).
7609 if (BV.getOperand(0).getOpcode() != ISD::FP_EXTEND ||
7610 BV.getOperand(0).getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT)
7611 return SDValue();
7612 SDValue Op0 = BV.getOperand(0).getOperand(0).getOperand(0);
7613 int Offset = BV.getOperand(0).getOperand(0).getConstantOperandVal(1);
7614 if (Op0.getValueType() != MVT::v8f16 || (Offset != 0 && Offset != 1))
7615 return SDValue();
7616
7617 // Check all the values in the BuildVector line up with our expectations.
7618 for (unsigned i = 1; i < 4; i++) {
7619 auto Check = [](SDValue Trunc, SDValue Op, unsigned Idx) {
7620 return Trunc.getOpcode() == ISD::FP_EXTEND &&
7621 Trunc.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7622 Trunc.getOperand(0).getOperand(0) == Op &&
7623 Trunc.getOperand(0).getConstantOperandVal(1) == Idx;
7624 };
7625 if (!Check(BV.getOperand(i), Op0, 2 * i + Offset))
7626 return SDValue();
7627 }
7628
7629 return DAG.getNode(ARMISD::VCVTL, dl, VT, Op0,
7630 DAG.getConstant(Offset, dl, MVT::i32));
7631}
7632
7633// If N is an integer constant that can be moved into a register in one
7634// instruction, return an SDValue of such a constant (will become a MOV
7635// instruction). Otherwise return null.
7636static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
7637 const ARMSubtarget *ST, const SDLoc &dl) {
7638 uint64_t Val;
7639 if (!isa<ConstantSDNode>(N))
7640 return SDValue();
7641 Val = cast<ConstantSDNode>(N)->getZExtValue();
7642
7643 if (ST->isThumb1Only()) {
7644 if (Val <= 255 || ~Val <= 255)
7645 return DAG.getConstant(Val, dl, MVT::i32);
7646 } else {
7647 if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
7648 return DAG.getConstant(Val, dl, MVT::i32);
7649 }
7650 return SDValue();
7651}
7652
7653static SDValue LowerBUILD_VECTOR_i1(SDValue Op, SelectionDAG &DAG,
7654 const ARMSubtarget *ST) {
7655 SDLoc dl(Op);
7656 EVT VT = Op.getValueType();
7657
7658 assert(ST->hasMVEIntegerOps() && "LowerBUILD_VECTOR_i1 called without MVE!");
7659
7660 unsigned NumElts = VT.getVectorNumElements();
7661 unsigned BoolMask;
7662 unsigned BitsPerBool;
7663 if (NumElts == 2) {
7664 BitsPerBool = 8;
7665 BoolMask = 0xff;
7666 } else if (NumElts == 4) {
7667 BitsPerBool = 4;
7668 BoolMask = 0xf;
7669 } else if (NumElts == 8) {
7670 BitsPerBool = 2;
7671 BoolMask = 0x3;
7672 } else if (NumElts == 16) {
7673 BitsPerBool = 1;
7674 BoolMask = 0x1;
7675 } else
7676 return SDValue();
7677
7678 // If this is a single value copied into all lanes (a splat), we can just sign
7679 // extend that single value
7680 SDValue FirstOp = Op.getOperand(0);
7681 if (!isa<ConstantSDNode>(FirstOp) &&
7682 llvm::all_of(llvm::drop_begin(Op->ops()), [&FirstOp](const SDUse &U) {
7683 return U.get().isUndef() || U.get() == FirstOp;
7684 })) {
7685 SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i32, FirstOp,
7686 DAG.getValueType(MVT::i1));
7687 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, Op.getValueType(), Ext);
7688 }
7689
7690 // First create base with bits set where known
7691 unsigned Bits32 = 0;
7692 for (unsigned i = 0; i < NumElts; ++i) {
7693 SDValue V = Op.getOperand(i);
7694 if (!isa<ConstantSDNode>(V) && !V.isUndef())
7695 continue;
7696 bool BitSet = V.isUndef() ? false : cast<ConstantSDNode>(V)->getZExtValue();
7697 if (BitSet)
7698 Bits32 |= BoolMask << (i * BitsPerBool);
7699 }
7700
7701 // Add in unknown nodes
7702 SDValue Base = DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT,
7703 DAG.getConstant(Bits32, dl, MVT::i32));
7704 for (unsigned i = 0; i < NumElts; ++i) {
7705 SDValue V = Op.getOperand(i);
7706 if (isa<ConstantSDNode>(V) || V.isUndef())
7707 continue;
7708 Base = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Base, V,
7709 DAG.getConstant(i, dl, MVT::i32));
7710 }
7711
7712 return Base;
7713}
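// Worked example (illustrative only): for a constant v4i1 build vector such
// as <1,0,1,1>, BitsPerBool is 4 and BoolMask is 0xf, so the loop above
// produces Bits32 = 0xf | (0xf << 8) | (0xf << 12) = 0xff0f before the
// PREDICATE_CAST back to v4i1; any non-constant lanes are then patched in
// with INSERT_VECTOR_ELT.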
7714
7715static SDValue LowerBUILD_VECTORToVIDUP(SDValue Op, SelectionDAG &DAG,
7716 const ARMSubtarget *ST) {
7717 if (!ST->hasMVEIntegerOps())
7718 return SDValue();
7719
7720 // We are looking for a buildvector where each element is Op[0] + i*N
7721 EVT VT = Op.getValueType();
7722 SDValue Op0 = Op.getOperand(0);
7723 unsigned NumElts = VT.getVectorNumElements();
7724
7725 // Get the increment value from operand 1
7726 SDValue Op1 = Op.getOperand(1);
7727 if (Op1.getOpcode() != ISD::ADD || Op1.getOperand(0) != Op0 ||
7728 !isa<ConstantSDNode>(Op1.getOperand(1)))
7729 return SDValue();
7730 unsigned N = Op1.getConstantOperandVal(1);
7731 if (N != 1 && N != 2 && N != 4 && N != 8)
7732 return SDValue();
7733
7734 // Check that each other operand matches
7735 for (unsigned I = 2; I < NumElts; I++) {
7736 SDValue OpI = Op.getOperand(I);
7737 if (OpI.getOpcode() != ISD::ADD || OpI.getOperand(0) != Op0 ||
7738 !isa<ConstantSDNode>(OpI.getOperand(1)) ||
7739 OpI.getConstantOperandVal(1) != I * N)
7740 return SDValue();
7741 }
7742
7743 SDLoc DL(Op);
7744 return DAG.getNode(ARMISD::VIDUP, DL, DAG.getVTList(VT, MVT::i32), Op0,
7745 DAG.getConstant(N, DL, MVT::i32));
7746}
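// Worked example (illustrative only): a v4i32 build vector of the form
// (x, x+2, x+4, x+6) matches with N == 2, since operand I equals
// add(x, I*N) for I = 1..3, and is lowered to ARMISD::VIDUP(x, 2).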
7747
7748// Returns true if the operation N can be treated as a qr instruction variant
7749// at operand Op.
7750static bool IsQRMVEInstruction(const SDNode *N, const SDNode *Op) {
7751 switch (N->getOpcode()) {
7752 case ISD::ADD:
7753 case ISD::MUL:
7754 case ISD::SADDSAT:
7755 case ISD::UADDSAT:
7756 return true;
7757 case ISD::SUB:
7758 case ISD::SSUBSAT:
7759 case ISD::USUBSAT:
7760 return N->getOperand(1).getNode() == Op;
7761 case ISD::INTRINSIC_WO_CHAIN:
7762 switch (N->getConstantOperandVal(0)) {
7763 case Intrinsic::arm_mve_add_predicated:
7764 case Intrinsic::arm_mve_mul_predicated:
7765 case Intrinsic::arm_mve_qadd_predicated:
7766 case Intrinsic::arm_mve_vhadd:
7767 case Intrinsic::arm_mve_hadd_predicated:
7768 case Intrinsic::arm_mve_vqdmulh:
7769 case Intrinsic::arm_mve_qdmulh_predicated:
7770 case Intrinsic::arm_mve_vqrdmulh:
7771 case Intrinsic::arm_mve_qrdmulh_predicated:
7772 case Intrinsic::arm_mve_vqdmull:
7773 case Intrinsic::arm_mve_vqdmull_predicated:
7774 return true;
7775 case Intrinsic::arm_mve_sub_predicated:
7776 case Intrinsic::arm_mve_qsub_predicated:
7777 case Intrinsic::arm_mve_vhsub:
7778 case Intrinsic::arm_mve_hsub_predicated:
7779 return N->getOperand(2).getNode() == Op;
7780 default:
7781 return false;
7782 }
7783 default:
7784 return false;
7785 }
7786}
7787
7788// If this is a case we can't handle, return null and let the default
7789// expansion code take care of it.
7790SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
7791 const ARMSubtarget *ST) const {
7792 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
7793 SDLoc dl(Op);
7794 EVT VT = Op.getValueType();
7795
7796 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
7797 return LowerBUILD_VECTOR_i1(Op, DAG, ST);
7798
7799 if (SDValue R = LowerBUILD_VECTORToVIDUP(Op, DAG, ST))
7800 return R;
7801
7802 APInt SplatBits, SplatUndef;
7803 unsigned SplatBitSize;
7804 bool HasAnyUndefs;
7805 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
7806 if (SplatUndef.isAllOnes())
7807 return DAG.getUNDEF(VT);
7808
7809 // If all the users of this constant splat are qr instruction variants,
7810 // generate a vdup of the constant.
7811 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == SplatBitSize &&
7812 (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32) &&
7813 all_of(BVN->uses(),
7814 [BVN](const SDNode *U) { return IsQRMVEInstruction(U, BVN); })) {
7815 EVT DupVT = SplatBitSize == 32 ? MVT::v4i32
7816 : SplatBitSize == 16 ? MVT::v8i16
7817 : MVT::v16i8;
7818 SDValue Const = DAG.getConstant(SplatBits.getZExtValue(), dl, MVT::i32);
7819 SDValue VDup = DAG.getNode(ARMISD::VDUP, dl, DupVT, Const);
7820 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, VDup);
7821 }
7822
7823 if ((ST->hasNEON() && SplatBitSize <= 64) ||
7824 (ST->hasMVEIntegerOps() && SplatBitSize <= 64)) {
7825 // Check if an immediate VMOV works.
7826 EVT VmovVT;
7827 SDValue Val =
7828 isVMOVModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
7829 SplatBitSize, DAG, dl, VmovVT, VT, VMOVModImm);
7830
7831 if (Val.getNode()) {
7832 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
7833 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
7834 }
7835
7836 // Try an immediate VMVN.
7837 uint64_t NegatedImm = (~SplatBits).getZExtValue();
7838 Val = isVMOVModifiedImm(
7839 NegatedImm, SplatUndef.getZExtValue(), SplatBitSize, DAG, dl, VmovVT,
7840 VT, ST->hasMVEIntegerOps() ? MVEVMVNModImm : VMVNModImm);
7841 if (Val.getNode()) {
7842 SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
7843 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
7844 }
7845
7846 // Use vmov.f32 to materialize other v2f32 and v4f32 splats.
7847 if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {
7848 int ImmVal = ARM_AM::getFP32Imm(SplatBits);
7849 if (ImmVal != -1) {
7850 SDValue Val = DAG.getTargetConstant(ImmVal, dl, MVT::i32);
7851 return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
7852 }
7853 }
7854
7855 // If we are under MVE, generate a VDUP(constant), bitcast to the original
7856 // type.
7857 if (ST->hasMVEIntegerOps() &&
7858 (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32)) {
7859 EVT DupVT = SplatBitSize == 32 ? MVT::v4i32
7860 : SplatBitSize == 16 ? MVT::v8i16
7861 : MVT::v16i8;
7862 SDValue Const = DAG.getConstant(SplatBits.getZExtValue(), dl, MVT::i32);
7863 SDValue VDup = DAG.getNode(ARMISD::VDUP, dl, DupVT, Const);
7864 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, VDup);
7865 }
7866 }
7867 }
7868
7869 // Scan through the operands to see if only one value is used.
7870 //
7871 // As an optimisation, even if more than one value is used it may be more
7872 // profitable to splat with one value then change some lanes.
7873 //
7874 // Heuristically we decide to do this if the vector has a "dominant" value,
7875 // defined as splatted to more than half of the lanes.
7876 unsigned NumElts = VT.getVectorNumElements();
7877 bool isOnlyLowElement = true;
7878 bool usesOnlyOneValue = true;
7879 bool hasDominantValue = false;
7880 bool isConstant = true;
7881
7882 // Map of the number of times a particular SDValue appears in the
7883 // element list.
7884 DenseMap<SDValue, unsigned> ValueCounts;
7885 SDValue Value;
7886 for (unsigned i = 0; i < NumElts; ++i) {
7887 SDValue V = Op.getOperand(i);
7888 if (V.isUndef())
7889 continue;
7890 if (i > 0)
7891 isOnlyLowElement = false;
7892 if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
7893 isConstant = false;
7894
7895 ValueCounts.insert(std::make_pair(V, 0));
7896 unsigned &Count = ValueCounts[V];
7897
7898 // Is this value dominant? (takes up more than half of the lanes)
7899 if (++Count > (NumElts / 2)) {
7900 hasDominantValue = true;
7901 Value = V;
7902 }
7903 }
7904 if (ValueCounts.size() != 1)
7905 usesOnlyOneValue = false;
7906 if (!Value.getNode() && !ValueCounts.empty())
7907 Value = ValueCounts.begin()->first;
7908
7909 if (ValueCounts.empty())
7910 return DAG.getUNDEF(VT);
7911
7912 // Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR.
7913 // Keep going if we are hitting this case.
7914 if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode()))
7915 return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
7916
7917 unsigned EltSize = VT.getScalarSizeInBits();
7918
7919 // Use VDUP for non-constant splats. For f32 constant splats, reduce to
7920 // i32 and try again.
7921 if (hasDominantValue && EltSize <= 32) {
7922 if (!isConstant) {
7923 SDValue N;
7924
7925 // If we are VDUPing a value that comes directly from a vector, that will
7926 // cause an unnecessary move to and from a GPR, where instead we could
7927 // just use VDUPLANE. We can only do this if the lane being extracted
7928 // is at a constant index, as the VDUP from lane instructions only have
7929 // constant-index forms.
7930 ConstantSDNode *constIndex;
7931 if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7932 (constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1)))) {
7933 // We need to create a new undef vector to use for the VDUPLANE if the
7934 // size of the vector from which we get the value is different than the
7935 // size of the vector that we need to create. We will insert the element
7936 // such that the register coalescer will remove unnecessary copies.
7937 if (VT != Value->getOperand(0).getValueType()) {
7938 unsigned index = constIndex->getAPIntValue().getLimitedValue() %
7939 VT.getVectorNumElements();
7940 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
7941 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),
7942 Value, DAG.getConstant(index, dl, MVT::i32)),
7943 DAG.getConstant(index, dl, MVT::i32));
7944 } else
7945 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
7946 Value->getOperand(0), Value->getOperand(1));
7947 } else
7948 N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);
7949
7950 if (!usesOnlyOneValue) {
7951 // The dominant value was splatted as 'N', but we now have to insert
7952 // all differing elements.
7953 for (unsigned I = 0; I < NumElts; ++I) {
7954 if (Op.getOperand(I) == Value)
7955 continue;
7956 SmallVector<SDValue, 3> Ops;
7957 Ops.push_back(N);
7958 Ops.push_back(Op.getOperand(I));
7959 Ops.push_back(DAG.getConstant(I, dl, MVT::i32));
7960 N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ops);
7961 }
7962 }
7963 return N;
7964 }
7965 if (VT.getVectorElementType().isFloatingPoint()) {
7966 SmallVector<SDValue, 8> Ops;
7967 MVT FVT = VT.getVectorElementType().getSimpleVT();
7968 assert(FVT == MVT::f32 || FVT == MVT::f16);
7969 MVT IVT = (FVT == MVT::f32) ? MVT::i32 : MVT::i16;
7970 for (unsigned i = 0; i < NumElts; ++i)
7971 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, IVT,
7972 Op.getOperand(i)));
7973 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), IVT, NumElts);
7974 SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
7975 Val = LowerBUILD_VECTOR(Val, DAG, ST);
7976 if (Val.getNode())
7977 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
7978 }
7979 if (usesOnlyOneValue) {
7980 SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
7981 if (isConstant && Val.getNode())
7982 return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
7983 }
7984 }
7985
7986 // If all elements are constants and the case above didn't get hit, fall back
7987 // to the default expansion, which will generate a load from the constant
7988 // pool.
7989 if (isConstant)
7990 return SDValue();
7991
7992 // Reconstruct the BUILDVECTOR to one of the legal shuffles (such as vext and
7993 // vmovn). Empirical tests suggest this is rarely worth it for vectors of
7994 // length <= 2.
7995 if (NumElts >= 4)
7996 if (SDValue shuffle = ReconstructShuffle(Op, DAG))
7997 return shuffle;
7998
7999 // Attempt to turn a buildvector of scalar fptrunc's or fpext's back into
8000 // VCVT's
8001 if (SDValue VCVT = LowerBuildVectorOfFPTrunc(Op, DAG, Subtarget))
8002 return VCVT;
8003 if (SDValue VCVT = LowerBuildVectorOfFPExt(Op, DAG, Subtarget))
8004 return VCVT;
8005
8006 if (ST->hasNEON() && VT.is128BitVector() && VT != MVT::v2f64 && VT != MVT::v4f32) {
8007 // If we haven't found an efficient lowering, try splitting a 128-bit vector
8008 // into two 64-bit vectors; we might discover a better way to lower it.
8009 SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElts);
8010 EVT ExtVT = VT.getVectorElementType();
8011 EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElts / 2);
8012 SDValue Lower =
8013 DAG.getBuildVector(HVT, dl, makeArrayRef(&Ops[0], NumElts / 2));
8014 if (Lower.getOpcode() == ISD::BUILD_VECTOR)
8015 Lower = LowerBUILD_VECTOR(Lower, DAG, ST);
8016 SDValue Upper = DAG.getBuildVector(
8017 HVT, dl, makeArrayRef(&Ops[NumElts / 2], NumElts / 2));
8018 if (Upper.getOpcode() == ISD::BUILD_VECTOR)
8019 Upper = LowerBUILD_VECTOR(Upper, DAG, ST);
8020 if (Lower && Upper)
8021 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lower, Upper);
8022 }
8023
8024 // Vectors with 32- or 64-bit elements can be built by directly assigning
8025 // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands
8026 // will be legalized.
8027 if (EltSize >= 32) {
8028 // Do the expansion with floating-point types, since that is what the VFP
8029 // registers are defined to use, and since i64 is not legal.
8030 EVT EltVT = EVT::getFloatingPointVT(EltSize);
8031 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
8032 SmallVector<SDValue, 8> Ops;
8033 for (unsigned i = 0; i < NumElts; ++i)
8034 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
8035 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
8036 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
8037 }
8038
8039 // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
8040 // know the default expansion would otherwise fall back on something even
8041 // worse. For a vector with one or two non-undef values, that's
8042 // scalar_to_vector for the elements followed by a shuffle (provided the
8043 // shuffle is valid for the target) and materialization element by element
8044 // on the stack followed by a load for everything else.
8045 if (!isConstant && !usesOnlyOneValue) {
8046 SDValue Vec = DAG.getUNDEF(VT);
8047 for (unsigned i = 0 ; i < NumElts; ++i) {
8048 SDValue V = Op.getOperand(i);
8049 if (V.isUndef())
8050 continue;
8051 SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i32);
8052 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
8053 }
8054 return Vec;
8055 }
8056
8057 return SDValue();
8058}
8059
8060// Gather data to see if the operation can be modelled as a
8061// shuffle in combination with VEXTs.
8062SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
8063 SelectionDAG &DAG) const {
8064 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
8065 SDLoc dl(Op);
8066 EVT VT = Op.getValueType();
8067 unsigned NumElts = VT.getVectorNumElements();
8068
8069 struct ShuffleSourceInfo {
8070 SDValue Vec;
8071 unsigned MinElt = std::numeric_limits<unsigned>::max();
8072 unsigned MaxElt = 0;
8073
8074 // We may insert some combination of BITCASTs and VEXT nodes to force Vec to
8075 // be compatible with the shuffle we intend to construct. As a result
8076 // ShuffleVec will be some sliding window into the original Vec.
8077 SDValue ShuffleVec;
8078
8079 // Code should guarantee that element i in Vec starts at element
8080 // "WindowBase + i * WindowScale" in ShuffleVec.
8081 int WindowBase = 0;
8082 int WindowScale = 1;
8083
8084 ShuffleSourceInfo(SDValue Vec) : Vec(Vec), ShuffleVec(Vec) {}
8085
8086 bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
8087 };
8088
8089 // First gather all vectors used as an immediate source for this BUILD_VECTOR
8090 // node.
8091 SmallVector<ShuffleSourceInfo, 2> Sources;
8092 for (unsigned i = 0; i < NumElts; ++i) {
8093 SDValue V = Op.getOperand(i);
8094 if (V.isUndef())
8095 continue;
8096 else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
8097 // A shuffle can only come from building a vector from various
8098 // elements of other vectors.
8099 return SDValue();
8100 } else if (!isa<ConstantSDNode>(V.getOperand(1))) {
8101 // Furthermore, shuffles require a constant mask, whereas extractelts
8102 // accept variable indices.
8103 return SDValue();
8104 }
8105
8106 // Add this element source to the list if it's not already there.
8107 SDValue SourceVec = V.getOperand(0);
8108 auto Source = llvm::find(Sources, SourceVec);
8109 if (Source == Sources.end())
8110 Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
8111
8112 // Update the minimum and maximum lane number seen.
8113 unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
8114 Source->MinElt = std::min(Source->MinElt, EltNo);
8115 Source->MaxElt = std::max(Source->MaxElt, EltNo);
8116 }
8117
8118 // Currently only do something sane when at most two source vectors
8119 // are involved.
8120 if (Sources.size() > 2)
8121 return SDValue();
8122
8123 // Find out the smallest element size among result and two sources, and use
8124 // it as element size to build the shuffle_vector.
8125 EVT SmallestEltTy = VT.getVectorElementType();
8126 for (auto &Source : Sources) {
8127 EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
8128 if (SrcEltTy.bitsLT(SmallestEltTy))
8129 SmallestEltTy = SrcEltTy;
8130 }
8131 unsigned ResMultiplier =
8132 VT.getScalarSizeInBits() / SmallestEltTy.getSizeInBits();
8133 NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
8134 EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
8135
8136 // If the source vector is too wide or too narrow, we may nevertheless be able
8137 // to construct a compatible shuffle either by concatenating it with UNDEF or
8138 // extracting a suitable range of elements.
8139 for (auto &Src : Sources) {
8140 EVT SrcVT = Src.ShuffleVec.getValueType();
8141
8142 uint64_t SrcVTSize = SrcVT.getFixedSizeInBits();
8143 uint64_t VTSize = VT.getFixedSizeInBits();
8144 if (SrcVTSize == VTSize)
8145 continue;
8146
8147 // This stage of the search produces a source with the same element type as
8148 // the original, but with a total width matching the BUILD_VECTOR output.
8149 EVT EltVT = SrcVT.getVectorElementType();
8150 unsigned NumSrcElts = VTSize / EltVT.getFixedSizeInBits();
8151 EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
8152
8153 if (SrcVTSize < VTSize) {
8154 if (2 * SrcVTSize != VTSize)
8155 return SDValue();
8156 // We can pad out the smaller vector for free, so if it's part of a
8157 // shuffle...
8158 Src.ShuffleVec =
8159 DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
8160 DAG.getUNDEF(Src.ShuffleVec.getValueType()));
8161 continue;
8162 }
8163
8164 if (SrcVTSize != 2 * VTSize)
8165 return SDValue();
8166
8167 if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
8168 // Span too large for a VEXT to cope
8169 return SDValue();
8170 }
8171
8172 if (Src.MinElt >= NumSrcElts) {
8173 // The extraction can just take the second half
8174 Src.ShuffleVec =
8175 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8176 DAG.getConstant(NumSrcElts, dl, MVT::i32));
8177 Src.WindowBase = -NumSrcElts;
8178 } else if (Src.MaxElt < NumSrcElts) {
8179 // The extraction can just take the first half
8180 Src.ShuffleVec =
8181 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8182 DAG.getConstant(0, dl, MVT::i32));
8183 } else {
8184 // An actual VEXT is needed
8185 SDValue VEXTSrc1 =
8186 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8187 DAG.getConstant(0, dl, MVT::i32));
8188 SDValue VEXTSrc2 =
8189 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8190 DAG.getConstant(NumSrcElts, dl, MVT::i32));
8191
8192 Src.ShuffleVec = DAG.getNode(ARMISD::VEXT, dl, DestVT, VEXTSrc1,
8193 VEXTSrc2,
8194 DAG.getConstant(Src.MinElt, dl, MVT::i32));
8195 Src.WindowBase = -Src.MinElt;
8196 }
8197 }
8198
8199 // Another possible incompatibility occurs from the vector element types. We
8200 // can fix this by bitcasting the source vectors to the same type we intend
8201 // for the shuffle.
8202 for (auto &Src : Sources) {
8203 EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
8204 if (SrcEltTy == SmallestEltTy)
8205 continue;
8206 assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
8207 Src.ShuffleVec = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, ShuffleVT, Src.ShuffleVec);
8208 Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits();
8209 Src.WindowBase *= Src.WindowScale;
8210 }
8211
8212 // Final check before we try to actually produce a shuffle.
8213 LLVM_DEBUG(for (auto Src
8214 : Sources)
8215 assert(Src.ShuffleVec.getValueType() == ShuffleVT););
8216
8217 // The stars all align, our next step is to produce the mask for the shuffle.
8218 SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
8219 int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits();
8220 for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
8221 SDValue Entry = Op.getOperand(i);
8222 if (Entry.isUndef())
8223 continue;
8224
8225 auto Src = llvm::find(Sources, Entry.getOperand(0));
8226 int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
8227
8228 // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
8229 // trunc. So only std::min(SrcBits, DestBits) actually get defined in this
8230 // segment.
8231 EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
8232 int BitsDefined = std::min(OrigEltTy.getScalarSizeInBits(),
8233 VT.getScalarSizeInBits());
8234 int LanesDefined = BitsDefined / BitsPerShuffleLane;
8235
8236 // This source is expected to fill ResMultiplier lanes of the final shuffle,
8237 // starting at the appropriate offset.
8238 int *LaneMask = &Mask[i * ResMultiplier];
8239
8240 int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
8241 ExtractBase += NumElts * (Src - Sources.begin());
8242 for (int j = 0; j < LanesDefined; ++j)
8243 LaneMask[j] = ExtractBase + j;
8244 }
8245
8246
8247 // We can't handle more than two sources. This should have already
8248 // been checked before this point.
8249 assert(Sources.size() <= 2 && "Too many sources!");
8250
8251 SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
8252 for (unsigned i = 0; i < Sources.size(); ++i)
8253 ShuffleOps[i] = Sources[i].ShuffleVec;
8254
8255 SDValue Shuffle = buildLegalVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
8256 ShuffleOps[1], Mask, DAG);
8257 if (!Shuffle)
8258 return SDValue();
8259 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Shuffle);
8260}
8261
8262enum ShuffleOpCodes {
8263 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
8264 OP_VREV,
8265 OP_VDUP0,
8266 OP_VDUP1,
8267 OP_VDUP2,
8268 OP_VDUP3,
8269 OP_VEXT1,
8270 OP_VEXT2,
8271 OP_VEXT3,
8272 OP_VUZPL, // VUZP, left result
8273 OP_VUZPR, // VUZP, right result
8274 OP_VZIPL, // VZIP, left result
8275 OP_VZIPR, // VZIP, right result
8276 OP_VTRNL, // VTRN, left result
8277 OP_VTRNR // VTRN, right result
8278};
8279
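// Note on the PerfectShuffleTable encoding assumed by the decoding logic
// below: each PFEntry packs the cost in bits [31:30], one of the
// ShuffleOpCodes above in bits [29:26], and the 13-bit left/right operand
// entries in bits [25:13] and [12:0] respectively.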
8280static bool isLegalMVEShuffleOp(unsigned PFEntry) {
8281 unsigned OpNum = (PFEntry >> 26) & 0x0F;
8282 switch (OpNum) {
8283 case OP_COPY:
8284 case OP_VREV:
8285 case OP_VDUP0:
8286 case OP_VDUP1:
8287 case OP_VDUP2:
8288 case OP_VDUP3:
8289 return true;
8290 }
8291 return false;
8292}
8293
8294/// isShuffleMaskLegal - Targets can use this to indicate that they only
8295/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
8296/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
8297/// are assumed to be legal.
8298bool ARMTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
8299 if (VT.getVectorNumElements() == 4 &&
8300 (VT.is128BitVector() || VT.is64BitVector())) {
8301 unsigned PFIndexes[4];
8302 for (unsigned i = 0; i != 4; ++i) {
8303 if (M[i] < 0)
8304 PFIndexes[i] = 8;
8305 else
8306 PFIndexes[i] = M[i];
8307 }
8308
8309 // Compute the index in the perfect shuffle table.
8310 unsigned PFTableIndex =
8311 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
8312 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
8313 unsigned Cost = (PFEntry >> 30);
8314
8315 if (Cost <= 4 && (Subtarget->hasNEON() || isLegalMVEShuffleOp(PFEntry)))
8316 return true;
8317 }
8318
8319 bool ReverseVEXT, isV_UNDEF;
8320 unsigned Imm, WhichResult;
8321
8322 unsigned EltSize = VT.getScalarSizeInBits();
8323 if (EltSize >= 32 ||
8324 ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
8325 ShuffleVectorInst::isIdentityMask(M) ||
8326 isVREVMask(M, VT, 64) ||
8327 isVREVMask(M, VT, 32) ||
8328 isVREVMask(M, VT, 16))
8329 return true;
8330 else if (Subtarget->hasNEON() &&
8331 (isVEXTMask(M, VT, ReverseVEXT, Imm) ||
8332 isVTBLMask(M, VT) ||
8333 isNEONTwoResultShuffleMask(M, VT, WhichResult, isV_UNDEF)))
8334 return true;
8335 else if ((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8336 isReverseMask(M, VT))
8337 return true;
8338 else if (Subtarget->hasMVEIntegerOps() &&
8339 (isVMOVNMask(M, VT, true, false) ||
8340 isVMOVNMask(M, VT, false, false) || isVMOVNMask(M, VT, true, true)))
8341 return true;
8342 else
8343 return false;
8344}
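// Worked example (illustrative only): the 4-element mask <0,2,1,3> gives
// PFTableIndex = 0*729 + 2*81 + 1*9 + 3 = 174 (undef lanes encode as 8 in
// this base-9 index), and the mask is legal when the table entry's cost is
// at most 4 for the subtarget's available shuffle operations.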
8345
8346/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
8347/// the specified operations to build the shuffle.
8348static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
8349 SDValue RHS, SelectionDAG &DAG,
8350 const SDLoc &dl) {
8351 unsigned OpNum = (PFEntry >> 26) & 0x0F;
8352 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
8353 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
8354
8355 if (OpNum == OP_COPY) {
8356 if (LHSID == (1*9+2)*9+3) return LHS;
8357 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
8358 return RHS;
8359 }
8360
8361 SDValue OpLHS, OpRHS;
8362 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
8363 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
8364 EVT VT = OpLHS.getValueType();
8365
8366 switch (OpNum) {
8367 default: llvm_unreachable("Unknown shuffle opcode!");
8368 case OP_VREV:
8369 // VREV divides the vector in half and swaps within the half.
8370 if (VT.getVectorElementType() == MVT::i32 ||
8371 VT.getVectorElementType() == MVT::f32)
8372 return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
8373 // vrev <4 x i16> -> VREV32
8374 if (VT.getVectorElementType() == MVT::i16 ||
8375 VT.getVectorElementType() == MVT::f16)
8376 return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS);
8377 // vrev <4 x i8> -> VREV16
8378 assert(VT.getVectorElementType() == MVT::i8);
8379 return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS);
8380 case OP_VDUP0:
8381 case OP_VDUP1:
8382 case OP_VDUP2:
8383 case OP_VDUP3:
8384 return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
8385 OpLHS, DAG.getConstant(OpNum-OP_VDUP0, dl, MVT::i32));
8386 case OP_VEXT1:
8387 case OP_VEXT2:
8388 case OP_VEXT3:
8389 return DAG.getNode(ARMISD::VEXT, dl, VT,
8390 OpLHS, OpRHS,
8391 DAG.getConstant(OpNum - OP_VEXT1 + 1, dl, MVT::i32));
8392 case OP_VUZPL:
8393 case OP_VUZPR:
8394 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
8395 OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
8396 case OP_VZIPL:
8397 case OP_VZIPR:
8398 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
8399 OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
8400 case OP_VTRNL:
8401 case OP_VTRNR:
8402 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
8403 OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
8404 }
8405}
8406
8407static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
8408 ArrayRef<int> ShuffleMask,
8409 SelectionDAG &DAG) {
8410 // Check to see if we can use the VTBL instruction.
8411 SDValue V1 = Op.getOperand(0);
8412 SDValue V2 = Op.getOperand(1);
8413 SDLoc DL(Op);
8414
8415 SmallVector<SDValue, 8> VTBLMask;
8416 for (int I : ShuffleMask)
8417 VTBLMask.push_back(DAG.getConstant(I, DL, MVT::i32));
8418
8419 if (V2.getNode()->isUndef())
8420 return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
8421 DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
8422
8423 return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
8424 DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
8425}
8426
8427static SDValue LowerReverse_VECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
8428 SDLoc DL(Op);
8429 EVT VT = Op.getValueType();
8430
8431 assert((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8432 "Expect an v8i16/v16i8 type");
8433 SDValue OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, Op.getOperand(0));
8434 // For a v16i8 type: After the VREV, we have got <7, ..., 0, 15, ..., 8>. Now,
8435 // extract the first 8 bytes into the top double word and the last 8 bytes
8436 // into the bottom double word, through a new vector shuffle that will be
8437 // turned into a VEXT on Neon, or a couple of VMOVDs on MVE.
8438 std::vector<int> NewMask;
8439 for (unsigned i = 0; i < VT.getVectorNumElements() / 2; i++)
8440 NewMask.push_back(VT.getVectorNumElements() / 2 + i);
8441 for (unsigned i = 0; i < VT.getVectorNumElements() / 2; i++)
8442 NewMask.push_back(i);
8443 return DAG.getVectorShuffle(VT, DL, OpLHS, OpLHS, NewMask);
8444}
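// Worked example (illustrative only): for v16i8 the VREV64 above yields
// <7,...,0,15,...,8>, and NewMask = <8,...,15,0,...,7> then swaps the two
// 64-bit halves, producing the fully reversed vector <15,...,0>.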
8445
8446static EVT getVectorTyFromPredicateVector(EVT VT) {
8447 switch (VT.getSimpleVT().SimpleTy) {
8448 case MVT::v2i1:
8449 return MVT::v2f64;
8450 case MVT::v4i1:
8451 return MVT::v4i32;
8452 case MVT::v8i1:
8453 return MVT::v8i16;
8454 case MVT::v16i1:
8455 return MVT::v16i8;
8456 default:
8457 llvm_unreachable("Unexpected vector predicate type");
8458 }
8459}
8460
8461static SDValue PromoteMVEPredVector(SDLoc dl, SDValue Pred, EVT VT,
8462 SelectionDAG &DAG) {
8463 // Converting from boolean predicates to integers involves creating a vector
8464 // of all ones or all zeroes and selecting the lanes based upon the real
8465 // predicate.
8466 SDValue AllOnes =
8467 DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0xff), dl, MVT::i32);
8468 AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v16i8, AllOnes);
8469
8470 SDValue AllZeroes =
8471 DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0x0), dl, MVT::i32);
8472 AllZeroes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v16i8, AllZeroes);
8473
8474 // Get full vector type from predicate type
8475 EVT NewVT = getVectorTyFromPredicateVector(VT);
8476
8477 SDValue RecastV1;
8478 // If the real predicate is a v8i1 or v4i1 (not v16i1) then we need to recast
8479 // this to a v16i1. This cannot be done with an ordinary bitcast because the
8480 // sizes are not the same. We have to use an MVE-specific PREDICATE_CAST node,
8481 // since we know in hardware the sizes are really the same.
8482 if (VT != MVT::v16i1)
8483 RecastV1 = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v16i1, Pred);
8484 else
8485 RecastV1 = Pred;
8486
8487 // Select either all ones or zeroes depending upon the real predicate bits.
8488 SDValue PredAsVector =
8489 DAG.getNode(ISD::VSELECT, dl, MVT::v16i8, RecastV1, AllOnes, AllZeroes);
8490
8491 // Recast our new predicate-as-integer v16i8 vector into something
8492 // appropriate for the shuffle, i.e. v4i32 for a real v4i1 predicate.
8493 return DAG.getNode(ISD::BITCAST, dl, NewVT, PredAsVector);
8494}
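// Illustrative sketch (assuming the MVE predicate layout in which a v4i1
// lane occupies 4 consecutive predicate bits): a v4i1 input is first
// PREDICATE_CAST to v16i1, the VSELECT then yields 0xff or 0x00 per byte,
// and the final bitcast to v4i32 leaves each original lane as 0xffffffff or
// 0x00000000.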
8495
8496static SDValue LowerVECTOR_SHUFFLE_i1(SDValue Op, SelectionDAG &DAG,
8497 const ARMSubtarget *ST) {
8498 EVT VT = Op.getValueType();
8499 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
8500 ArrayRef<int> ShuffleMask = SVN->getMask();
8501
8502 assert(ST->hasMVEIntegerOps() &&
8503 "No support for vector shuffle of boolean predicates");
8504
8505 SDValue V1 = Op.getOperand(0);
8506 SDLoc dl(Op);
8507 if (isReverseMask(ShuffleMask, VT)) {
8508 SDValue cast = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, V1);
8509 SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, cast);
8510 SDValue srl = DAG.getNode(ISD::SRL, dl, MVT::i32, rbit,
8511 DAG.getConstant(16, dl, MVT::i32));
8512 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, srl);
8513 }
8514
8515 // Until we can come up with optimised cases for every single vector
8516 // shuffle in existence we have chosen the least painful strategy. This is
8517 // to essentially promote the boolean predicate to an 8-bit integer, where
8518 // each predicate represents a byte. Then we fall back on a normal integer
8519 // vector shuffle and convert the result back into a predicate vector. In
8520 // many cases the generated code might be even better than scalar code
8521 // operating on bits. Just imagine trying to shuffle 8 arbitrary 2-bit
8522 // fields in a register into 8 other arbitrary 2-bit fields!
8523 SDValue PredAsVector = PromoteMVEPredVector(dl, V1, VT, DAG);
8524 EVT NewVT = PredAsVector.getValueType();
8525
8526 // Do the shuffle!
8527 SDValue Shuffled = DAG.getVectorShuffle(NewVT, dl, PredAsVector,
8528 DAG.getUNDEF(NewVT), ShuffleMask);
8529
8530 // Now return the result of comparing the shuffled vector with zero,
8531 // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1. For a v2i1
8532 // we convert to a v4i1 compare to fill in the two halves of the i64 as i32s.
8533 if (VT == MVT::v2i1) {
8534 SDValue BC = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Shuffled);
8535 SDValue Cmp = DAG.getNode(ARMISD::VCMPZ, dl, MVT::v4i1, BC,
8536 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
8537 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v2i1, Cmp);
8538 }
8539 return DAG.getNode(ARMISD::VCMPZ, dl, VT, Shuffled,
8540 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
8541}
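// Worked example (illustrative only): for a v16i1 reverse mask the predicate
// is moved to a GPR, BITREVERSE maps bit i to bit 31-i, and the SRL by 16
// brings the reversed predicate back into the low 16 bits before the final
// PREDICATE_CAST.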
8542
8543static SDValue LowerVECTOR_SHUFFLEUsingMovs(SDValue Op,
8544 ArrayRef<int> ShuffleMask,
8545 SelectionDAG &DAG) {
8546 // Attempt to lower the vector shuffle using as many whole register movs as
8547 // possible. This is useful for types smaller than 32 bits, which would
8548 // often otherwise become a series of GPR movs.
8549 SDLoc dl(Op);
8550 EVT VT = Op.getValueType();
8551 if (VT.getScalarSizeInBits() >= 32)
8552 return SDValue();
8553
8554 assert((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8555 "Unexpected vector type");
8556 int NumElts = VT.getVectorNumElements();
8557 int QuarterSize = NumElts / 4;
8558 // The four final parts of the vector, as i32's
8559 SDValue Parts[4];
8560
8561 // Look for full lane vmovs like <0,1,2,3> or <u,5,6,7> etc, (but not
8562 // <u,u,u,u>), returning the vmov lane index
8563 auto getMovIdx = [](ArrayRef<int> ShuffleMask, int Start, int Length) {
8564 // Detect which mov lane this would be from the first non-undef element.
8565 int MovIdx = -1;
8566 for (int i = 0; i < Length; i++) {
8567 if (ShuffleMask[Start + i] >= 0) {
8568 if (ShuffleMask[Start + i] % Length != i)
8569 return -1;
8570 MovIdx = ShuffleMask[Start + i] / Length;
8571 break;
8572 }
8573 }
8574 // If all items are undef, leave this for other combines
8575 if (MovIdx == -1)
8576 return -1;
8577 // Check the remaining values are the correct part of the same mov
8578 for (int i = 1; i < Length; i++) {
8579 if (ShuffleMask[Start + i] >= 0 &&
8580 (ShuffleMask[Start + i] / Length != MovIdx ||
8581 ShuffleMask[Start + i] % Length != i))
8582 return -1;
8583 }
8584 return MovIdx;
8585 };
8586
8587 for (int Part = 0; Part < 4; ++Part) {
8588 // Does this part look like a mov
8589 int Elt = getMovIdx(ShuffleMask, Part * QuarterSize, QuarterSize);
8590 if (Elt != -1) {
8591 SDValue Input = Op->getOperand(0);
8592 if (Elt >= 4) {
8593 Input = Op->getOperand(1);
8594 Elt -= 4;
8595 }
8596 SDValue BitCast = DAG.getBitcast(MVT::v4f32, Input);
8597 Parts[Part] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, BitCast,
8598 DAG.getConstant(Elt, dl, MVT::i32));
8599 }
8600 }
8601
8602 // Nothing interesting found, just return
8603 if (!Parts[0] && !Parts[1] && !Parts[2] && !Parts[3])
8604 return SDValue();
8605
8606 // The other parts need to be built with the old shuffle vector, cast to a
8607 // v4i32 and extract_vector_elts
8608 if (!Parts[0] || !Parts[1] || !Parts[2] || !Parts[3]) {
8609 SmallVector<int, 16> NewShuffleMask;
8610 for (int Part = 0; Part < 4; ++Part)
8611 for (int i = 0; i < QuarterSize; i++)
8612 NewShuffleMask.push_back(
8613 Parts[Part] ? -1 : ShuffleMask[Part * QuarterSize + i]);
8614 SDValue NewShuffle = DAG.getVectorShuffle(
8615 VT, dl, Op->getOperand(0), Op->getOperand(1), NewShuffleMask);
8616 SDValue BitCast = DAG.getBitcast(MVT::v4f32, NewShuffle);
8617
8618 for (int Part = 0; Part < 4; ++Part)
8619 if (!Parts[Part])
8620 Parts[Part] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32,
8621 BitCast, DAG.getConstant(Part, dl, MVT::i32));
8622 }
8623 // Build a vector out of the various parts and bitcast it back to the original
8624 // type.
8625 SDValue NewVec = DAG.getNode(ARMISD::BUILD_VECTOR, dl, MVT::v4f32, Parts);
8626 return DAG.getBitcast(VT, NewVec);
8627}
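// Worked example (illustrative only): a v8i16 shuffle with mask
// <4,5,2,3,8,9,6,7> has a whole 32-bit lane move in every quarter, so the
// result is built from f32 lanes 2, 1 and 3 of bitcast(operand 0) plus lane 0
// of bitcast(operand 1), avoiding per-element GPR transfers.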
8628
8629static SDValue LowerVECTOR_SHUFFLEUsingOneOff(SDValue Op,
8630 ArrayRef<int> ShuffleMask,
8631 SelectionDAG &DAG) {
8632 SDValue V1 = Op.getOperand(0);
8633 SDValue V2 = Op.getOperand(1);
8634 EVT VT = Op.getValueType();
8635 unsigned NumElts = VT.getVectorNumElements();
8636
8637 // A One-Off Identity mask is one that is mostly an identity mask from a
8638 // single source but contains a single element out-of-place, either from a
8639 // different vector or from another position in the same vector. As opposed to
8640 // lowering this via an ARMISD::BUILD_VECTOR we can generate an extract/insert
8641 // pair directly.
8642 auto isOneOffIdentityMask = [](ArrayRef<int> Mask, EVT VT, int BaseOffset,
8643 int &OffElement) {
8644 OffElement = -1;
8645 int NonUndef = 0;
8646 for (int i = 0, NumMaskElts = Mask.size(); i < NumMaskElts; ++i) {
8647 if (Mask[i] == -1)
8648 continue;
8649 NonUndef++;
8650 if (Mask[i] != i + BaseOffset) {
8651 if (OffElement == -1)
8652 OffElement = i;
8653 else
8654 return false;
8655 }
8656 }
8657 return NonUndef > 2 && OffElement != -1;
8658 };
8659 int OffElement;
8660 SDValue VInput;
8661 if (isOneOffIdentityMask(ShuffleMask, VT, 0, OffElement))
8662 VInput = V1;
8663 else if (isOneOffIdentityMask(ShuffleMask, VT, NumElts, OffElement))
8664 VInput = V2;
8665 else
8666 return SDValue();
8667
8668 SDLoc dl(Op);
8669 EVT SVT = VT.getScalarType() == MVT::i8 || VT.getScalarType() == MVT::i16
8670 ? MVT::i32
8671 : VT.getScalarType();
8672 SDValue Elt = DAG.getNode(
8673 ISD::EXTRACT_VECTOR_ELT, dl, SVT,
8674 ShuffleMask[OffElement] < (int)NumElts ? V1 : V2,
8675 DAG.getVectorIdxConstant(ShuffleMask[OffElement] % NumElts, dl));
8676 return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, VInput, Elt,
8677 DAG.getVectorIdxConstant(OffElement % NumElts, dl));
8678}
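// Worked example (illustrative only): a v4i32 shuffle with mask <0,1,6,3> is
// an identity from V1 except for element 2, so it is lowered as
// insert_vector_elt(V1, extract_vector_elt(V2, 2), 2) rather than a full
// build vector.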
8679
8680static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
8681 const ARMSubtarget *ST) {
8682 SDValue V1 = Op.getOperand(0);
8683 SDValue V2 = Op.getOperand(1);
8684 SDLoc dl(Op);
8685 EVT VT = Op.getValueType();
8686 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
8687 unsigned EltSize = VT.getScalarSizeInBits();
8688
8689 if (ST->hasMVEIntegerOps() && EltSize == 1)
8690 return LowerVECTOR_SHUFFLE_i1(Op, DAG, ST);
8691
8692 // Convert shuffles that are directly supported on NEON to target-specific
8693 // DAG nodes, instead of keeping them as shuffles and matching them again
8694 // during code selection. This is more efficient and avoids the possibility
8695 // of inconsistencies between legalization and selection.
8696 // FIXME: floating-point vectors should be canonicalized to integer vectors
8697 // of the same size so that they get CSEd properly.
8698 ArrayRef<int> ShuffleMask = SVN->getMask();
8699
8700 if (EltSize <= 32) {
8701 if (SVN->isSplat()) {
8702 int Lane = SVN->getSplatIndex();
8703 // If this is undef splat, generate it via "just" vdup, if possible.
8704 if (Lane == -1) Lane = 0;
8705
8706 // Test if V1 is a SCALAR_TO_VECTOR.
8707 if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
8708 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
8709 }
8710 // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
8711 // (and probably will turn into a SCALAR_TO_VECTOR once legalization
8712 // reaches it).
8713 if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
8714 !isa<ConstantSDNode>(V1.getOperand(0))) {
8715 bool IsScalarToVector = true;
8716 for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
8717 if (!V1.getOperand(i).isUndef()) {
8718 IsScalarToVector = false;
8719 break;
8720 }
8721 if (IsScalarToVector)
8722 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
8723 }
8724 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
8725 DAG.getConstant(Lane, dl, MVT::i32));
8726 }
8727
8728 bool ReverseVEXT = false;
8729 unsigned Imm = 0;
8730 if (ST->hasNEON() && isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
8731 if (ReverseVEXT)
8732 std::swap(V1, V2);
8733 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
8734 DAG.getConstant(Imm, dl, MVT::i32));
8735 }
8736
8737 if (isVREVMask(ShuffleMask, VT, 64))
8738 return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
8739 if (isVREVMask(ShuffleMask, VT, 32))
8740 return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
8741 if (isVREVMask(ShuffleMask, VT, 16))
8742 return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
8743
8744 if (ST->hasNEON() && V2->isUndef() && isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
8745 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1,
8746 DAG.getConstant(Imm, dl, MVT::i32));
8747 }
8748
8749 // Check for Neon shuffles that modify both input vectors in place.
8750 // If both results are used, i.e., if there are two shuffles with the same
8751 // source operands and with masks corresponding to both results of one of
8752 // these operations, DAG memoization will ensure that a single node is
8753 // used for both shuffles.
8754 unsigned WhichResult = 0;
8755 bool isV_UNDEF = false;
8756 if (ST->hasNEON()) {
8757 if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
8758 ShuffleMask, VT, WhichResult, isV_UNDEF)) {
8759 if (isV_UNDEF)
8760 V2 = V1;
8761 return DAG.getNode(ShuffleOpc, dl, DAG.getVTList(VT, VT), V1, V2)
8762 .getValue(WhichResult);
8763 }
8764 }
8765 if (ST->hasMVEIntegerOps()) {
8766 if (isVMOVNMask(ShuffleMask, VT, false, false))
8767 return DAG.getNode(ARMISD::VMOVN, dl, VT, V2, V1,
8768 DAG.getConstant(0, dl, MVT::i32));
8769 if (isVMOVNMask(ShuffleMask, VT, true, false))
8770 return DAG.getNode(ARMISD::VMOVN, dl, VT, V1, V2,
8771 DAG.getConstant(1, dl, MVT::i32));
8772 if (isVMOVNMask(ShuffleMask, VT, true, true))
8773 return DAG.getNode(ARMISD::VMOVN, dl, VT, V1, V1,
8774 DAG.getConstant(1, dl, MVT::i32));
8775 }
8776
8777 // Also check for these shuffles through CONCAT_VECTORS: we canonicalize
8778 // shuffles that produce a result larger than their operands with:
8779 // shuffle(concat(v1, undef), concat(v2, undef))
8780 // ->
8781 // shuffle(concat(v1, v2), undef)
8782 // because we can access quad vectors (see PerformVECTOR_SHUFFLECombine).
8783 //
8784 // This is useful in the general case, but there are special cases where
8785 // native shuffles produce larger results: the two-result ops.
8786 //
8787 // Look through the concat when lowering them:
8788 // shuffle(concat(v1, v2), undef)
8789 // ->
8790 // concat(VZIP(v1, v2):0, :1)
8791 //
8792 if (ST->hasNEON() && V1->getOpcode() == ISD::CONCAT_VECTORS && V2->isUndef()) {
8793 SDValue SubV1 = V1->getOperand(0);
8794 SDValue SubV2 = V1->getOperand(1);
8795 EVT SubVT = SubV1.getValueType();
8796
8797 // We expect these to have been canonicalized to -1.
8798 assert(llvm::all_of(ShuffleMask, [&](int i) {
8799 return i < (int)VT.getVectorNumElements();
8800 }) && "Unexpected shuffle index into UNDEF operand!");
8801
8802 if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
8803 ShuffleMask, SubVT, WhichResult, isV_UNDEF)) {
8804 if (isV_UNDEF)
8805 SubV2 = SubV1;
8806 assert((WhichResult == 0) &&
8807 "In-place shuffle of concat can only have one result!");
8808 SDValue Res = DAG.getNode(ShuffleOpc, dl, DAG.getVTList(SubVT, SubVT),
8809 SubV1, SubV2);
8810 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Res.getValue(0),
8811 Res.getValue(1));
8812 }
8813 }
8814 }
8815
8816 if (ST->hasMVEIntegerOps() && EltSize <= 32)
8817 if (SDValue V = LowerVECTOR_SHUFFLEUsingOneOff(Op, ShuffleMask, DAG))
8818 return V;
8819
8820 // If the shuffle is not directly supported and it has 4 elements, use
8821 // the PerfectShuffle-generated table to synthesize it from other shuffles.
8822 unsigned NumElts = VT.getVectorNumElements();
8823 if (NumElts == 4) {
8824 unsigned PFIndexes[4];
8825 for (unsigned i = 0; i != 4; ++i) {
8826 if (ShuffleMask[i] < 0)
8827 PFIndexes[i] = 8;
8828 else
8829 PFIndexes[i] = ShuffleMask[i];
8830 }
8831
8832 // Compute the index in the perfect shuffle table.
8833 unsigned PFTableIndex =
8834 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
8835 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
8836 unsigned Cost = (PFEntry >> 30);
8837
8838 if (Cost <= 4) {
8839 if (ST->hasNEON())
8840 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
8841 else if (isLegalMVEShuffleOp(PFEntry)) {
8842 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
8843 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
8844 unsigned PFEntryLHS = PerfectShuffleTable[LHSID];
8845 unsigned PFEntryRHS = PerfectShuffleTable[RHSID];
8846 if (isLegalMVEShuffleOp(PFEntryLHS) && isLegalMVEShuffleOp(PFEntryRHS))
8847 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
8848 }
8849 }
8850 }
8851
8852 // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
8853 if (EltSize >= 32) {
8854 // Do the expansion with floating-point types, since that is what the VFP
8855 // registers are defined to use, and since i64 is not legal.
8856 EVT EltVT = EVT::getFloatingPointVT(EltSize);
8857 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
8858 V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1);
8859 V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2);
8860 SmallVector<SDValue, 8> Ops;
8861 for (unsigned i = 0; i < NumElts; ++i) {
8862 if (ShuffleMask[i] < 0)
8863 Ops.push_back(DAG.getUNDEF(EltVT));
8864 else
8865 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
8866 ShuffleMask[i] < (int)NumElts ? V1 : V2,
8867 DAG.getConstant(ShuffleMask[i] & (NumElts-1),
8868 dl, MVT::i32)));
8869 }
8870 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
8871 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
8872 }
8873
8874 if ((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8875 isReverseMask(ShuffleMask, VT))
8876 return LowerReverse_VECTOR_SHUFFLE(Op, DAG);
8877
8878 if (ST->hasNEON() && VT == MVT::v8i8)
8879 if (SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG))
8880 return NewOp;
8881
8882 if (ST->hasMVEIntegerOps())
8883 if (SDValue NewOp = LowerVECTOR_SHUFFLEUsingMovs(Op, ShuffleMask, DAG))
8884 return NewOp;
8885
8886 return SDValue();
8887}
8888
8889static SDValue LowerINSERT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG,
8890 const ARMSubtarget *ST) {
8891 EVT VecVT = Op.getOperand(0).getValueType();
8892 SDLoc dl(Op);
8893
8894 assert(ST->hasMVEIntegerOps() &&
8895 "LowerINSERT_VECTOR_ELT_i1 called without MVE!");
8896
8897 SDValue Conv =
8898 DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0));
8899 unsigned Lane = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
8900 unsigned LaneWidth =
8901 getVectorTyFromPredicateVector(VecVT).getScalarSizeInBits() / 8;
8902 unsigned Mask = ((1 << LaneWidth) - 1) << Lane * LaneWidth;
8903 SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i32,
8904 Op.getOperand(1), DAG.getValueType(MVT::i1));
8905 SDValue BFI = DAG.getNode(ARMISD::BFI, dl, MVT::i32, Conv, Ext,
8906 DAG.getConstant(~Mask, dl, MVT::i32));
8907 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, Op.getValueType(), BFI);
8908}
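// Worked example (illustrative only): inserting into lane 3 of a v8i1 gives
// LaneWidth = 16/8 = 2 and Mask = 0x3 << 6 = 0xc0, so the BFI above writes
// the sign-extended boolean into bits 7:6 of the 16-bit predicate value.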
8909
8910SDValue ARMTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
8911 SelectionDAG &DAG) const {
8912 // INSERT_VECTOR_ELT is legal only for immediate indexes.
8913 SDValue Lane = Op.getOperand(2);
8914 if (!isa<ConstantSDNode>(Lane))
8915 return SDValue();
8916
8917 SDValue Elt = Op.getOperand(1);
8918 EVT EltVT = Elt.getValueType();
8919
8920 if (Subtarget->hasMVEIntegerOps() &&
8921 Op.getValueType().getScalarSizeInBits() == 1)
8922 return LowerINSERT_VECTOR_ELT_i1(Op, DAG, Subtarget);
8923
8924 if (getTypeAction(*DAG.getContext(), EltVT) ==
8925 TargetLowering::TypePromoteFloat) {
8926 // INSERT_VECTOR_ELT doesn't want f16 operands promoting to f32,
8927 // but the type system will try to do that if we don't intervene.
8928 // Reinterpret any such vector-element insertion as one with the
8929 // corresponding integer types.
8930
8931 SDLoc dl(Op);
8932
8933 EVT IEltVT = MVT::getIntegerVT(EltVT.getScalarSizeInBits());
8934 assert(getTypeAction(*DAG.getContext(), IEltVT) !=
8935 TargetLowering::TypePromoteFloat);
8936
8937 SDValue VecIn = Op.getOperand(0);
8938 EVT VecVT = VecIn.getValueType();
8939 EVT IVecVT = EVT::getVectorVT(*DAG.getContext(), IEltVT,
8940 VecVT.getVectorNumElements());
8941
8942 SDValue IElt = DAG.getNode(ISD::BITCAST, dl, IEltVT, Elt);
8943 SDValue IVecIn = DAG.getNode(ISD::BITCAST, dl, IVecVT, VecIn);
8944 SDValue IVecOut = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, IVecVT,
8945 IVecIn, IElt, Lane);
8946 return DAG.getNode(ISD::BITCAST, dl, VecVT, IVecOut);
8947 }
8948
8949 return Op;
8950}
8951
8952static SDValue LowerEXTRACT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG,
8953 const ARMSubtarget *ST) {
8954 EVT VecVT = Op.getOperand(0).getValueType();
8955 SDLoc dl(Op);
8956
 8957  assert(ST->hasMVEIntegerOps() &&
 8958         "LowerEXTRACT_VECTOR_ELT_i1 called without MVE!");
8959
8960 SDValue Conv =
8961 DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0));
8962 unsigned Lane = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
8963 unsigned LaneWidth =
8964 getVectorTyFromPredicateVector(VecVT).getScalarSizeInBits() / 8;
8965 SDValue Shift = DAG.getNode(ISD::SRL, dl, MVT::i32, Conv,
8966 DAG.getConstant(Lane * LaneWidth, dl, MVT::i32));
8967 return Shift;
8968}
8969
8970static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG,
8971 const ARMSubtarget *ST) {
8972 // EXTRACT_VECTOR_ELT is legal only for immediate indexes.
8973 SDValue Lane = Op.getOperand(1);
8974 if (!isa<ConstantSDNode>(Lane))
8975 return SDValue();
8976
8977 SDValue Vec = Op.getOperand(0);
8978 EVT VT = Vec.getValueType();
8979
8980 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
8981 return LowerEXTRACT_VECTOR_ELT_i1(Op, DAG, ST);
8982
8983 if (Op.getValueType() == MVT::i32 && Vec.getScalarValueSizeInBits() < 32) {
8984 SDLoc dl(Op);
8985 return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
8986 }
8987
8988 return Op;
8989}
8990
8991static SDValue LowerCONCAT_VECTORS_i1(SDValue Op, SelectionDAG &DAG,
8992 const ARMSubtarget *ST) {
8993 SDLoc dl(Op);
 8994  assert(Op.getValueType().getScalarSizeInBits() == 1 &&
 8995         "Unexpected custom CONCAT_VECTORS lowering");
 8996  assert(isPowerOf2_32(Op.getNumOperands()) &&
 8997         "Unexpected custom CONCAT_VECTORS lowering");
 8998  assert(ST->hasMVEIntegerOps() &&
 8999         "CONCAT_VECTORS lowering only supported for MVE");
9000
9001 auto ConcatPair = [&](SDValue V1, SDValue V2) {
9002 EVT Op1VT = V1.getValueType();
9003 EVT Op2VT = V2.getValueType();
 9004    assert(Op1VT == Op2VT && "Operand types don't match!");
9005 EVT VT = Op1VT.getDoubleNumVectorElementsVT(*DAG.getContext());
9006
9007 SDValue NewV1 = PromoteMVEPredVector(dl, V1, Op1VT, DAG);
9008 SDValue NewV2 = PromoteMVEPredVector(dl, V2, Op2VT, DAG);
9009
9010 // We now have Op1 + Op2 promoted to vectors of integers, where v8i1 gets
9011 // promoted to v8i16, etc.
9012 MVT ElType =
9013 getVectorTyFromPredicateVector(VT).getScalarType().getSimpleVT();
9014 unsigned NumElts = 2 * Op1VT.getVectorNumElements();
9015
9016 // Extract the vector elements from Op1 and Op2 one by one and truncate them
9017 // to be the right size for the destination. For example, if Op1 is v4i1
9018 // then the promoted vector is v4i32. The result of concatenation gives a
9019 // v8i1, which when promoted is v8i16. That means each i32 element from Op1
9020 // needs truncating to i16 and inserting in the result.
9021 EVT ConcatVT = MVT::getVectorVT(ElType, NumElts);
9022 SDValue ConVec = DAG.getNode(ISD::UNDEF, dl, ConcatVT);
9023 auto ExtractInto = [&DAG, &dl](SDValue NewV, SDValue ConVec, unsigned &j) {
9024 EVT NewVT = NewV.getValueType();
9025 EVT ConcatVT = ConVec.getValueType();
9026 for (unsigned i = 0, e = NewVT.getVectorNumElements(); i < e; i++, j++) {
9027 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV,
9028 DAG.getIntPtrConstant(i, dl));
9029 ConVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ConcatVT, ConVec, Elt,
9030 DAG.getConstant(j, dl, MVT::i32));
9031 }
9032 return ConVec;
9033 };
9034 unsigned j = 0;
9035 ConVec = ExtractInto(NewV1, ConVec, j);
9036 ConVec = ExtractInto(NewV2, ConVec, j);
9037
9038 // Now return the result of comparing the subvector with zero, which will
9039 // generate a real predicate, i.e. v4i1, v8i1 or v16i1. For a v2i1 we
9040 // convert to a v4i1 compare to fill in the two halves of the i64 as i32s.
9041 if (VT == MVT::v2i1) {
9042 SDValue BC = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, ConVec);
9043 SDValue Cmp = DAG.getNode(ARMISD::VCMPZ, dl, MVT::v4i1, BC,
9044 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9045 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v2i1, Cmp);
9046 }
9047 return DAG.getNode(ARMISD::VCMPZ, dl, VT, ConVec,
9048 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9049 };
9050
9051 // Concat each pair of subvectors and pack into the lower half of the array.
9052 SmallVector<SDValue> ConcatOps(Op->op_begin(), Op->op_end());
9053 while (ConcatOps.size() > 1) {
9054 for (unsigned I = 0, E = ConcatOps.size(); I != E; I += 2) {
9055 SDValue V1 = ConcatOps[I];
9056 SDValue V2 = ConcatOps[I + 1];
9057 ConcatOps[I / 2] = ConcatPair(V1, V2);
9058 }
9059 ConcatOps.resize(ConcatOps.size() / 2);
9060 }
9061 return ConcatOps[0];
9062}
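
The loop above is a standard pairwise reduction: each pass concatenates adjacent operands into the lower half of the buffer and halves its size. A hedged sketch of the same shape over plain integers (Combine is a stand-in for ConcatPair; illustrative only):

#include <cstdio>
#include <vector>

int main() {
  std::vector<int> Ops = {1, 2, 3, 4, 5, 6, 7, 8};   // power-of-two operand count
  auto Combine = [](int A, int B) { return A * 10 + B; };
  while (Ops.size() > 1) {
    for (size_t I = 0, E = Ops.size(); I != E; I += 2)
      Ops[I / 2] = Combine(Ops[I], Ops[I + 1]);      // pack pairs into the low half
    Ops.resize(Ops.size() / 2);
  }
  std::printf("reduced value: %d\n", Ops[0]);
  return 0;
}
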
9063
9064static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG,
9065 const ARMSubtarget *ST) {
9066 EVT VT = Op->getValueType(0);
9067 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
9068 return LowerCONCAT_VECTORS_i1(Op, DAG, ST);
9069
9070 // The only time a CONCAT_VECTORS operation can have legal types is when
9071 // two 64-bit vectors are concatenated to a 128-bit vector.
 9072  assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
 9073         "unexpected CONCAT_VECTORS");
9074 SDLoc dl(Op);
9075 SDValue Val = DAG.getUNDEF(MVT::v2f64);
9076 SDValue Op0 = Op.getOperand(0);
9077 SDValue Op1 = Op.getOperand(1);
9078 if (!Op0.isUndef())
9079 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
9080 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),
9081 DAG.getIntPtrConstant(0, dl));
9082 if (!Op1.isUndef())
9083 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
9084 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),
9085 DAG.getIntPtrConstant(1, dl));
9086 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val);
9087}
9088
9089static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG,
9090 const ARMSubtarget *ST) {
9091 SDValue V1 = Op.getOperand(0);
9092 SDValue V2 = Op.getOperand(1);
9093 SDLoc dl(Op);
9094 EVT VT = Op.getValueType();
9095 EVT Op1VT = V1.getValueType();
9096 unsigned NumElts = VT.getVectorNumElements();
9097 unsigned Index = cast<ConstantSDNode>(V2)->getZExtValue();
9098
 9099  assert(VT.getScalarSizeInBits() == 1 &&
 9100         "Unexpected custom EXTRACT_SUBVECTOR lowering");
 9101  assert(ST->hasMVEIntegerOps() &&
 9102         "EXTRACT_SUBVECTOR lowering only supported for MVE");
9103
9104 SDValue NewV1 = PromoteMVEPredVector(dl, V1, Op1VT, DAG);
9105
9106 // We now have Op1 promoted to a vector of integers, where v8i1 gets
9107 // promoted to v8i16, etc.
9108
9109 MVT ElType = getVectorTyFromPredicateVector(VT).getScalarType().getSimpleVT();
9110
9111 if (NumElts == 2) {
9112 EVT SubVT = MVT::v4i32;
9113 SDValue SubVec = DAG.getNode(ISD::UNDEF, dl, SubVT);
9114 for (unsigned i = Index, j = 0; i < (Index + NumElts); i++, j += 2) {
9115 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV1,
9116 DAG.getIntPtrConstant(i, dl));
9117 SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,
9118 DAG.getConstant(j, dl, MVT::i32));
9119 SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,
9120 DAG.getConstant(j + 1, dl, MVT::i32));
9121 }
9122 SDValue Cmp = DAG.getNode(ARMISD::VCMPZ, dl, MVT::v4i1, SubVec,
9123 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9124 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v2i1, Cmp);
9125 }
9126
9127 EVT SubVT = MVT::getVectorVT(ElType, NumElts);
9128 SDValue SubVec = DAG.getNode(ISD::UNDEF, dl, SubVT);
9129 for (unsigned i = Index, j = 0; i < (Index + NumElts); i++, j++) {
9130 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV1,
9131 DAG.getIntPtrConstant(i, dl));
9132 SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,
9133 DAG.getConstant(j, dl, MVT::i32));
9134 }
9135
9136 // Now return the result of comparing the subvector with zero,
9137 // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1.
9138 return DAG.getNode(ARMISD::VCMPZ, dl, VT, SubVec,
9139 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9140}
9141
9142// Turn a truncate to a predicate (an i1 vector) into icmp(and(x, 1), 0).
9143static SDValue LowerTruncatei1(SDNode *N, SelectionDAG &DAG,
9144 const ARMSubtarget *ST) {
 9145  assert(ST->hasMVEIntegerOps() && "Expected MVE!");
9146 EVT VT = N->getValueType(0);
 9147  assert((VT == MVT::v16i1 || VT == MVT::v8i1 || VT == MVT::v4i1) &&
 9148         "Expected a vector i1 type!");
9149 SDValue Op = N->getOperand(0);
9150 EVT FromVT = Op.getValueType();
9151 SDLoc DL(N);
9152
9153 SDValue And =
9154 DAG.getNode(ISD::AND, DL, FromVT, Op, DAG.getConstant(1, DL, FromVT));
9155 return DAG.getNode(ISD::SETCC, DL, VT, And, DAG.getConstant(0, DL, FromVT),
9156 DAG.getCondCode(ISD::SETNE));
9157}
9158
9159static SDValue LowerTruncate(SDNode *N, SelectionDAG &DAG,
9160 const ARMSubtarget *Subtarget) {
9161 if (!Subtarget->hasMVEIntegerOps())
9162 return SDValue();
9163
9164 EVT ToVT = N->getValueType(0);
9165 if (ToVT.getScalarType() == MVT::i1)
9166 return LowerTruncatei1(N, DAG, Subtarget);
9167
9168 // MVE does not have a single instruction to perform the truncation of a v4i32
9169 // into the lower half of a v8i16, in the same way that a NEON vmovn would.
9170 // Most of the instructions in MVE follow the 'Beats' system, where moving
9171 // values from different lanes is usually something that the instructions
9172 // avoid.
9173 //
9174 // Instead it has top/bottom instructions such as VMOVLT/B and VMOVNT/B,
 9176 // which take the top/bottom half of a larger lane and extend it (or do the
9176 // opposite, truncating into the top/bottom lane from a larger lane). Note
9177 // that because of the way we widen lanes, a v4i16 is really a v4i32 using the
9178 // bottom 16bits from each vector lane. This works really well with T/B
9179 // instructions, but that doesn't extend to v8i32->v8i16 where the lanes need
 9181 // to change order.
9181 //
9182 // But truncates and sext/zext are always going to be fairly common from llvm.
9183 // We have several options for how to deal with them:
9184 // - Wherever possible combine them into an instruction that makes them
9185 // "free". This includes loads/stores, which can perform the trunc as part
9186 // of the memory operation. Or certain shuffles that can be turned into
9187 // VMOVN/VMOVL.
9188 // - Lane Interleaving to transform blocks surrounded by ext/trunc. So
9189 // trunc(mul(sext(a), sext(b))) may become
9190 // VMOVNT(VMUL(VMOVLB(a), VMOVLB(b)), VMUL(VMOVLT(a), VMOVLT(b))). (Which in
9191 // this case can use VMULL). This is performed in the
9192 // MVELaneInterleavingPass.
9193 // - Otherwise we have an option. By default we would expand the
9194 // zext/sext/trunc into a series of lane extract/inserts going via GPR
9195 // registers. One for each vector lane in the vector. This can obviously be
9196 // very expensive.
 9198 // - The other option is to use the fact that loads/stores can extend/truncate
9198 // to turn a trunc into two truncating stack stores and a stack reload. This
9199 // becomes 3 back-to-back memory operations, but at least that is less than
9200 // all the insert/extracts.
9201 //
 9203 // To implement the last option, we convert certain truncs into MVETRUNC, which
 9204 // are either optimized where they can be, or eventually lowered into stack
 9205 // stores/loads. This prevents us from splitting a v8i16 trunc into two stores
 9206 // too early, where other instructions would be better, and stops us from
9206 // having to reconstruct multiple buildvector shuffles into loads/stores.
9207 if (ToVT != MVT::v8i16 && ToVT != MVT::v16i8)
9208 return SDValue();
9209 EVT FromVT = N->getOperand(0).getValueType();
9210 if (FromVT != MVT::v8i32 && FromVT != MVT::v16i16)
9211 return SDValue();
9212
9213 SDValue Lo, Hi;
9214 std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
9215 SDLoc DL(N);
9216 return DAG.getNode(ARMISD::MVETRUNC, DL, ToVT, Lo, Hi);
9217}
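
The stack-based fallback described in the comment above (two truncating stores plus one reload) can be sketched in scalar C++; the array names are hypothetical and this is only an illustration of the v8i32 -> v8i16 case:

#include <cstdint>
#include <cstring>
#include <cstdio>

int main() {
  uint32_t Src[8] = {0x00010001, 0x00020002, 0x00030003, 0x00040004,
                     0x00050005, 0x00060006, 0x00070007, 0x00080008};
  uint16_t Slot[8];                                  // the stack slot
  for (int i = 0; i < 4; ++i)
    Slot[i] = static_cast<uint16_t>(Src[i]);         // truncating store, low half
  for (int i = 0; i < 4; ++i)
    Slot[4 + i] = static_cast<uint16_t>(Src[4 + i]); // truncating store, high half
  uint16_t Result[8];
  std::memcpy(Result, Slot, sizeof(Result));         // single full-width reload
  for (int i = 0; i < 8; ++i)
    std::printf("%04x ", Result[i]);
  std::printf("\n");
  return 0;
}
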
9218
9219static SDValue LowerVectorExtend(SDNode *N, SelectionDAG &DAG,
9220 const ARMSubtarget *Subtarget) {
9221 if (!Subtarget->hasMVEIntegerOps())
9222 return SDValue();
9223
9224 // See LowerTruncate above for an explanation of MVEEXT/MVETRUNC.
9225
9226 EVT ToVT = N->getValueType(0);
9227 if (ToVT != MVT::v16i32 && ToVT != MVT::v8i32 && ToVT != MVT::v16i16)
9228 return SDValue();
9229 SDValue Op = N->getOperand(0);
9230 EVT FromVT = Op.getValueType();
9231 if (FromVT != MVT::v8i16 && FromVT != MVT::v16i8)
9232 return SDValue();
9233
9234 SDLoc DL(N);
9235 EVT ExtVT = ToVT.getHalfNumVectorElementsVT(*DAG.getContext());
9236 if (ToVT.getScalarType() == MVT::i32 && FromVT.getScalarType() == MVT::i8)
9237 ExtVT = MVT::v8i16;
9238
9239 unsigned Opcode =
9240 N->getOpcode() == ISD::SIGN_EXTEND ? ARMISD::MVESEXT : ARMISD::MVEZEXT;
9241 SDValue Ext = DAG.getNode(Opcode, DL, DAG.getVTList(ExtVT, ExtVT), Op);
9242 SDValue Ext1 = Ext.getValue(1);
9243
9244 if (ToVT.getScalarType() == MVT::i32 && FromVT.getScalarType() == MVT::i8) {
9245 Ext = DAG.getNode(N->getOpcode(), DL, MVT::v8i32, Ext);
9246 Ext1 = DAG.getNode(N->getOpcode(), DL, MVT::v8i32, Ext1);
9247 }
9248
9249 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ToVT, Ext, Ext1);
9250}
9251
9252/// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
9253/// element has been zero/sign-extended, depending on the isSigned parameter,
9254/// from an integer type half its size.
9255static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
9256 bool isSigned) {
9257 // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32.
9258 EVT VT = N->getValueType(0);
9259 if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) {
9260 SDNode *BVN = N->getOperand(0).getNode();
9261 if (BVN->getValueType(0) != MVT::v4i32 ||
9262 BVN->getOpcode() != ISD::BUILD_VECTOR)
9263 return false;
9264 unsigned LoElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
9265 unsigned HiElt = 1 - LoElt;
9266 ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));
9267 ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));
9268 ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2));
9269 ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2));
9270 if (!Lo0 || !Hi0 || !Lo1 || !Hi1)
9271 return false;
9272 if (isSigned) {
9273 if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 &&
9274 Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)
9275 return true;
9276 } else {
9277 if (Hi0->isZero() && Hi1->isZero())
9278 return true;
9279 }
9280 return false;
9281 }
9282
9283 if (N->getOpcode() != ISD::BUILD_VECTOR)
9284 return false;
9285
9286 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
9287 SDNode *Elt = N->getOperand(i).getNode();
9288 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
9289 unsigned EltSize = VT.getScalarSizeInBits();
9290 unsigned HalfSize = EltSize / 2;
9291 if (isSigned) {
9292 if (!isIntN(HalfSize, C->getSExtValue()))
9293 return false;
9294 } else {
9295 if (!isUIntN(HalfSize, C->getZExtValue()))
9296 return false;
9297 }
9298 continue;
9299 }
9300 return false;
9301 }
9302
9303 return true;
9304}
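
The constant-element case above accepts a value only if it fits in half the element width, so the implied extension is lossless. A scalar version of that check for 16-bit elements, written here only as an illustration of the isIntN/isUIntN half-width bound:

#include <cstdint>
#include <cstdio>

static bool fitsSignedHalf(int16_t V) { return V >= INT8_MIN && V <= INT8_MAX; }
static bool fitsUnsignedHalf(uint16_t V) { return V <= UINT8_MAX; }

int main() {
  std::printf("signed:   -100 -> %d, 200 -> %d\n", fitsSignedHalf(-100),
              fitsSignedHalf(200));                  // 1, 0
  std::printf("unsigned:  200 -> %d, 300 -> %d\n", fitsUnsignedHalf(200),
              fitsUnsignedHalf(300));                // 1, 0
  return 0;
}
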
9305
9306/// isSignExtended - Check if a node is a vector value that is sign-extended
9307/// or a constant BUILD_VECTOR with sign-extended elements.
9308static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
9309 if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N))
9310 return true;
9311 if (isExtendedBUILD_VECTOR(N, DAG, true))
9312 return true;
9313 return false;
9314}
9315
9316/// isZeroExtended - Check if a node is a vector value that is zero-extended (or
9317/// any-extended) or a constant BUILD_VECTOR with zero-extended elements.
9318static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
9319 if (N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND ||
9320 ISD::isZEXTLoad(N))
9321 return true;
9322 if (isExtendedBUILD_VECTOR(N, DAG, false))
9323 return true;
9324 return false;
9325}
9326
9327static EVT getExtensionTo64Bits(const EVT &OrigVT) {
9328 if (OrigVT.getSizeInBits() >= 64)
9329 return OrigVT;
9330
 9331  assert(OrigVT.isSimple() && "Expecting a simple value type");
9332
9333 MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
9334 switch (OrigSimpleTy) {
 9335  default: llvm_unreachable("Unexpected Vector Type");
9336 case MVT::v2i8:
9337 case MVT::v2i16:
9338 return MVT::v2i32;
9339 case MVT::v4i8:
9340 return MVT::v4i16;
9341 }
9342}
9343
9344/// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total
9345/// value size to 64 bits. We need a 64-bit D register as an operand to VMULL.
9346/// We insert the required extension here to get the vector to fill a D register.
9347static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG,
9348 const EVT &OrigTy,
9349 const EVT &ExtTy,
9350 unsigned ExtOpcode) {
9351 // The vector originally had a size of OrigTy. It was then extended to ExtTy.
9352 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
9353 // 64-bits we need to insert a new extension so that it will be 64-bits.
 9354  assert(ExtTy.is128BitVector() && "Unexpected extension size");
9355 if (OrigTy.getSizeInBits() >= 64)
9356 return N;
9357
9358 // Must extend size to at least 64 bits to be used as an operand for VMULL.
9359 EVT NewVT = getExtensionTo64Bits(OrigTy);
9360
9361 return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
9362}
9363
9364/// SkipLoadExtensionForVMULL - return a load of the original vector size that
9365/// does not do any sign/zero extension. If the original vector is less
9366/// than 64 bits, an appropriate extension will be added after the load to
9367/// reach a total size of 64 bits. We have to add the extension separately
9368/// because ARM does not have a sign/zero extending load for vectors.
9369static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) {
9370 EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT());
9371
9372 // The load already has the right type.
9373 if (ExtendedTy == LD->getMemoryVT())
9374 return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(),
9375 LD->getBasePtr(), LD->getPointerInfo(), LD->getAlign(),
9376 LD->getMemOperand()->getFlags());
9377
9378 // We need to create a zextload/sextload. We cannot just create a load
 9379  // followed by a sext/zext node because LowerMUL is also run during normal
9380 // operation legalization where we can't create illegal types.
9381 return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy,
9382 LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
9383 LD->getMemoryVT(), LD->getAlign(),
9384 LD->getMemOperand()->getFlags());
9385}
9386
9387/// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,
9388/// ANY_EXTEND, extending load, or BUILD_VECTOR with extended elements, return
9389/// the unextended value. The unextended vector should be 64 bits so that it can
9390/// be used as an operand to a VMULL instruction. If the original vector size
 9391/// before extension is less than 64 bits we add an extension to resize
9392/// the vector to 64 bits.
9393static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
9394 if (N->getOpcode() == ISD::SIGN_EXTEND ||
9395 N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND)
9396 return AddRequiredExtensionForVMULL(N->getOperand(0), DAG,
9397 N->getOperand(0)->getValueType(0),
9398 N->getValueType(0),
9399 N->getOpcode());
9400
9401 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
 9402    assert((ISD::isSEXTLoad(LD) || ISD::isZEXTLoad(LD)) &&
 9403           "Expected extending load");
9404
9405 SDValue newLoad = SkipLoadExtensionForVMULL(LD, DAG);
9406 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), newLoad.getValue(1));
9407 unsigned Opcode = ISD::isSEXTLoad(LD) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
9408 SDValue extLoad =
9409 DAG.getNode(Opcode, SDLoc(newLoad), LD->getValueType(0), newLoad);
9410 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 0), extLoad);
9411
9412 return newLoad;
9413 }
9414
9415 // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will
9416 // have been legalized as a BITCAST from v4i32.
9417 if (N->getOpcode() == ISD::BITCAST) {
9418 SDNode *BVN = N->getOperand(0).getNode();
 9419    assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
 9420           BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
9421 unsigned LowElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
9422 return DAG.getBuildVector(
9423 MVT::v2i32, SDLoc(N),
9424 {BVN->getOperand(LowElt), BVN->getOperand(LowElt + 2)});
9425 }
9426 // Construct a new BUILD_VECTOR with elements truncated to half the size.
 9427  assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
9428 EVT VT = N->getValueType(0);
9429 unsigned EltSize = VT.getScalarSizeInBits() / 2;
9430 unsigned NumElts = VT.getVectorNumElements();
9431 MVT TruncVT = MVT::getIntegerVT(EltSize);
9432 SmallVector<SDValue, 8> Ops;
9433 SDLoc dl(N);
9434 for (unsigned i = 0; i != NumElts; ++i) {
9435 ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
9436 const APInt &CInt = C->getAPIntValue();
9437 // Element types smaller than 32 bits are not legal, so use i32 elements.
9438 // The values are implicitly truncated so sext vs. zext doesn't matter.
9439 Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
9440 }
9441 return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
9442}
9443
9444static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
9445 unsigned Opcode = N->getOpcode();
9446 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
9447 SDNode *N0 = N->getOperand(0).getNode();
9448 SDNode *N1 = N->getOperand(1).getNode();
9449 return N0->hasOneUse() && N1->hasOneUse() &&
9450 isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
9451 }
9452 return false;
9453}
9454
9455static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
9456 unsigned Opcode = N->getOpcode();
9457 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
9458 SDNode *N0 = N->getOperand(0).getNode();
9459 SDNode *N1 = N->getOperand(1).getNode();
9460 return N0->hasOneUse() && N1->hasOneUse() &&
9461 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
9462 }
9463 return false;
9464}
9465
9466static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
9467 // Multiplications are only custom-lowered for 128-bit vectors so that
9468 // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
9469 EVT VT = Op.getValueType();
 9470  assert(VT.is128BitVector() && VT.isInteger() &&
 9471         "unexpected type for custom-lowering ISD::MUL");
9472 SDNode *N0 = Op.getOperand(0).getNode();
9473 SDNode *N1 = Op.getOperand(1).getNode();
9474 unsigned NewOpc = 0;
9475 bool isMLA = false;
9476 bool isN0SExt = isSignExtended(N0, DAG);
9477 bool isN1SExt = isSignExtended(N1, DAG);
9478 if (isN0SExt && isN1SExt)
9479 NewOpc = ARMISD::VMULLs;
9480 else {
9481 bool isN0ZExt = isZeroExtended(N0, DAG);
9482 bool isN1ZExt = isZeroExtended(N1, DAG);
9483 if (isN0ZExt && isN1ZExt)
9484 NewOpc = ARMISD::VMULLu;
9485 else if (isN1SExt || isN1ZExt) {
9486 // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
9487 // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
9488 if (isN1SExt && isAddSubSExt(N0, DAG)) {
9489 NewOpc = ARMISD::VMULLs;
9490 isMLA = true;
9491 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
9492 NewOpc = ARMISD::VMULLu;
9493 isMLA = true;
9494 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
9495 std::swap(N0, N1);
9496 NewOpc = ARMISD::VMULLu;
9497 isMLA = true;
9498 }
9499 }
9500
9501 if (!NewOpc) {
9502 if (VT == MVT::v2i64)
9503 // Fall through to expand this. It is not legal.
9504 return SDValue();
9505 else
9506 // Other vector multiplications are legal.
9507 return Op;
9508 }
9509 }
9510
9511 // Legalize to a VMULL instruction.
9512 SDLoc DL(Op);
9513 SDValue Op0;
9514 SDValue Op1 = SkipExtensionForVMULL(N1, DAG);
9515 if (!isMLA) {
9516 Op0 = SkipExtensionForVMULL(N0, DAG);
 9517    assert(Op0.getValueType().is64BitVector() &&
 9518           Op1.getValueType().is64BitVector() &&
 9519           "unexpected types for extended operands to VMULL");
9520 return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
9521 }
9522
9523 // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during
9524 // isel lowering to take advantage of no-stall back to back vmul + vmla.
9525 // vmull q0, d4, d6
9526 // vmlal q0, d5, d6
9527 // is faster than
9528 // vaddl q0, d4, d5
9529 // vmovl q1, d6
9530 // vmul q0, q0, q1
9531 SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG);
9532 SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG);
9533 EVT Op1VT = Op1.getValueType();
9534 return DAG.getNode(N0->getOpcode(), DL, VT,
9535 DAG.getNode(NewOpc, DL, VT,
9536 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
9537 DAG.getNode(NewOpc, DL, VT,
9538 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
9539}
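
The MLA rewrite described in the comment above relies on ordinary distributivity over the widened values: (ext A + ext B) * ext C equals ext A * ext C + ext B * ext C. A one-lane scalar check (illustrative only):

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t A = 40000, B = 50000, C = 60000;           // narrow values, widened to 64 bits
  uint64_t Fused = ((uint64_t)A + B) * C;             // vaddl + vmovl + vmul shape
  uint64_t Split = (uint64_t)A * C + (uint64_t)B * C; // vmull + vmlal shape
  std::printf("%llu == %llu\n", (unsigned long long)Fused,
              (unsigned long long)Split);
  return 0;
}
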
9540
9541static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, const SDLoc &dl,
9542 SelectionDAG &DAG) {
9543 // TODO: Should this propagate fast-math-flags?
9544
9545 // Convert to float
9546 // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
9547 // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
9548 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X);
9549 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y);
9550 X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X);
9551 Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);
9552 // Get reciprocal estimate.
9553 // float4 recip = vrecpeq_f32(yf);
9554 Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9555 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
9556 Y);
9557 // Because char has a smaller range than uchar, we can actually get away
9558 // without any newton steps. This requires that we use a weird bias
9559 // of 0xb000, however (again, this has been exhaustively tested).
9560 // float4 result = as_float4(as_int4(xf*recip) + 0xb000);
9561 X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
9562 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
9563 Y = DAG.getConstant(0xb000, dl, MVT::v4i32);
9564 X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
9565 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
9566 // Convert back to short.
9567 X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X);
9568 X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X);
9569 return X;
9570}
9571
9572static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl,
9573 SelectionDAG &DAG) {
9574 // TODO: Should this propagate fast-math-flags?
9575
9576 SDValue N2;
9577 // Convert to float.
9578 // float4 yf = vcvt_f32_s32(vmovl_s16(y));
9579 // float4 xf = vcvt_f32_s32(vmovl_s16(x));
9580 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0);
9581 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
9582 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
9583 N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
9584
9585 // Use reciprocal estimate and one refinement step.
9586 // float4 recip = vrecpeq_f32(yf);
9587 // recip *= vrecpsq_f32(yf, recip);
9588 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9589 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
9590 N1);
9591 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9592 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
9593 N1, N2);
9594 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
9595 // Because short has a smaller range than ushort, we can actually get away
 9596  // with only a single Newton step. This requires that we use a weird bias
 9597  // of 0x89, however (again, this has been exhaustively tested).
9598 // float4 result = as_float4(as_int4(xf*recip) + 0x89);
9599 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
9600 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
9601 N1 = DAG.getConstant(0x89, dl, MVT::v4i32);
9602 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
9603 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
9604 // Convert back to integer and return.
9605 // return vmovn_s32(vcvt_s32_f32(result));
9606 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
9607 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
9608 return N0;
9609}
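
The VRECPE/VRECPS pair above is Newton-Raphson iteration for 1/y: VRECPS(y, r) computes 2 - y*r, so r' = r * (2 - y*r) roughly doubles the number of accurate bits per step. A scalar illustration with a deliberately rough starting value standing in for VRECPE (illustrative only):

#include <cstdio>
#include <cmath>

int main() {
  float y = 7.0f;
  float r = 0.125f;                       // crude initial estimate of 1/7
  for (int step = 1; step <= 3; ++step) {
    r = r * (2.0f - y * r);               // one VRECPS-style refinement
    std::printf("step %d: r = %.9f, error = %.3e\n", step, r,
                std::fabs(r - 1.0f / y));
  }
  return 0;
}
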
9610
9611static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG,
9612 const ARMSubtarget *ST) {
9613 EVT VT = Op.getValueType();
 9614  assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
 9615         "unexpected type for custom-lowering ISD::SDIV");
9616
9617 SDLoc dl(Op);
9618 SDValue N0 = Op.getOperand(0);
9619 SDValue N1 = Op.getOperand(1);
9620 SDValue N2, N3;
9621
9622 if (VT == MVT::v8i8) {
9623 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0);
9624 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);
9625
9626 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9627 DAG.getIntPtrConstant(4, dl));
9628 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9629 DAG.getIntPtrConstant(4, dl));
9630 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9631 DAG.getIntPtrConstant(0, dl));
9632 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9633 DAG.getIntPtrConstant(0, dl));
9634
9635 N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16
9636 N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16
9637
9638 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
9639 N0 = LowerCONCAT_VECTORS(N0, DAG, ST);
9640
9641 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);
9642 return N0;
9643 }
9644 return LowerSDIV_v4i16(N0, N1, dl, DAG);
9645}
9646
9647static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG,
9648 const ARMSubtarget *ST) {
9649 // TODO: Should this propagate fast-math-flags?
9650 EVT VT = Op.getValueType();
 9651  assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
 9652         "unexpected type for custom-lowering ISD::UDIV");
9653
9654 SDLoc dl(Op);
9655 SDValue N0 = Op.getOperand(0);
9656 SDValue N1 = Op.getOperand(1);
9657 SDValue N2, N3;
9658
9659 if (VT == MVT::v8i8) {
9660 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0);
9661 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);
9662
9663 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9664 DAG.getIntPtrConstant(4, dl));
9665 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9666 DAG.getIntPtrConstant(4, dl));
9667 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9668 DAG.getIntPtrConstant(0, dl));
9669 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9670 DAG.getIntPtrConstant(0, dl));
9671
9672 N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16
9673 N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16
9674
9675 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
9676 N0 = LowerCONCAT_VECTORS(N0, DAG, ST);
9677
9678 N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
9679 DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, dl,
9680 MVT::i32),
9681 N0);
9682 return N0;
9683 }
9684
 9685  // v4i16 udiv ... Convert to float.
9686 // float4 yf = vcvt_f32_s32(vmovl_u16(y));
9687 // float4 xf = vcvt_f32_s32(vmovl_u16(x));
9688 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0);
9689 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1);
9690 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
9691 SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
9692
9693 // Use reciprocal estimate and two refinement steps.
9694 // float4 recip = vrecpeq_f32(yf);
9695 // recip *= vrecpsq_f32(yf, recip);
9696 // recip *= vrecpsq_f32(yf, recip);
9697 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9698 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
9699 BN1);
9700 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9701 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
9702 BN1, N2);
9703 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
9704 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9705 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
9706 BN1, N2);
9707 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
9708 // Simply multiplying by the reciprocal estimate can leave us a few ulps
9709 // too low, so we add 2 ulps (exhaustive testing shows that this is enough,
9710 // and that it will never cause us to return an answer too large).
9711 // float4 result = as_float4(as_int4(xf*recip) + 2);
9712 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
9713 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
9714 N1 = DAG.getConstant(2, dl, MVT::v4i32);
9715 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
9716 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
9717 // Convert back to integer and return.
9718 // return vmovn_u32(vcvt_s32_f32(result));
9719 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
9720 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
9721 return N0;
9722}
9723
9724static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) {
9725 SDNode *N = Op.getNode();
9726 EVT VT = N->getValueType(0);
9727 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
9728
9729 SDValue Carry = Op.getOperand(2);
9730
9731 SDLoc DL(Op);
9732
9733 SDValue Result;
9734 if (Op.getOpcode() == ISD::ADDCARRY) {
9735 // This converts the boolean value carry into the carry flag.
9736 Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
9737
9738 // Do the addition proper using the carry flag we wanted.
9739 Result = DAG.getNode(ARMISD::ADDE, DL, VTs, Op.getOperand(0),
9740 Op.getOperand(1), Carry);
9741
9742 // Now convert the carry flag into a boolean value.
9743 Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
9744 } else {
 9745    // ARMISD::SUBE expects a carry, not a borrow like ISD::SUBCARRY, so we
 9746    // have to invert the carry first.
9747 Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
9748 DAG.getConstant(1, DL, MVT::i32), Carry);
9749 // This converts the boolean value carry into the carry flag.
9750 Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
9751
9752 // Do the subtraction proper using the carry flag we wanted.
9753 Result = DAG.getNode(ARMISD::SUBE, DL, VTs, Op.getOperand(0),
9754 Op.getOperand(1), Carry);
9755
9756 // Now convert the carry flag into a boolean value.
9757 Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
9758 // But the carry returned by ARMISD::SUBE is not a borrow as expected
9759 // by ISD::SUBCARRY, so compute 1 - C.
9760 Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
9761 DAG.getConstant(1, DL, MVT::i32), Carry);
9762 }
9763
9764 // Return both values.
9765 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Carry);
9766}
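
The 1 - C steps above convert between the two conventions: ISD::SUBCARRY carries a borrow bit (1 means a borrow happened) while ARM's SBC-style nodes consume and produce a carry bit (1 means no borrow). A scalar model of one 32-bit limb, assuming those conventions (illustrative only):

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t A = 5, B = 9;
  uint32_t BorrowIn = 0;                           // ISD::SUBCARRY convention
  uint32_t CarryIn = 1 - BorrowIn;                 // ARM convention
  uint64_t Wide = (uint64_t)A - B - (1 - CarryIn); // SBC: A - B - !carry
  uint32_t Result = (uint32_t)Wide;
  uint32_t CarryOut = (Wide >> 32) ? 0u : 1u;      // wrapped => borrow => carry clear
  uint32_t BorrowOut = 1 - CarryOut;               // back to SUBCARRY's convention
  std::printf("5 - 9 = 0x%08x, borrow out = %u\n", Result, BorrowOut);
  return 0;
}
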
9767
9768SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
 9769  assert(Subtarget->isTargetDarwin());
9770
9771 // For iOS, we want to call an alternative entry point: __sincos_stret,
9772 // return values are passed via sret.
9773 SDLoc dl(Op);
9774 SDValue Arg = Op.getOperand(0);
9775 EVT ArgVT = Arg.getValueType();
9776 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
9777 auto PtrVT = getPointerTy(DAG.getDataLayout());
9778
9779 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
9780 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9781
9782 // Pair of floats / doubles used to pass the result.
9783 Type *RetTy = StructType::get(ArgTy, ArgTy);
9784 auto &DL = DAG.getDataLayout();
9785
9786 ArgListTy Args;
9787 bool ShouldUseSRet = Subtarget->isAPCS_ABI();
9788 SDValue SRet;
9789 if (ShouldUseSRet) {
9790 // Create stack object for sret.
9791 const uint64_t ByteSize = DL.getTypeAllocSize(RetTy);
9792 const Align StackAlign = DL.getPrefTypeAlign(RetTy);
9793 int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false);
9794 SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy(DL));
9795
9796 ArgListEntry Entry;
9797 Entry.Node = SRet;
9798 Entry.Ty = RetTy->getPointerTo();
9799 Entry.IsSExt = false;
9800 Entry.IsZExt = false;
9801 Entry.IsSRet = true;
9802 Args.push_back(Entry);
9803 RetTy = Type::getVoidTy(*DAG.getContext());
9804 }
9805
9806 ArgListEntry Entry;
9807 Entry.Node = Arg;
9808 Entry.Ty = ArgTy;
9809 Entry.IsSExt = false;
9810 Entry.IsZExt = false;
9811 Args.push_back(Entry);
9812
9813 RTLIB::Libcall LC =
9814 (ArgVT == MVT::f64) ? RTLIB::SINCOS_STRET_F64 : RTLIB::SINCOS_STRET_F32;
9815 const char *LibcallName = getLibcallName(LC);
9816 CallingConv::ID CC = getLibcallCallingConv(LC);
9817 SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL));
9818
9819 TargetLowering::CallLoweringInfo CLI(DAG);
9820 CLI.setDebugLoc(dl)
9821 .setChain(DAG.getEntryNode())
9822 .setCallee(CC, RetTy, Callee, std::move(Args))
9823 .setDiscardResult(ShouldUseSRet);
9824 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
9825
9826 if (!ShouldUseSRet)
9827 return CallResult.first;
9828
9829 SDValue LoadSin =
9830 DAG.getLoad(ArgVT, dl, CallResult.second, SRet, MachinePointerInfo());
9831
9832 // Address of cos field.
9833 SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, SRet,
9834 DAG.getIntPtrConstant(ArgVT.getStoreSize(), dl));
9835 SDValue LoadCos =
9836 DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add, MachinePointerInfo());
9837
9838 SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
9839 return DAG.getNode(ISD::MERGE_VALUES, dl, Tys,
9840 LoadSin.getValue(0), LoadCos.getValue(0));
9841}
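
The sret path above expects __sincos_stret to write a {sin, cos} pair into the stack slot, with cos at offset sizeof(arg). A portable stand-in with the same layout (the real symbol is Darwin-only; the names here are hypothetical and illustrative):

#include <cmath>
#include <cstdio>

struct SinCosF {
  float Sin; // read from SRet + 0
  float Cos; // read from SRet + sizeof(float)
};

static void sincos_stret_like(SinCosF *Out, float X) {
  Out->Sin = std::sin(X);
  Out->Cos = std::cos(X);
}

int main() {
  SinCosF R;
  sincos_stret_like(&R, 1.0f);
  std::printf("sin(1) = %f, cos(1) = %f\n", R.Sin, R.Cos);
  return 0;
}
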
9842
9843SDValue ARMTargetLowering::LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG,
9844 bool Signed,
9845 SDValue &Chain) const {
9846 EVT VT = Op.getValueType();
 9847  assert((VT == MVT::i32 || VT == MVT::i64) &&
 9848         "unexpected type for custom lowering DIV");
9849 SDLoc dl(Op);
9850
9851 const auto &DL = DAG.getDataLayout();
9852 const auto &TLI = DAG.getTargetLoweringInfo();
9853
9854 const char *Name = nullptr;
9855 if (Signed)
9856 Name = (VT == MVT::i32) ? "__rt_sdiv" : "__rt_sdiv64";
9857 else
9858 Name = (VT == MVT::i32) ? "__rt_udiv" : "__rt_udiv64";
9859
9860 SDValue ES = DAG.getExternalSymbol(Name, TLI.getPointerTy(DL));
9861
9862 ARMTargetLowering::ArgListTy Args;
9863
9864 for (auto AI : {1, 0}) {
9865 ArgListEntry Arg;
9866 Arg.Node = Op.getOperand(AI);
9867 Arg.Ty = Arg.Node.getValueType().getTypeForEVT(*DAG.getContext());
9868 Args.push_back(Arg);
9869 }
9870
9871 CallLoweringInfo CLI(DAG);
9872 CLI.setDebugLoc(dl)
9873 .setChain(Chain)
9874 .setCallee(CallingConv::ARM_AAPCS_VFP, VT.getTypeForEVT(*DAG.getContext()),
9875 ES, std::move(Args));
9876
9877 return LowerCallTo(CLI).first;
9878}
9879
9880// This is a code size optimisation: return the original SDIV node to
9881// DAGCombiner when we don't want to expand SDIV into a sequence of
9882// instructions, and an empty node otherwise which will cause the
9883// SDIV to be expanded in DAGCombine.
9884SDValue
9885ARMTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
9886 SelectionDAG &DAG,
9887 SmallVectorImpl<SDNode *> &Created) const {
9888 // TODO: Support SREM
9889 if (N->getOpcode() != ISD::SDIV)
9890 return SDValue();
9891
9892 const auto &ST = DAG.getSubtarget<ARMSubtarget>();
9893 const bool MinSize = ST.hasMinSize();
9894 const bool HasDivide = ST.isThumb() ? ST.hasDivideInThumbMode()
9895 : ST.hasDivideInARMMode();
9896
9897 // Don't touch vector types; rewriting this may lead to scalarizing
9898 // the int divs.
9899 if (N->getOperand(0).getValueType().isVector())
9900 return SDValue();
9901
9902 // Bail if MinSize is not set, and also for both ARM and Thumb mode we need
9903 // hwdiv support for this to be really profitable.
9904 if (!(MinSize && HasDivide))
9905 return SDValue();
9906
9907 // ARM mode is a bit simpler than Thumb: we can handle large power
9908 // of 2 immediates with 1 mov instruction; no further checks required,
9909 // just return the sdiv node.
9910 if (!ST.isThumb())
9911 return SDValue(N, 0);
9912
9913 // In Thumb mode, immediates larger than 128 need a wide 4-byte MOV,
9914 // and thus lose the code size benefits of a MOVS that requires only 2.
9915 // TargetTransformInfo and 'getIntImmCodeSizeCost' could be helpful here,
9916 // but as it's doing exactly this, it's not worth the trouble to get TTI.
9917 if (Divisor.sgt(128))
9918 return SDValue();
9919
9920 return SDValue(N, 0);
9921}
9922
9923SDValue ARMTargetLowering::LowerDIV_Windows(SDValue Op, SelectionDAG &DAG,
9924 bool Signed) const {
 9925  assert(Op.getValueType() == MVT::i32 &&
 9926         "unexpected type for custom lowering DIV");
9927 SDLoc dl(Op);
9928
9929 SDValue DBZCHK = DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other,
9930 DAG.getEntryNode(), Op.getOperand(1));
9931
9932 return LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
9933}
9934
9935static SDValue WinDBZCheckDenominator(SelectionDAG &DAG, SDNode *N, SDValue InChain) {
9936 SDLoc DL(N);
9937 SDValue Op = N->getOperand(1);
9938 if (N->getValueType(0) == MVT::i32)
9939 return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain, Op);
9940 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Op,
9941 DAG.getConstant(0, DL, MVT::i32));
9942 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Op,
9943 DAG.getConstant(1, DL, MVT::i32));
9944 return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain,
9945 DAG.getNode(ISD::OR, DL, MVT::i32, Lo, Hi));
9946}
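
For the i64 case above the only question is whether the denominator is zero, so the check ORs the two 32-bit halves rather than comparing the full 64-bit value. A scalar sketch (illustrative only):

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t Den = 0x0000000100000000ULL;   // non-zero, but the low half is 0
  uint32_t Lo = (uint32_t)Den;
  uint32_t Hi = (uint32_t)(Den >> 32);
  std::printf("denominator is %s\n", (Lo | Hi) == 0 ? "zero" : "non-zero");
  return 0;
}
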
9947
9948void ARMTargetLowering::ExpandDIV_Windows(
9949 SDValue Op, SelectionDAG &DAG, bool Signed,
9950 SmallVectorImpl<SDValue> &Results) const {
9951 const auto &DL = DAG.getDataLayout();
9952 const auto &TLI = DAG.getTargetLoweringInfo();
9953
 9954  assert(Op.getValueType() == MVT::i64 &&
 9955         "unexpected type for custom lowering DIV");
9956 SDLoc dl(Op);
9957
9958 SDValue DBZCHK = WinDBZCheckDenominator(DAG, Op.getNode(), DAG.getEntryNode());
9959
9960 SDValue Result = LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
9961
9962 SDValue Lower = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Result);
9963 SDValue Upper = DAG.getNode(ISD::SRL, dl, MVT::i64, Result,
9964 DAG.getConstant(32, dl, TLI.getPointerTy(DL)));
9965 Upper = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Upper);
9966
9967 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lower, Upper));
9968}
9969
9970static SDValue LowerPredicateLoad(SDValue Op, SelectionDAG &DAG) {
9971 LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
9972 EVT MemVT = LD->getMemoryVT();
 9973  assert((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
 9974          MemVT == MVT::v16i1) &&
 9975         "Expected a predicate type!");
 9976  assert(MemVT == Op.getValueType());
 9977  assert(LD->getExtensionType() == ISD::NON_EXTLOAD &&
 9978         "Expected a non-extending load");
9979 assert(LD->isUnindexed() && "Expected a unindexed load");
9980
9981 // The basic MVE VLDR on a v2i1/v4i1/v8i1 actually loads the entire 16bit
9982 // predicate, with the "v4i1" bits spread out over the 16 bits loaded. We
9983 // need to make sure that 8/4/2 bits are actually loaded into the correct
9984 // place, which means loading the value and then shuffling the values into
9985 // the bottom bits of the predicate.
9986 // Equally, VLDR for an v16i1 will actually load 32bits (so will be incorrect
9987 // for BE).
9988 // Speaking of BE, apparently the rest of llvm will assume a reverse order to
9989 // a natural VMSR(load), so needs to be reversed.
9990
9991 SDLoc dl(Op);
9992 SDValue Load = DAG.getExtLoad(
9993 ISD::EXTLOAD, dl, MVT::i32, LD->getChain(), LD->getBasePtr(),
9994 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()),
9995 LD->getMemOperand());
9996 SDValue Val = Load;
9997 if (DAG.getDataLayout().isBigEndian())
9998 Val = DAG.getNode(ISD::SRL, dl, MVT::i32,
9999 DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, Load),
10000 DAG.getConstant(32 - MemVT.getSizeInBits(), dl, MVT::i32));
10001 SDValue Pred = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v16i1, Val);
10002 if (MemVT != MVT::v16i1)
10003 Pred = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MemVT, Pred,
10004 DAG.getConstant(0, dl, MVT::i32));
10005 return DAG.getMergeValues({Pred, Load.getValue(1)}, dl);
10006}
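
On big-endian targets the code above bit-reverses the 32-bit extending load and shifts it right by 32 - MemVT.getSizeInBits() so that the predicate bits end up in the low bits handed to PREDICATE_CAST. A minimal standalone model of just that shift arithmetic (plain C++, no LLVM types; the helper names and the input value are purely illustrative):

#include <cassert>
#include <cstdint>

// Model of the big-endian fix-up in LowerPredicateLoad: bit-reverse the
// loaded 32-bit scalar, then shift so an N-bit predicate sits in the low N bits.
static uint32_t bitReverse32(uint32_t V) {
  uint32_t R = 0;
  for (int I = 0; I < 32; ++I)
    R |= ((V >> I) & 1u) << (31 - I);
  return R;
}

static uint32_t bigEndianPredicateFixup(uint32_t Loaded, unsigned MemBits) {
  return bitReverse32(Loaded) >> (32 - MemBits);
}

int main() {
  // v4i1: MemVT.getSizeInBits() == 4, so the shift amount is 28.
  assert(bigEndianPredicateFixup(0x0000000Fu, 4) == 0xFu);
  return 0;
}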
10007
10008void ARMTargetLowering::LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
10009 SelectionDAG &DAG) const {
10010 LoadSDNode *LD = cast<LoadSDNode>(N);
10011 EVT MemVT = LD->getMemoryVT();
10012 assert(LD->isUnindexed() && "Loads should be unindexed at this point.");
10013
10014 if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
10015 !Subtarget->isThumb1Only() && LD->isVolatile()) {
10016 SDLoc dl(N);
10017 SDValue Result = DAG.getMemIntrinsicNode(
10018 ARMISD::LDRD, dl, DAG.getVTList({MVT::i32, MVT::i32, MVT::Other}),
10019 {LD->getChain(), LD->getBasePtr()}, MemVT, LD->getMemOperand());
10020 SDValue Lo = Result.getValue(DAG.getDataLayout().isLittleEndian() ? 0 : 1);
10021 SDValue Hi = Result.getValue(DAG.getDataLayout().isLittleEndian() ? 1 : 0);
10022 SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
10023 Results.append({Pair, Result.getValue(2)});
10024 }
10025}
10026
10027static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) {
10028 StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
10029 EVT MemVT = ST->getMemoryVT();
10030 assert((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
10031 MemVT == MVT::v16i1) &&
10032 "Expected a predicate type!");
10033 assert(MemVT == ST->getValue().getValueType());
10034 assert(!ST->isTruncatingStore() && "Expected a non-extending store");
10035 assert(ST->isUnindexed() && "Expected a unindexed store");
10036
10037 // Only store the v2i1 or v4i1 or v8i1 worth of bits, via a buildvector with
10038 // top bits unset and a scalar store.
10039 SDLoc dl(Op);
10040 SDValue Build = ST->getValue();
10041 if (MemVT != MVT::v16i1) {
10042 SmallVector<SDValue, 16> Ops;
10043 for (unsigned I = 0; I < MemVT.getVectorNumElements(); I++) {
10044 unsigned Elt = DAG.getDataLayout().isBigEndian()
10045 ? MemVT.getVectorNumElements() - I - 1
10046 : I;
10047 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, Build,
10048 DAG.getConstant(Elt, dl, MVT::i32)));
10049 }
10050 for (unsigned I = MemVT.getVectorNumElements(); I < 16; I++)
10051 Ops.push_back(DAG.getUNDEF(MVT::i32));
10052 Build = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i1, Ops);
10053 }
10054 SDValue GRP = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Build);
10055 if (MemVT == MVT::v16i1 && DAG.getDataLayout().isBigEndian())
10056 GRP = DAG.getNode(ISD::SRL, dl, MVT::i32,
10057 DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, GRP),
10058 DAG.getConstant(16, dl, MVT::i32));
10059 return DAG.getTruncStore(
10060 ST->getChain(), dl, GRP, ST->getBasePtr(),
10061 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()),
10062 ST->getMemOperand());
10063}
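
For big-endian targets the store path above rebuilds the predicate with the lane order reversed (Elt = NumElts - I - 1) before the PREDICATE_CAST and truncating store. A tiny model of that index mapping; the helper name is hypothetical and only mirrors the expression used above:

#include <cassert>

// Lane index used when extracting elements of the incoming predicate:
// identity on little-endian, reversed on big-endian.
static unsigned storeLaneIndex(unsigned I, unsigned NumElts, bool BigEndian) {
  return BigEndian ? NumElts - I - 1 : I;
}

int main() {
  // A v4i1 store on a big-endian target visits the lanes as 3, 2, 1, 0.
  assert(storeLaneIndex(0, 4, /*BigEndian=*/true) == 3);
  assert(storeLaneIndex(3, 4, /*BigEndian=*/true) == 0);
  assert(storeLaneIndex(2, 4, /*BigEndian=*/false) == 2);
  return 0;
}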
10064
10065static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG,
10066 const ARMSubtarget *Subtarget) {
10067 StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
10068 EVT MemVT = ST->getMemoryVT();
10069 assert(ST->isUnindexed() && "Stores should be unindexed at this point.");
10070
10071 if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
10072 !Subtarget->isThumb1Only() && ST->isVolatile()) {
10073 SDNode *N = Op.getNode();
10074 SDLoc dl(N);
10075
10076 SDValue Lo = DAG.getNode(
10077 ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
10078 DAG.getTargetConstant(DAG.getDataLayout().isLittleEndian() ? 0 : 1, dl,
10079 MVT::i32));
10080 SDValue Hi = DAG.getNode(
10081 ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
10082 DAG.getTargetConstant(DAG.getDataLayout().isLittleEndian() ? 1 : 0, dl,
10083 MVT::i32));
10084
10085 return DAG.getMemIntrinsicNode(ARMISD::STRD, dl, DAG.getVTList(MVT::Other),
10086 {ST->getChain(), Lo, Hi, ST->getBasePtr()},
10087 MemVT, ST->getMemOperand());
10088 } else if (Subtarget->hasMVEIntegerOps() &&
10089 ((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
10090 MemVT == MVT::v16i1))) {
10091 return LowerPredicateStore(Op, DAG);
10092 }
10093
10094 return SDValue();
10095}
10096
10097static bool isZeroVector(SDValue N) {
10098 return (ISD::isBuildVectorAllZeros(N.getNode()) ||
10099 (N->getOpcode() == ARMISD::VMOVIMM &&
10100 isNullConstant(N->getOperand(0))));
10101}
10102
10103static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) {
10104 MaskedLoadSDNode *N = cast<MaskedLoadSDNode>(Op.getNode());
10105 MVT VT = Op.getSimpleValueType();
10106 SDValue Mask = N->getMask();
10107 SDValue PassThru = N->getPassThru();
10108 SDLoc dl(Op);
10109
10110 if (isZeroVector(PassThru))
10111 return Op;
10112
10113 // MVE Masked loads use zero as the passthru value. Here we convert undef to
10114 // zero too, and other values are lowered to a select.
10115 SDValue ZeroVec = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
10116 DAG.getTargetConstant(0, dl, MVT::i32));
10117 SDValue NewLoad = DAG.getMaskedLoad(
10118 VT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask, ZeroVec,
10119 N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(),
10120 N->getExtensionType(), N->isExpandingLoad());
10121 SDValue Combo = NewLoad;
10122 bool PassThruIsCastZero = (PassThru.getOpcode() == ISD::BITCAST ||
10123 PassThru.getOpcode() == ARMISD::VECTOR_REG_CAST) &&
10124 isZeroVector(PassThru->getOperand(0));
10125 if (!PassThru.isUndef() && !PassThruIsCastZero)
10126 Combo = DAG.getNode(ISD::VSELECT, dl, VT, Mask, NewLoad, PassThru);
10127 return DAG.getMergeValues({Combo, NewLoad.getValue(1)}, dl);
10128}
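
LowerMLOAD rewrites the masked load to use a zero passthru (the form MVE provides directly) and, when the original passthru is neither undef nor already zero, re-introduces it with a VSELECT. A per-lane scalar model of the resulting semantics; it assumes nothing about MVE itself and only illustrates the equivalence:

#include <cassert>
#include <cstdint>

// Per-lane behaviour after the rewrite: inactive lanes read 0 from the
// zero-passthru load, and the trailing select restores the original passthru.
static int32_t maskedLoadLane(bool MaskBit, int32_t MemValue, int32_t PassThru) {
  int32_t Loaded = MaskBit ? MemValue : 0; // masked load with zero passthru
  return MaskBit ? Loaded : PassThru;      // VSELECT(Mask, NewLoad, PassThru)
}

int main() {
  assert(maskedLoadLane(true, 42, 7) == 42);  // active lane: memory value
  assert(maskedLoadLane(false, 42, 7) == 7);  // inactive lane: passthru
  return 0;
}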
10129
10130static SDValue LowerVecReduce(SDValue Op, SelectionDAG &DAG,
10131 const ARMSubtarget *ST) {
10132 if (!ST->hasMVEIntegerOps())
10133 return SDValue();
10134
10135 SDLoc dl(Op);
10136 unsigned BaseOpcode = 0;
10137 switch (Op->getOpcode()) {
10138 default: llvm_unreachable("Expected VECREDUCE opcode");
10139 case ISD::VECREDUCE_FADD: BaseOpcode = ISD::FADD; break;
10140 case ISD::VECREDUCE_FMUL: BaseOpcode = ISD::FMUL; break;
10141 case ISD::VECREDUCE_MUL: BaseOpcode = ISD::MUL; break;
10142 case ISD::VECREDUCE_AND: BaseOpcode = ISD::AND; break;
10143 case ISD::VECREDUCE_OR: BaseOpcode = ISD::OR; break;
10144 case ISD::VECREDUCE_XOR: BaseOpcode = ISD::XOR; break;
10145 case ISD::VECREDUCE_FMAX: BaseOpcode = ISD::FMAXNUM; break;
10146 case ISD::VECREDUCE_FMIN: BaseOpcode = ISD::FMINNUM; break;
10147 }
10148
10149 SDValue Op0 = Op->getOperand(0);
10150 EVT VT = Op0.getValueType();
10151 EVT EltVT = VT.getVectorElementType();
10152 unsigned NumElts = VT.getVectorNumElements();
10153 unsigned NumActiveLanes = NumElts;
10154
10155 assert((NumActiveLanes == 16 || NumActiveLanes == 8 || NumActiveLanes == 4 ||
10156 NumActiveLanes == 2) &&
10157 "Only expected a power 2 vector size");
10158
10159 // Use Mul(X, Rev(X)) until 4 items remain. Going down to 4 vector elements
10160 // allows us to easily extract vector elements from the lanes.
10161 while (NumActiveLanes > 4) {
10162 unsigned RevOpcode = NumActiveLanes == 16 ? ARMISD::VREV16 : ARMISD::VREV32;
10163 SDValue Rev = DAG.getNode(RevOpcode, dl, VT, Op0);
10164 Op0 = DAG.getNode(BaseOpcode, dl, VT, Op0, Rev);
10165 NumActiveLanes /= 2;
10166 }
10167
10168 SDValue Res;
10169 if (NumActiveLanes == 4) {
10170 // The remaining 4 elements are summed sequentially
10171 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10172 DAG.getConstant(0 * NumElts / 4, dl, MVT::i32));
10173 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10174 DAG.getConstant(1 * NumElts / 4, dl, MVT::i32));
10175 SDValue Ext2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10176 DAG.getConstant(2 * NumElts / 4, dl, MVT::i32));
10177 SDValue Ext3 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10178 DAG.getConstant(3 * NumElts / 4, dl, MVT::i32));
10179 SDValue Res0 = DAG.getNode(BaseOpcode, dl, EltVT, Ext0, Ext1, Op->getFlags());
10180 SDValue Res1 = DAG.getNode(BaseOpcode, dl, EltVT, Ext2, Ext3, Op->getFlags());
10181 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res0, Res1, Op->getFlags());
10182 } else {
10183 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10184 DAG.getConstant(0, dl, MVT::i32));
10185 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10186 DAG.getConstant(1, dl, MVT::i32));
10187 Res = DAG.getNode(BaseOpcode, dl, EltVT, Ext0, Ext1, Op->getFlags());
10188 }
10189
10190 // Result type may be wider than element type.
10191 if (EltVT != Op->getValueType(0))
10192 Res = DAG.getNode(ISD::ANY_EXTEND, dl, Op->getValueType(0), Res);
10193 return Res;
10194}
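
LowerVecReduce keeps halving the number of active lanes by combining the vector with a VREV-permuted copy of itself, then finishes the last four (or two) lanes with scalar extracts. The sketch below models only the shape of that reduction tree using integer addition as the base operation; the actual lane pairing chosen by VREV16/VREV32 differs, so treat it as an illustration rather than a faithful lowering:

#include <cassert>
#include <cstddef>
#include <vector>

// Pairwise reduction in the spirit of LowerVecReduce: halve the active lane
// count each step, then fold the remaining handful of scalars sequentially.
static int reducePairwise(std::vector<int> V) {
  std::size_t Active = V.size();
  while (Active > 4) {
    for (std::size_t I = 0; I < Active / 2; ++I)
      V[I] = V[I] + V[Active - 1 - I]; // stand-in for Op0 = Op(Op0, Rev(Op0))
    Active /= 2;
  }
  int Res = 0;
  for (std::size_t I = 0; I < Active; ++I)
    Res += V[I]; // sequential combine of the last lanes
  return Res;
}

int main() {
  std::vector<int> V{1, 2, 3, 4, 5, 6, 7, 8};
  assert(reducePairwise(V) == 36);
  return 0;
}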
10195
10196static SDValue LowerVecReduceF(SDValue Op, SelectionDAG &DAG,
10197 const ARMSubtarget *ST) {
10198 if (!ST->hasMVEFloatOps())
10199 return SDValue();
10200 return LowerVecReduce(Op, DAG, ST);
10201}
10202
10203static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
10204 if (isStrongerThanMonotonic(cast<AtomicSDNode>(Op)->getSuccessOrdering()))
10205 // Acquire/Release load/store is not legal for targets without a dmb or
10206 // equivalent available.
10207 return SDValue();
10208
10209 // Monotonic load/store is legal for all targets.
10210 return Op;
10211}
10212
10213static void ReplaceREADCYCLECOUNTER(SDNode *N,
10214 SmallVectorImpl<SDValue> &Results,
10215 SelectionDAG &DAG,
10216 const ARMSubtarget *Subtarget) {
10217 SDLoc DL(N);
10218 // Under Power Management extensions, the cycle-count is:
10219 // mrc p15, #0, <Rt>, c9, c13, #0
10220 SDValue Ops[] = { N->getOperand(0), // Chain
10221 DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),
10222 DAG.getTargetConstant(15, DL, MVT::i32),
10223 DAG.getTargetConstant(0, DL, MVT::i32),
10224 DAG.getTargetConstant(9, DL, MVT::i32),
10225 DAG.getTargetConstant(13, DL, MVT::i32),
10226 DAG.getTargetConstant(0, DL, MVT::i32)
10227 };
10228
10229 SDValue Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
10230 DAG.getVTList(MVT::i32, MVT::Other), Ops);
10231 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Cycles32,
10232 DAG.getConstant(0, DL, MVT::i32)));
10233 Results.push_back(Cycles32.getValue(1));
10234}
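
The node sequence above is the selection-DAG form of reading the PMU cycle counter and widening it to i64 with a zero high half. As a rough user-level illustration only (not code from the LLVM tree, and assuming an ARM target where the PMCCNTR register is accessible), the same read could be written as:

#include <cstdint>

// Illustrative equivalent of the lowering above: read the 32-bit cycle count
// via the coprocessor register encoded as p15, 0, c9, c13, 0, then pair it
// with a zero high half (the BUILD_PAIR with constant 0).
static uint64_t readCycleCounter() {
  uint32_t Cycles32 = 0;
#if defined(__arm__)
  asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(Cycles32));
#endif
  return static_cast<uint64_t>(Cycles32);
}

int main() {
  volatile uint64_t C = readCycleCounter(); // keep the call from being elided
  (void)C;
  return 0;
}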
10235
10236static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
10237 SDLoc dl(V.getNode());
10238 SDValue VLo = DAG.getAnyExtOrTrunc(V, dl, MVT::i32);
10239 SDValue VHi = DAG.getAnyExtOrTrunc(
10240 DAG.getNode(ISD::SRL, dl, MVT::i64, V, DAG.getConstant(32, dl, MVT::i32)),
10241 dl, MVT::i32);
10242 bool isBigEndian = DAG.getDataLayout().isBigEndian();
10243 if (isBigEndian)
10244 std::swap (VLo, VHi);
10245 SDValue RegClass =
10246 DAG.getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
10247 SDValue SubReg0 = DAG.getTargetConstant(ARM::gsub_0, dl, MVT::i32);
10248 SDValue SubReg1 = DAG.getTargetConstant(ARM::gsub_1, dl, MVT::i32);
10249 const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
10250 return SDValue(
10251 DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
10252}
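
createGPRPairNode splits the i64 operand into two i32 halves and, on big-endian targets, swaps them before the REG_SEQUENCE that forms the GPRPair. A minimal plain-integer model of that split; the function name here is hypothetical:

#include <cassert>
#include <cstdint>
#include <utility>

// Split an i64 into the (gsub_0, gsub_1) halves the way createGPRPairNode
// does: low half first on little-endian, swapped on big-endian.
static std::pair<uint32_t, uint32_t> splitForGPRPair(uint64_t V, bool BigEndian) {
  uint32_t Lo = static_cast<uint32_t>(V);       // truncate
  uint32_t Hi = static_cast<uint32_t>(V >> 32); // srl 32 + truncate
  if (BigEndian)
    std::swap(Lo, Hi);
  return {Lo, Hi};
}

int main() {
  auto LE = splitForGPRPair(0x1122334455667788ULL, /*BigEndian=*/false);
  assert(LE.first == 0x55667788u && LE.second == 0x11223344u);
  auto BE = splitForGPRPair(0x1122334455667788ULL, /*BigEndian=*/true);
  assert(BE.first == 0x11223344u && BE.second == 0x55667788u);
  return 0;
}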
10253
10254static void ReplaceCMP_SWAP_64Results(SDNode *N,
10255 SmallVectorImpl<SDValue> & Results,
10256 SelectionDAG &DAG) {
10257 assert(N->getValueType(0) == MVT::i64 &&
10258 "AtomicCmpSwap on types less than 64 should be legal");
10259 SDValue Ops[] = {N->getOperand(1),
10260 createGPRPairNode(DAG, N->getOperand(2)),
10261 createGPRPairNode(DAG, N->getOperand(3)),
10262 N->getOperand(0)};
10263 SDNode *CmpSwap = DAG.getMachineNode(
10264 ARM::CMP_SWAP_64, SDLoc(N),
10265 DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other), Ops);
10266
10267 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
10268 DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
10269
10270 bool isBigEndian = DAG.getDataLayout().isBigEndian();
10271
10272 SDValue Lo =
10273 DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_1 : ARM::gsub_0,
10274 SDLoc(N), MVT::i32, SDValue(CmpSwap, 0));
10275 SDValue Hi =
10276 DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_0 : ARM::gsub_1,
10277 SDLoc(N), MVT::i32, SDValue(CmpSwap, 0));
10278 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i64, Lo, Hi));
10279 Results.push_back(SDValue(CmpSwap, 2));
10280}
10281
10282SDValue ARMTargetLowering::LowerFSETCC(SDValue Op, SelectionDAG &DAG) const {
10283 SDLoc dl(Op);
10284 EVT VT = Op.getValueType();
10285 SDValue Chain = Op.getOperand(0);
10286 SDValue LHS = Op.getOperand(1);
10287 SDValue RHS = Op.getOperand(2);
10288 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
10289 bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS;
10290
10291 // If we don't have instructions of this float type then soften to a libcall
10292 // and use SETCC instead.
10293 if (isUnsupportedFloatingType(LHS.getValueType())) {
5. Taking false branch
10294 DAG.getTargetLoweringInfo().softenSetCCOperands(
10295 DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS, Chain, IsSignaling);
10296 if (!RHS.getNode()) {
10297 RHS = DAG.getConstant(0, dl, LHS.getValueType());
10298 CC = ISD::SETNE;
10299 }
10300 SDValue Result = DAG.getNode(ISD::SETCC, dl, VT, LHS, RHS,
10301 DAG.getCondCode(CC));
10302 return DAG.getMergeValues({Result, Chain}, dl);
10303 }
10304
10305 ARMCC::CondCodes CondCode, CondCode2;
10306 FPCCToARMCC(CC, CondCode, CondCode2);
10307
10308 // FIXME: Chain is not handled correctly here. Currently the FPSCR is implicit
10309 // in CMPFP and CMPFPE, but instead it should be made explicit by these
10310 // instructions using a chain instead of glue. This would also fix the problem
10311 // here (and also in LowerSELECT_CC) where we generate two comparisons when
10312 // CondCode2 != AL.
10313 SDValue True = DAG.getConstant(1, dl, VT);
10314 SDValue False = DAG.getConstant(0, dl, VT);
10315 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
10316 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
10317 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, IsSignaling);
6. Calling 'ARMTargetLowering::getVFPCmp'
10318 SDValue Result = getCMOV(dl, VT, False, True, ARMcc, CCR, Cmp, DAG);
10319 if (CondCode2 != ARMCC::AL) {
10320 ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
10321 Cmp = getVFPCmp(LHS, RHS, DAG, dl, IsSignaling);
10322 Result = getCMOV(dl, VT, Result, True, ARMcc, CCR, Cmp, DAG);
10323 }
10324 return DAG.getMergeValues({Result, Chain}, dl);
10325}
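
When FPCCToARMCC has to express the IEEE condition as two ARM condition codes (CondCode2 != ARMCC::AL), the code above layers a second getCMOV over the first, which is logically an OR of the two flag tests against the same VFP comparison. A scalar model of that structure, with the two predicates left abstract:

#include <cassert>

// Model of the chained CMOVs in LowerFSETCC: start from the first condition,
// then a second conditional move can only flip the result to True.
static int setccWithTwoConditions(bool Cond1, bool Cond2, bool HasSecond) {
  int Result = Cond1 ? 1 : 0;    // getCMOV(False, True, CondCode)
  if (HasSecond)                 // CondCode2 != ARMCC::AL
    Result = Cond2 ? 1 : Result; // getCMOV(Result, True, CondCode2)
  return Result;
}

int main() {
  assert(setccWithTwoConditions(false, false, true) == 0);
  assert(setccWithTwoConditions(true, false, true) == 1);
  assert(setccWithTwoConditions(false, true, true) == 1);
  assert(setccWithTwoConditions(false, true, false) == 0); // single-condition case
  return 0;
}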
10326
10327SDValue ARMTargetLowering::LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const {
10328 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
10329
10330 EVT VT = getPointerTy(DAG.getDataLayout());
10331 SDLoc DL(Op);
10332 int FI = MFI.CreateFixedObject(4, 0, false);
10333 return DAG.getFrameIndex(FI, VT);
10334}
10335
10336SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
10337 LLVM_DEBUG(dbgs() << "Lowering node: "; Op.dump());
1. Assuming 'DebugFlag' is false
2. Loop condition is false. Exiting loop
10338 switch (Op.getOpcode()) {
3. Control jumps to 'case STRICT_FSETCC:' at line 10447
10339 default: llvm_unreachable("Don't know how to custom lower this!");
10340 case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG);
10341 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
10342 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
10343 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
10344 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
10345 case ISD::SELECT: return LowerSELECT(Op, DAG);
10346 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
10347 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
10348 case ISD::BR_CC: return LowerBR_CC(Op, DAG);
10349 case ISD::BR_JT: return LowerBR_JT(Op, DAG);
10350 case ISD::VASTART: return LowerVASTART(Op, DAG);
10351 case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget);
10352 case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget);
10353 case ISD::SINT_TO_FP:
10354 case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
10355 case ISD::STRICT_FP_TO_SINT:
10356 case ISD::STRICT_FP_TO_UINT:
10357 case ISD::FP_TO_SINT:
10358 case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
10359 case ISD::FP_TO_SINT_SAT:
10360 case ISD::FP_TO_UINT_SAT: return LowerFP_TO_INT_SAT(Op, DAG, Subtarget);
10361 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
10362 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
10363 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
10364 case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
10365 case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
10366 case ISD::EH_SJLJ_SETUP_DISPATCH: return LowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
10367 case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG, Subtarget);
10368 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
10369 Subtarget);
10370 case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG, Subtarget);
10371 case ISD::SHL:
10372 case ISD::SRL:
10373 case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget);
10374 case ISD::SREM: return LowerREM(Op.getNode(), DAG);
10375 case ISD::UREM: return LowerREM(Op.getNode(), DAG);
10376 case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);
10377 case ISD::SRL_PARTS:
10378 case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
10379 case ISD::CTTZ:
10380 case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
10381 case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget);
10382 case ISD::SETCC: return LowerVSETCC(Op, DAG, Subtarget);
10383 case ISD::SETCCCARRY: return LowerSETCCCARRY(Op, DAG);
10384 case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget);
10385 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
10386 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
10387 case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG, Subtarget);
10388 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
10389 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG, Subtarget);
10390 case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG, Subtarget);
10391 case ISD::TRUNCATE: return LowerTruncate(Op.getNode(), DAG, Subtarget);
10392 case ISD::SIGN_EXTEND:
10393 case ISD::ZERO_EXTEND: return LowerVectorExtend(Op.getNode(), DAG, Subtarget);
10394 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
10395 case ISD::SET_ROUNDING: return LowerSET_ROUNDING(Op, DAG);
10396 case ISD::MUL: return LowerMUL(Op, DAG);
10397 case ISD::SDIV:
10398 if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
10399 return LowerDIV_Windows(Op, DAG, /* Signed */ true);
10400 return LowerSDIV(Op, DAG, Subtarget);
10401 case ISD::UDIV:
10402 if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
10403 return LowerDIV_Windows(Op, DAG, /* Signed */ false);
10404 return LowerUDIV(Op, DAG, Subtarget);
10405 case ISD::ADDCARRY:
10406 case ISD::SUBCARRY: return LowerADDSUBCARRY(Op, DAG);
10407 case ISD::SADDO:
10408 case ISD::SSUBO:
10409 return LowerSignedALUO(Op, DAG);
10410 case ISD::UADDO:
10411 case ISD::USUBO:
10412 return LowerUnsignedALUO(Op, DAG);
10413 case ISD::SADDSAT:
10414 case ISD::SSUBSAT:
10415 case ISD::UADDSAT:
10416 case ISD::USUBSAT:
10417 return LowerADDSUBSAT(Op, DAG, Subtarget);
10418 case ISD::LOAD:
10419 return LowerPredicateLoad(Op, DAG);
10420 case ISD::STORE:
10421 return LowerSTORE(Op, DAG, Subtarget);
10422 case ISD::MLOAD:
10423 return LowerMLOAD(Op, DAG);
10424 case ISD::VECREDUCE_MUL:
10425 case ISD::VECREDUCE_AND:
10426 case ISD::VECREDUCE_OR:
10427 case ISD::VECREDUCE_XOR:
10428 return LowerVecReduce(Op, DAG, Subtarget);
10429 case ISD::VECREDUCE_FADD:
10430 case ISD::VECREDUCE_FMUL:
10431 case ISD::VECREDUCE_FMIN:
10432 case ISD::VECREDUCE_FMAX:
10433 return LowerVecReduceF(Op, DAG, Subtarget);
10434 case ISD::ATOMIC_LOAD:
10435 case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
10436 case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
10437 case ISD::SDIVREM:
10438 case ISD::UDIVREM: return LowerDivRem(Op, DAG);
10439 case ISD::DYNAMIC_STACKALLOC:
10440 if (Subtarget->isTargetWindows())
10441 return LowerDYNAMIC_STACKALLOC(Op, DAG);
10442 llvm_unreachable("Don't know how to custom lower this!");
10443 case ISD::STRICT_FP_ROUND:
10444 case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
10445 case ISD::STRICT_FP_EXTEND:
10446 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
10447 case ISD::STRICT_FSETCC:
10448 case ISD::STRICT_FSETCCS: return LowerFSETCC(Op, DAG);
4. Calling 'ARMTargetLowering::LowerFSETCC'
10449 case ISD::SPONENTRY:
10450 return LowerSPONENTRY(Op, DAG);
10451 case ARMISD::WIN__DBZCHK: return SDValue();
10452 }
10453}
10454
10455static void ReplaceLongIntrinsic(SDNode *N, SmallVectorImpl<SDValue> &Results,
10456 SelectionDAG &DAG) {
10457 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
10458 unsigned Opc = 0;
10459 if (IntNo == Intrinsic::arm_smlald)
10460 Opc = ARMISD::SMLALD;
10461 else if (IntNo == Intrinsic::arm_smlaldx)
10462 Opc = ARMISD::SMLALDX;
10463 else if (IntNo == Intrinsic::arm_smlsld)
10464 Opc = ARMISD::SMLSLD;
10465 else if (IntNo == Intrinsic::arm_smlsldx)
10466 Opc = ARMISD::SMLSLDX;
10467 else
10468 return;
10469
10470 SDLoc dl(N);
10471 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
10472 N->getOperand(3),
10473 DAG.getConstant(0, dl, MVT::i32));
10474 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
10475 N->getOperand(3),
10476 DAG.getConstant(1, dl, MVT::i32));
10477
10478 SDValue LongMul = DAG.getNode(Opc, dl,
10479 DAG.getVTList(MVT::i32, MVT::i32),
10480 N->getOperand(1), N->getOperand(2),
10481 Lo, Hi);
10482 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64,
10483 LongMul.getValue(0), LongMul.getValue(1)));
10484}
10485
10486/// ReplaceNodeResults - Replace the results of node with an illegal result
10487/// type with new values built out of custom code.
10488void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
10489 SmallVectorImpl<SDValue> &Results,
10490 SelectionDAG &DAG) const {
10491 SDValue Res;
10492 switch (N->getOpcode()) {
10493 default:
10494 llvm_unreachable("Don't know how to custom expand this!");
10495 case ISD::READ_REGISTER:
10496 ExpandREAD_REGISTER(N, Results, DAG);
10497 break;
10498 case ISD::BITCAST:
10499 Res = ExpandBITCAST(N, DAG, Subtarget);
10500 break;
10501 case ISD::SRL:
10502 case ISD::SRA:
10503 case ISD::SHL:
10504 Res = Expand64BitShift(N, DAG, Subtarget);
10505 break;
10506 case ISD::SREM:
10507 case ISD::UREM:
10508 Res = LowerREM(N, DAG);
10509 break;
10510 case ISD::SDIVREM:
10511 case ISD::UDIVREM:
10512 Res = LowerDivRem(SDValue(N, 0), DAG);
10513 assert(Res.getNumOperands() == 2 && "DivRem needs two values");
10514 Results.push_back(Res.getValue(0));
10515 Results.push_back(Res.getValue(1));
10516 return;
10517 case ISD::SADDSAT:
10518 case ISD::SSUBSAT:
10519 case ISD::UADDSAT:
10520 case ISD::USUBSAT:
10521 Res = LowerADDSUBSAT(SDValue(N, 0), DAG, Subtarget);
10522 break;
10523 case ISD::READCYCLECOUNTER:
10524 ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
10525 return;
10526 case ISD::UDIV:
10527 case ISD::SDIV:
10528 assert(Subtarget->isTargetWindows() && "can only expand DIV on Windows");
10529 return ExpandDIV_Windows(SDValue(N, 0), DAG, N->getOpcode() == ISD::SDIV,
10530 Results);
10531 case ISD::ATOMIC_CMP_SWAP:
10532 ReplaceCMP_SWAP_64Results(N, Results, DAG);
10533 return;
10534 case ISD::INTRINSIC_WO_CHAIN:
10535 return ReplaceLongIntrinsic(N, Results, DAG);
10536 case ISD::LOAD:
10537 LowerLOAD(N, Results, DAG);
10538 break;
10539 case ISD::TRUNCATE:
10540 Res = LowerTruncate(N, DAG, Subtarget);
10541 break;
10542 case ISD::SIGN_EXTEND:
10543 case ISD::ZERO_EXTEND:
10544 Res = LowerVectorExtend(N, DAG, Subtarget);
10545 break;
10546 case ISD::FP_TO_SINT_SAT:
10547 case ISD::FP_TO_UINT_SAT:
10548 Res = LowerFP_TO_INT_SAT(SDValue(N, 0), DAG, Subtarget);
10549 break;
10550 }
10551 if (Res.getNode())
10552 Results.push_back(Res);
10553}
10554
10555//===----------------------------------------------------------------------===//
10556// ARM Scheduler Hooks
10557//===----------------------------------------------------------------------===//
10558
10559/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
10560/// registers the function context.
10561void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
10562 MachineBasicBlock *MBB,
10563 MachineBasicBlock *DispatchBB,
10564 int FI) const {
10565 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
10566 "ROPI/RWPI not currently supported with SjLj");
10567 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
10568 DebugLoc dl = MI.getDebugLoc();
10569 MachineFunction *MF = MBB->getParent();
10570 MachineRegisterInfo *MRI = &MF->getRegInfo();
10571 MachineConstantPool *MCP = MF->getConstantPool();
10572 ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>();
10573 const Function &F = MF->getFunction();
10574
10575 bool isThumb = Subtarget->isThumb();
10576 bool isThumb2 = Subtarget->isThumb2();
10577
10578 unsigned PCLabelId = AFI->createPICLabelUId();
10579 unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8;
10580 ARMConstantPoolValue *CPV =
10581 ARMConstantPoolMBB::Create(F.getContext(), DispatchBB, PCLabelId, PCAdj);
10582 unsigned CPI = MCP->getConstantPoolIndex(CPV, Align(4));
10583
10584 const TargetRegisterClass *TRC = isThumb ? &ARM::tGPRRegClass
10585 : &ARM::GPRRegClass;
10586
10587 // Grab constant pool and fixed stack memory operands.
10588 MachineMemOperand *CPMMO =
10589 MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
10590 MachineMemOperand::MOLoad, 4, Align(4));
10591
10592 MachineMemOperand *FIMMOSt =
10593 MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(*MF, FI),
10594 MachineMemOperand::MOStore, 4, Align(4));
10595
10596 // Load the address of the dispatch MBB into the jump buffer.
10597 if (isThumb2) {
10598 // Incoming value: jbuf
10599 // ldr.n r5, LCPI1_1
10600 // orr r5, r5, #1
10601 // add r5, pc
10602 // str r5, [$jbuf, #+4] ; &jbuf[1]
10603 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10604 BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1)
10605 .addConstantPoolIndex(CPI)
10606 .addMemOperand(CPMMO)
10607 .add(predOps(ARMCC::AL));
10608 // Set the low bit because of thumb mode.
10609 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10610 BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2)
10611 .addReg(NewVReg1, RegState::Kill)
10612 .addImm(0x01)
10613 .add(predOps(ARMCC::AL))
10614 .add(condCodeOp());
10615 Register NewVReg3 = MRI->createVirtualRegister(TRC);
10616 BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3)
10617 .addReg(NewVReg2, RegState::Kill)
10618 .addImm(PCLabelId);
10619 BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12))
10620 .addReg(NewVReg3, RegState::Kill)
10621 .addFrameIndex(FI)
10622 .addImm(36) // &jbuf[1] :: pc
10623 .addMemOperand(FIMMOSt)
10624 .add(predOps(ARMCC::AL));
10625 } else if (isThumb) {
10626 // Incoming value: jbuf
10627 // ldr.n r1, LCPI1_4
10628 // add r1, pc
10629 // mov r2, #1
10630 // orrs r1, r2
10631 // add r2, $jbuf, #+4 ; &jbuf[1]
10632 // str r1, [r2]
10633 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10634 BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1)
10635 .addConstantPoolIndex(CPI)
10636 .addMemOperand(CPMMO)
10637 .add(predOps(ARMCC::AL));
10638 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10639 BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2)
10640 .addReg(NewVReg1, RegState::Kill)
10641 .addImm(PCLabelId);
10642 // Set the low bit because of thumb mode.
10643 Register NewVReg3 = MRI->createVirtualRegister(TRC);
10644 BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3)
10645 .addReg(ARM::CPSR, RegState::Define)
10646 .addImm(1)
10647 .add(predOps(ARMCC::AL));
10648 Register NewVReg4 = MRI->createVirtualRegister(TRC);
10649 BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4)
10650 .addReg(ARM::CPSR, RegState::Define)
10651 .addReg(NewVReg2, RegState::Kill)
10652 .addReg(NewVReg3, RegState::Kill)
10653 .add(predOps(ARMCC::AL));
10654 Register NewVReg5 = MRI->createVirtualRegister(TRC);
10655 BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5)
10656 .addFrameIndex(FI)
10657 .addImm(36); // &jbuf[1] :: pc
10658 BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))
10659 .addReg(NewVReg4, RegState::Kill)
10660 .addReg(NewVReg5, RegState::Kill)
10661 .addImm(0)
10662 .addMemOperand(FIMMOSt)
10663 .add(predOps(ARMCC::AL));
10664 } else {
10665 // Incoming value: jbuf
10666 // ldr r1, LCPI1_1
10667 // add r1, pc, r1
10668 // str r1, [$jbuf, #+4] ; &jbuf[1]
10669 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10670 BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1)
10671 .addConstantPoolIndex(CPI)
10672 .addImm(0)
10673 .addMemOperand(CPMMO)
10674 .add(predOps(ARMCC::AL));
10675 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10676 BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2)
10677 .addReg(NewVReg1, RegState::Kill)
10678 .addImm(PCLabelId)
10679 .add(predOps(ARMCC::AL));
10680 BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12))
10681 .addReg(NewVReg2, RegState::Kill)
10682 .addFrameIndex(FI)
10683 .addImm(36) // &jbuf[1] :: pc
10684 .addMemOperand(FIMMOSt)
10685 .add(predOps(ARMCC::AL));
10686 }
10687}
10688
10689void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
10690 MachineBasicBlock *MBB) const {
10691 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
10692 DebugLoc dl = MI.getDebugLoc();
10693 MachineFunction *MF = MBB->getParent();
10694 MachineRegisterInfo *MRI = &MF->getRegInfo();
10695 MachineFrameInfo &MFI = MF->getFrameInfo();
10696 int FI = MFI.getFunctionContextIndex();
10697
10698 const TargetRegisterClass *TRC = Subtarget->isThumb() ? &ARM::tGPRRegClass
10699 : &ARM::GPRnopcRegClass;
10700
10701 // Get a mapping of the call site numbers to all of the landing pads they're
10702 // associated with.
10703 DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2>> CallSiteNumToLPad;
10704 unsigned MaxCSNum = 0;
10705 for (MachineBasicBlock &BB : *MF) {
10706 if (!BB.isEHPad())
10707 continue;
10708
10709 // FIXME: We should assert that the EH_LABEL is the first MI in the landing
10710 // pad.
10711 for (MachineInstr &II : BB) {
10712 if (!II.isEHLabel())
10713 continue;
10714
10715 MCSymbol *Sym = II.getOperand(0).getMCSymbol();
10716 if (!MF->hasCallSiteLandingPad(Sym)) continue;
10717
10718 SmallVectorImpl<unsigned> &CallSiteIdxs = MF->getCallSiteLandingPad(Sym);
10719 for (unsigned Idx : CallSiteIdxs) {
10720 CallSiteNumToLPad[Idx].push_back(&BB);
10721 MaxCSNum = std::max(MaxCSNum, Idx);
10722 }
10723 break;
10724 }
10725 }
10726
10727 // Get an ordered list of the machine basic blocks for the jump table.
10728 std::vector<MachineBasicBlock*> LPadList;
10729 SmallPtrSet<MachineBasicBlock*, 32> InvokeBBs;
10730 LPadList.reserve(CallSiteNumToLPad.size());
10731 for (unsigned I = 1; I <= MaxCSNum; ++I) {
10732 SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I];
10733 for (MachineBasicBlock *MBB : MBBList) {
10734 LPadList.push_back(MBB);
10735 InvokeBBs.insert(MBB->pred_begin(), MBB->pred_end());
10736 }
10737 }
10738
10739 assert(!LPadList.empty() &&
10740 "No landing pad destinations for the dispatch jump table!");
10741
10742 // Create the jump table and associated information.
10743 MachineJumpTableInfo *JTI =
10744 MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);
10745 unsigned MJTI = JTI->createJumpTableIndex(LPadList);
10746
10747 // Create the MBBs for the dispatch code.
10748
10749 // Shove the dispatch's address into the return slot in the function context.
10750 MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
10751 DispatchBB->setIsEHPad();
10752
10753 MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
10754 unsigned trap_opcode;
10755 if (Subtarget->isThumb())
10756 trap_opcode = ARM::tTRAP;
10757 else
10758 trap_opcode = Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP;
10759
10760 BuildMI(TrapBB, dl, TII->get(trap_opcode));
10761 DispatchBB->addSuccessor(TrapBB);
10762
10763 MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
10764 DispatchBB->addSuccessor(DispContBB);
10765
10766 // Insert the MBBs.
10767 MF->insert(MF->end(), DispatchBB);
10768 MF->insert(MF->end(), DispContBB);
10769 MF->insert(MF->end(), TrapBB);
10770
10771 // Insert code into the entry block that creates and registers the function
10772 // context.
10773 SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI);
10774
10775 MachineMemOperand *FIMMOLd = MF->getMachineMemOperand(
10776 MachinePointerInfo::getFixedStack(*MF, FI),
10777 MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile, 4, Align(4));
10778
10779 MachineInstrBuilder MIB;
10780 MIB = BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup));
10781
10782 const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
10783 const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
10784
10785 // Add a register mask with no preserved registers. This results in all
10786 // registers being marked as clobbered. This can't work if the dispatch block
10787 // is in a Thumb1 function and is linked with ARM code which uses the FP
10788 // registers, as there is no way to preserve the FP registers in Thumb1 mode.
10789 MIB.addRegMask(RI.getSjLjDispatchPreservedMask(*MF));
10790
10791 bool IsPositionIndependent = isPositionIndependent();
10792 unsigned NumLPads = LPadList.size();
10793 if (Subtarget->isThumb2()) {
10794 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10795 BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)
10796 .addFrameIndex(FI)
10797 .addImm(4)
10798 .addMemOperand(FIMMOLd)
10799 .add(predOps(ARMCC::AL));
10800
10801 if (NumLPads < 256) {
10802 BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
10803 .addReg(NewVReg1)
10804 .addImm(LPadList.size())
10805 .add(predOps(ARMCC::AL));
10806 } else {
10807 Register VReg1 = MRI->createVirtualRegister(TRC);
10808 BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1)
10809 .addImm(NumLPads & 0xFFFF)
10810 .add(predOps(ARMCC::AL));
10811
10812 unsigned VReg2 = VReg1;
10813 if ((NumLPads & 0xFFFF0000) != 0) {
10814 VReg2 = MRI->createVirtualRegister(TRC);
10815 BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2)
10816 .addReg(VReg1)
10817 .addImm(NumLPads >> 16)
10818 .add(predOps(ARMCC::AL));
10819 }
10820
10821 BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr))
10822 .addReg(NewVReg1)
10823 .addReg(VReg2)
10824 .add(predOps(ARMCC::AL));
10825 }
10826
10827 BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc))
10828 .addMBB(TrapBB)
10829 .addImm(ARMCC::HI)
10830 .addReg(ARM::CPSR);
10831
10832 Register NewVReg3 = MRI->createVirtualRegister(TRC);
10833 BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT), NewVReg3)
10834 .addJumpTableIndex(MJTI)
10835 .add(predOps(ARMCC::AL));
10836
10837 Register NewVReg4 = MRI->createVirtualRegister(TRC);
10838 BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4)
10839 .addReg(NewVReg3, RegState::Kill)
10840 .addReg(NewVReg1)
10841 .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))
10842 .add(predOps(ARMCC::AL))
10843 .add(condCodeOp());
10844
10845 BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT))
10846 .addReg(NewVReg4, RegState::Kill)
10847 .addReg(NewVReg1)
10848 .addJumpTableIndex(MJTI);
10849 } else if (Subtarget->isThumb()) {
10850 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10851 BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1)
10852 .addFrameIndex(FI)
10853 .addImm(1)
10854 .addMemOperand(FIMMOLd)
10855 .add(predOps(ARMCC::AL));
10856
10857 if (NumLPads < 256) {
10858 BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
10859 .addReg(NewVReg1)
10860 .addImm(NumLPads)
10861 .add(predOps(ARMCC::AL));
10862 } else {
10863 MachineConstantPool *ConstantPool = MF->getConstantPool();
10864 Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
10865 const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
10866
10867 // MachineConstantPool wants an explicit alignment.
10868 Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
10869 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
10870
10871 Register VReg1 = MRI->createVirtualRegister(TRC);
10872 BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci))
10873 .addReg(VReg1, RegState::Define)
10874 .addConstantPoolIndex(Idx)
10875 .add(predOps(ARMCC::AL));
10876 BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr))
10877 .addReg(NewVReg1)
10878 .addReg(VReg1)
10879 .add(predOps(ARMCC::AL));
10880 }
10881
10882 BuildMI(DispatchBB, dl, TII->get(ARM::tBcc))
10883 .addMBB(TrapBB)
10884 .addImm(ARMCC::HI)
10885 .addReg(ARM::CPSR);
10886
10887 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10888 BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2)
10889 .addReg(ARM::CPSR, RegState::Define)
10890 .addReg(NewVReg1)
10891 .addImm(2)
10892 .add(predOps(ARMCC::AL));
10893
10894 Register NewVReg3 = MRI->createVirtualRegister(TRC);
10895 BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3)
10896 .addJumpTableIndex(MJTI)
10897 .add(predOps(ARMCC::AL));
10898
10899 Register NewVReg4 = MRI->createVirtualRegister(TRC);
10900 BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4)
10901 .addReg(ARM::CPSR, RegState::Define)
10902 .addReg(NewVReg2, RegState::Kill)
10903 .addReg(NewVReg3)
10904 .add(predOps(ARMCC::AL));
10905
10906 MachineMemOperand *JTMMOLd =
10907 MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(*MF),
10908 MachineMemOperand::MOLoad, 4, Align(4));
10909
10910 Register NewVReg5 = MRI->createVirtualRegister(TRC);
10911 BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)
10912 .addReg(NewVReg4, RegState::Kill)
10913 .addImm(0)
10914 .addMemOperand(JTMMOLd)
10915 .add(predOps(ARMCC::AL));
10916
10917 unsigned NewVReg6 = NewVReg5;
10918 if (IsPositionIndependent) {
10919 NewVReg6 = MRI->createVirtualRegister(TRC);
10920 BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
10921 .addReg(ARM::CPSR, RegState::Define)
10922 .addReg(NewVReg5, RegState::Kill)
10923 .addReg(NewVReg3)
10924 .add(predOps(ARMCC::AL));
10925 }
10926
10927 BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr))
10928 .addReg(NewVReg6, RegState::Kill)
10929 .addJumpTableIndex(MJTI);
10930 } else {
10931 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10932 BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1)
10933 .addFrameIndex(FI)
10934 .addImm(4)
10935 .addMemOperand(FIMMOLd)
10936 .add(predOps(ARMCC::AL));
10937
10938 if (NumLPads < 256) {
10939 BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))
10940 .addReg(NewVReg1)
10941 .addImm(NumLPads)
10942 .add(predOps(ARMCC::AL));
10943 } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) {
10944 Register VReg1 = MRI->createVirtualRegister(TRC);
10945 BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1)
10946 .addImm(NumLPads & 0xFFFF)
10947 .add(predOps(ARMCC::AL));
10948
10949 unsigned VReg2 = VReg1;
10950 if ((NumLPads & 0xFFFF0000) != 0) {
10951 VReg2 = MRI->createVirtualRegister(TRC);
10952 BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2)
10953 .addReg(VReg1)
10954 .addImm(NumLPads >> 16)
10955 .add(predOps(ARMCC::AL));
10956 }
10957
10958 BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
10959 .addReg(NewVReg1)
10960 .addReg(VReg2)
10961 .add(predOps(ARMCC::AL));
10962 } else {
10963 MachineConstantPool *ConstantPool = MF->getConstantPool();
10964 Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
10965 const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
10966
10967 // MachineConstantPool wants an explicit alignment.
10968 Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
10969 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
10970
10971 Register VReg1 = MRI->createVirtualRegister(TRC);
10972 BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp))
10973 .addReg(VReg1, RegState::Define)
10974 .addConstantPoolIndex(Idx)
10975 .addImm(0)
10976 .add(predOps(ARMCC::AL));
10977 BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
10978 .addReg(NewVReg1)
10979 .addReg(VReg1, RegState::Kill)
10980 .add(predOps(ARMCC::AL));
10981 }
10982
10983 BuildMI(DispatchBB, dl, TII->get(ARM::Bcc))
10984 .addMBB(TrapBB)
10985 .addImm(ARMCC::HI)
10986 .addReg(ARM::CPSR);
10987
10988 Register NewVReg3 = MRI->createVirtualRegister(TRC);
10989 BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3)
10990 .addReg(NewVReg1)
10991 .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))
10992 .add(predOps(ARMCC::AL))
10993 .add(condCodeOp());
10994 Register NewVReg4 = MRI->createVirtualRegister(TRC);
10995 BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)
10996 .addJumpTableIndex(MJTI)
10997 .add(predOps(ARMCC::AL));
10998
10999 MachineMemOperand *JTMMOLd =
11000 MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(*MF),
11001 MachineMemOperand::MOLoad, 4, Align(4));
11002 Register NewVReg5 = MRI->createVirtualRegister(TRC);
11003 BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)
11004 .addReg(NewVReg3, RegState::Kill)
11005 .addReg(NewVReg4)
11006 .addImm(0)
11007 .addMemOperand(JTMMOLd)
11008 .add(predOps(ARMCC::AL));
11009
11010 if (IsPositionIndependent) {
11011 BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
11012 .addReg(NewVReg5, RegState::Kill)
11013 .addReg(NewVReg4)
11014 .addJumpTableIndex(MJTI);
11015 } else {
11016 BuildMI(DispContBB, dl, TII->get(ARM::BR_JTr))
11017 .addReg(NewVReg5, RegState::Kill)
11018 .addJumpTableIndex(MJTI);
11019 }
11020 }
11021
11022 // Add the jump table entries as successors to the MBB.
11023 SmallPtrSet<MachineBasicBlock*, 8> SeenMBBs;
11024 for (MachineBasicBlock *CurMBB : LPadList) {
11025 if (SeenMBBs.insert(CurMBB).second)
11026 DispContBB->addSuccessor(CurMBB);
11027 }
11028
11029 // N.B. the order the invoke BBs are processed in doesn't matter here.
11030 const MCPhysReg *SavedRegs = RI.getCalleeSavedRegs(MF);
11031 SmallVector<MachineBasicBlock*, 64> MBBLPads;
11032 for (MachineBasicBlock *BB : InvokeBBs) {
11033
11034 // Remove the landing pad successor from the invoke block and replace it
11035 // with the new dispatch block.
11036 SmallVector<MachineBasicBlock*, 4> Successors(BB->successors());
11037 while (!Successors.empty()) {
11038 MachineBasicBlock *SMBB = Successors.pop_back_val();
11039 if (SMBB->isEHPad()) {
11040 BB->removeSuccessor(SMBB);
11041 MBBLPads.push_back(SMBB);
11042 }
11043 }
11044
11045 BB->addSuccessor(DispatchBB, BranchProbability::getZero());
11046 BB->normalizeSuccProbs();
11047
11048 // Find the invoke call and mark all of the callee-saved registers as
11049 // 'implicit defined' so that they're spilled. This prevents code from
11050 // moving instructions to before the EH block, where they will never be
11051 // executed.
11052 for (MachineBasicBlock::reverse_iterator
11053 II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
11054 if (!II->isCall()) continue;
11055
11056 DenseMap<unsigned, bool> DefRegs;
11057 for (MachineInstr::mop_iterator
11058 OI = II->operands_begin(), OE = II->operands_end();
11059 OI != OE; ++OI) {
11060 if (!OI->isReg()) continue;
11061 DefRegs[OI->getReg()] = true;
11062 }
11063
11064 MachineInstrBuilder MIB(*MF, &*II);
11065
11066 for (unsigned i = 0; SavedRegs[i] != 0; ++i) {
11067 unsigned Reg = SavedRegs[i];
11068 if (Subtarget->isThumb2() &&
11069 !ARM::tGPRRegClass.contains(Reg) &&
11070 !ARM::hGPRRegClass.contains(Reg))
11071 continue;
11072 if (Subtarget->isThumb1Only() && !ARM::tGPRRegClass.contains(Reg))
11073 continue;
11074 if (!Subtarget->isThumb() && !ARM::GPRRegClass.contains(Reg))
11075 continue;
11076 if (!DefRegs[Reg])
11077 MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
11078 }
11079
11080 break;
11081 }
11082 }
11083
11084 // Mark all former landing pads as non-landing pads. The dispatch is the only
11085 // landing pad now.
11086 for (MachineBasicBlock *MBBLPad : MBBLPads)
11087 MBBLPad->setIsEHPad(false);
11088
11089 // The instruction is gone now.
11090 MI.eraseFromParent();
11091}
11092
11093static
11094MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
11095 for (MachineBasicBlock *S : MBB->successors())
11096 if (S != Succ)
11097 return S;
11098 llvm_unreachable("Expecting a BB with two successors!");
11099}
11100
11101/// Return the load opcode for a given load size. If load size >= 8,
11102/// neon opcode will be returned.
11103static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2) {
11104 if (LdSize >= 8)
11105 return LdSize == 16 ? ARM::VLD1q32wb_fixed
11106 : LdSize == 8 ? ARM::VLD1d32wb_fixed : 0;
11107 if (IsThumb1)
11108 return LdSize == 4 ? ARM::tLDRi
11109 : LdSize == 2 ? ARM::tLDRHi
11110 : LdSize == 1 ? ARM::tLDRBi : 0;
11111 if (IsThumb2)
11112 return LdSize == 4 ? ARM::t2LDR_POST
11113 : LdSize == 2 ? ARM::t2LDRH_POST
11114 : LdSize == 1 ? ARM::t2LDRB_POST : 0;
11115 return LdSize == 4 ? ARM::LDR_POST_IMM
11116 : LdSize == 2 ? ARM::LDRH_POST
11117 : LdSize == 1 ? ARM::LDRB_POST_IMM : 0;
11118}
11119
11120/// Return the store opcode for a given store size. If store size >= 8,
11121/// neon opcode will be returned.
11122static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2) {
11123 if (StSize >= 8)
11124 return StSize == 16 ? ARM::VST1q32wb_fixed
11125 : StSize == 8 ? ARM::VST1d32wb_fixed : 0;
11126 if (IsThumb1)
11127 return StSize == 4 ? ARM::tSTRi
11128 : StSize == 2 ? ARM::tSTRHi
11129 : StSize == 1 ? ARM::tSTRBi : 0;
11130 if (IsThumb2)
11131 return StSize == 4 ? ARM::t2STR_POST
11132 : StSize == 2 ? ARM::t2STRH_POST
11133 : StSize == 1 ? ARM::t2STRB_POST : 0;
11134 return StSize == 4 ? ARM::STR_POST_IMM
11135 : StSize == 2 ? ARM::STRH_POST
11136 : StSize == 1 ? ARM::STRB_POST_IMM : 0;
11137}
11138
11139/// Emit a post-increment load operation with given size. The instructions
11140/// will be added to BB at Pos.
11141static void emitPostLd(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
11142 const TargetInstrInfo *TII, const DebugLoc &dl,
11143 unsigned LdSize, unsigned Data, unsigned AddrIn,
11144 unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
11145 unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2);
11146 assert(LdOpc != 0 && "Should have a load opcode");
11147 if (LdSize >= 8) {
11148 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11149 .addReg(AddrOut, RegState::Define)
11150 .addReg(AddrIn)
11151 .addImm(0)
11152 .add(predOps(ARMCC::AL));
11153 } else if (IsThumb1) {
11154 // load + update AddrIn
11155 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11156 .addReg(AddrIn)
11157 .addImm(0)
11158 .add(predOps(ARMCC::AL));
11159 BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
11160 .add(t1CondCodeOp())
11161 .addReg(AddrIn)
11162 .addImm(LdSize)
11163 .add(predOps(ARMCC::AL));
11164 } else if (IsThumb2) {
11165 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11166 .addReg(AddrOut, RegState::Define)
11167 .addReg(AddrIn)
11168 .addImm(LdSize)
11169 .add(predOps(ARMCC::AL));
11170 } else { // arm
11171 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11172 .addReg(AddrOut, RegState::Define)
11173 .addReg(AddrIn)
11174 .addReg(0)
11175 .addImm(LdSize)
11176 .add(predOps(ARMCC::AL));
11177 }
11178}
11179
11180/// Emit a post-increment store operation with given size. The instructions
11181/// will be added to BB at Pos.
11182static void emitPostSt(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
11183 const TargetInstrInfo *TII, const DebugLoc &dl,
11184 unsigned StSize, unsigned Data, unsigned AddrIn,
11185 unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
11186 unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2);
11187 assert(StOpc != 0 && "Should have a store opcode");
11188 if (StSize >= 8) {
11189 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
11190 .addReg(AddrIn)
11191 .addImm(0)
11192 .addReg(Data)
11193 .add(predOps(ARMCC::AL));
11194 } else if (IsThumb1) {
11195 // store + update AddrIn
11196 BuildMI(*BB, Pos, dl, TII->get(StOpc))
11197 .addReg(Data)
11198 .addReg(AddrIn)
11199 .addImm(0)
11200 .add(predOps(ARMCC::AL));
11201 BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
11202 .add(t1CondCodeOp())
11203 .addReg(AddrIn)
11204 .addImm(StSize)
11205 .add(predOps(ARMCC::AL));
11206 } else if (IsThumb2) {
11207 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
11208 .addReg(Data)
11209 .addReg(AddrIn)
11210 .addImm(StSize)
11211 .add(predOps(ARMCC::AL));
11212 } else { // arm
11213 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
11214 .addReg(Data)
11215 .addReg(AddrIn)
11216 .addReg(0)
11217 .addImm(StSize)
11218 .add(predOps(ARMCC::AL));
11219 }
11220}
11221
11222MachineBasicBlock *
11223ARMTargetLowering::EmitStructByval(MachineInstr &MI,
11224 MachineBasicBlock *BB) const {
11225 // This pseudo instruction has 3 operands: dst, src, size
11226 // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
11227 // Otherwise, we will generate unrolled scalar copies.
11228 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
11229 const BasicBlock *LLVM_BB = BB->getBasicBlock();
11230 MachineFunction::iterator It = ++BB->getIterator();
11231
11232 Register dest = MI.getOperand(0).getReg();
11233 Register src = MI.getOperand(1).getReg();
11234 unsigned SizeVal = MI.getOperand(2).getImm();
11235 unsigned Alignment = MI.getOperand(3).getImm();
11236 DebugLoc dl = MI.getDebugLoc();
11237
11238 MachineFunction *MF = BB->getParent();
11239 MachineRegisterInfo &MRI = MF->getRegInfo();
11240 unsigned UnitSize = 0;
11241 const TargetRegisterClass *TRC = nullptr;
11242 const TargetRegisterClass *VecTRC = nullptr;
11243
11244 bool IsThumb1 = Subtarget->isThumb1Only();
11245 bool IsThumb2 = Subtarget->isThumb2();
11246 bool IsThumb = Subtarget->isThumb();
11247
11248 if (Alignment & 1) {
11249 UnitSize = 1;
11250 } else if (Alignment & 2) {
11251 UnitSize = 2;
11252 } else {
11253 // Check whether we can use NEON instructions.
11254 if (!MF->getFunction().hasFnAttribute(Attribute::NoImplicitFloat) &&
11255 Subtarget->hasNEON()) {
11256 if ((Alignment % 16 == 0) && SizeVal >= 16)
11257 UnitSize = 16;
11258 else if ((Alignment % 8 == 0) && SizeVal >= 8)
11259 UnitSize = 8;
11260 }
11261 // Can't use NEON instructions.
11262 if (UnitSize == 0)
11263 UnitSize = 4;
11264 }
11265
11266 // Select the correct opcode and register class for unit size load/store
11267 bool IsNeon = UnitSize >= 8;
11268 TRC = IsThumb ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
11269 if (IsNeon)
11270 VecTRC = UnitSize == 16 ? &ARM::DPairRegClass
11271 : UnitSize == 8 ? &ARM::DPRRegClass
11272 : nullptr;
11273
11274 unsigned BytesLeft = SizeVal % UnitSize;
11275 unsigned LoopSize = SizeVal - BytesLeft;
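  // Worked example (illustrative annotation, not part of the original source):
  // a 16-byte-aligned copy with SizeVal = 70 and NEON available selects
  // UnitSize = 16 above, giving BytesLeft = 70 % 16 = 6 and LoopSize = 64,
  // i.e. four 16-byte unit copies followed by a byte-wise epilogue for the
  // remaining 6 bytes.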
11276
11277 if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) {
11278 // Use LDR and STR to copy.
11279 // [scratch, srcOut] = LDR_POST(srcIn, UnitSize)
11280 // [destOut] = STR_POST(scratch, destIn, UnitSize)
11281 unsigned srcIn = src;
11282 unsigned destIn = dest;
11283 for (unsigned i = 0; i < LoopSize; i+=UnitSize) {
11284 Register srcOut = MRI.createVirtualRegister(TRC);
11285 Register destOut = MRI.createVirtualRegister(TRC);
11286 Register scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
11287 emitPostLd(BB, MI, TII, dl, UnitSize, scratch, srcIn, srcOut,
11288 IsThumb1, IsThumb2);
11289 emitPostSt(BB, MI, TII, dl, UnitSize, scratch, destIn, destOut,
11290 IsThumb1, IsThumb2);
11291 srcIn = srcOut;
11292 destIn = destOut;
11293 }
11294
11295 // Handle the leftover bytes with LDRB and STRB.
11296 // [scratch, srcOut] = LDRB_POST(srcIn, 1)
11297 // [destOut] = STRB_POST(scratch, destIn, 1)
11298 for (unsigned i = 0; i < BytesLeft; i++) {
11299 Register srcOut = MRI.createVirtualRegister(TRC);
11300 Register destOut = MRI.createVirtualRegister(TRC);
11301 Register scratch = MRI.createVirtualRegister(TRC);
11302 emitPostLd(BB, MI, TII, dl, 1, scratch, srcIn, srcOut,
11303 IsThumb1, IsThumb2);
11304 emitPostSt(BB, MI, TII, dl, 1, scratch, destIn, destOut,
11305 IsThumb1, IsThumb2);
11306 srcIn = srcOut;
11307 destIn = destOut;
11308 }
11309 MI.eraseFromParent(); // The instruction is gone now.
11310 return BB;
11311 }
11312
11313 // Expand the pseudo op to a loop.
11314 // thisMBB:
11315 // ...
11316 // movw varEnd, # --> with thumb2
11317 // movt varEnd, #
11318 // ldrcp varEnd, idx --> without thumb2
11319 // fallthrough --> loopMBB
11320 // loopMBB:
11321 // PHI varPhi, varEnd, varLoop
11322 // PHI srcPhi, src, srcLoop
11323 // PHI destPhi, dst, destLoop
11324 // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
11325 // [destLoop] = STR_POST(scratch, destPhi, UnitSize)
11326 // subs varLoop, varPhi, #UnitSize
11327 // bne loopMBB
11328 // fallthrough --> exitMBB
11329 // exitMBB:
11330 // epilogue to handle left-over bytes
11331 // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
11332 // [destOut] = STRB_POST(scratch, destLoop, 1)
11333 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
11334 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
11335 MF->insert(It, loopMBB);
11336 MF->insert(It, exitMBB);
11337
11338 // Transfer the remainder of BB and its successor edges to exitMBB.
11339 exitMBB->splice(exitMBB->begin(), BB,
11340 std::next(MachineBasicBlock::iterator(MI)), BB->end());
11341 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11342
11343 // Load an immediate to varEnd.
11344 Register varEnd = MRI.createVirtualRegister(TRC);
11345 if (Subtarget->useMovt()) {
11346 unsigned Vtmp = varEnd;
11347 if ((LoopSize & 0xFFFF0000) != 0)
11348 Vtmp = MRI.createVirtualRegister(TRC);
11349 BuildMI(BB, dl, TII->get(IsThumb ? ARM::t2MOVi16 : ARM::MOVi16), Vtmp)
11350 .addImm(LoopSize & 0xFFFF)
11351 .add(predOps(ARMCC::AL));
11352
11353 if ((LoopSize & 0xFFFF0000) != 0)
11354 BuildMI(BB, dl, TII->get(IsThumb ? ARM::t2MOVTi16 : ARM::MOVTi16), varEnd)
11355 .addReg(Vtmp)
11356 .addImm(LoopSize >> 16)
11357 .add(predOps(ARMCC::AL));
11358 } else {
11359 MachineConstantPool *ConstantPool = MF->getConstantPool();
11360 Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
11361 const Constant *C = ConstantInt::get(Int32Ty, LoopSize);
11362
11363 // MachineConstantPool wants an explicit alignment.
11364 Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
11365 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
11366 MachineMemOperand *CPMMO =
11367 MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
11368 MachineMemOperand::MOLoad, 4, Align(4));
11369
11370 if (IsThumb)
11371 BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci))
11372 .addReg(varEnd, RegState::Define)
11373 .addConstantPoolIndex(Idx)
11374 .add(predOps(ARMCC::AL))
11375 .addMemOperand(CPMMO);
11376 else
11377 BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp))
11378 .addReg(varEnd, RegState::Define)
11379 .addConstantPoolIndex(Idx)
11380 .addImm(0)
11381 .add(predOps(ARMCC::AL))
11382 .addMemOperand(CPMMO);
11383 }
11384 BB->addSuccessor(loopMBB);
11385
11386 // Generate the loop body:
11387 // varPhi = PHI(varLoop, varEnd)
11388 // srcPhi = PHI(srcLoop, src)
11389 // destPhi = PHI(destLoop, dst)
11390 MachineBasicBlock *entryBB = BB;
11391 BB = loopMBB;
11392 Register varLoop = MRI.createVirtualRegister(TRC);
11393 Register varPhi = MRI.createVirtualRegister(TRC);
11394 Register srcLoop = MRI.createVirtualRegister(TRC);
11395 Register srcPhi = MRI.createVirtualRegister(TRC);
11396 Register destLoop = MRI.createVirtualRegister(TRC);
11397 Register destPhi = MRI.createVirtualRegister(TRC);
11398
11399 BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi)
11400 .addReg(varLoop).addMBB(loopMBB)
11401 .addReg(varEnd).addMBB(entryBB);
11402 BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi)
11403 .addReg(srcLoop).addMBB(loopMBB)
11404 .addReg(src).addMBB(entryBB);
11405 BuildMI(BB, dl, TII->get(ARM::PHI), destPhi)
11406 .addReg(destLoop).addMBB(loopMBB)
11407 .addReg(dest).addMBB(entryBB);
11408
11409 // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
11410  // [destLoop] = STR_POST(scratch, destPhi, UnitSize)
11411 Register scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
11412 emitPostLd(BB, BB->end(), TII, dl, UnitSize, scratch, srcPhi, srcLoop,
11413 IsThumb1, IsThumb2);
11414 emitPostSt(BB, BB->end(), TII, dl, UnitSize, scratch, destPhi, destLoop,
11415 IsThumb1, IsThumb2);
11416
11417 // Decrement loop variable by UnitSize.
11418 if (IsThumb1) {
11419 BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop)
11420 .add(t1CondCodeOp())
11421 .addReg(varPhi)
11422 .addImm(UnitSize)
11423 .add(predOps(ARMCC::AL));
11424 } else {
11425 MachineInstrBuilder MIB =
11426 BuildMI(*BB, BB->end(), dl,
11427 TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
11428 MIB.addReg(varPhi)
11429 .addImm(UnitSize)
11430 .add(predOps(ARMCC::AL))
11431 .add(condCodeOp());
11432 MIB->getOperand(5).setReg(ARM::CPSR);
11433 MIB->getOperand(5).setIsDef(true);
11434 }
11435 BuildMI(*BB, BB->end(), dl,
11436 TII->get(IsThumb1 ? ARM::tBcc : IsThumb2 ? ARM::t2Bcc : ARM::Bcc))
11437 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
11438
11439 // loopMBB can loop back to loopMBB or fall through to exitMBB.
11440 BB->addSuccessor(loopMBB);
11441 BB->addSuccessor(exitMBB);
11442
11443 // Add epilogue to handle BytesLeft.
11444 BB = exitMBB;
11445 auto StartOfExit = exitMBB->begin();
11446
11447 // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
11448 // [destOut] = STRB_POST(scratch, destLoop, 1)
11449 unsigned srcIn = srcLoop;
11450 unsigned destIn = destLoop;
11451 for (unsigned i = 0; i < BytesLeft; i++) {
11452 Register srcOut = MRI.createVirtualRegister(TRC);
11453 Register destOut = MRI.createVirtualRegister(TRC);
11454 Register scratch = MRI.createVirtualRegister(TRC);
11455 emitPostLd(BB, StartOfExit, TII, dl, 1, scratch, srcIn, srcOut,
11456 IsThumb1, IsThumb2);
11457 emitPostSt(BB, StartOfExit, TII, dl, 1, scratch, destIn, destOut,
11458 IsThumb1, IsThumb2);
11459 srcIn = srcOut;
11460 destIn = destOut;
11461 }
11462
11463 MI.eraseFromParent(); // The instruction is gone now.
11464 return BB;
11465}
11466
11467MachineBasicBlock *
11468ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI,
11469 MachineBasicBlock *MBB) const {
11470 const TargetMachine &TM = getTargetMachine();
11471 const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
11472 DebugLoc DL = MI.getDebugLoc();
11473
11474  assert(Subtarget->isTargetWindows() &&
11475         "__chkstk is only supported on Windows");
11476  assert(Subtarget->isThumb2() && "Windows on ARM requires Thumb-2 mode");
11477
11478 // __chkstk takes the number of words to allocate on the stack in R4, and
11479  // returns the stack adjustment in bytes in R4. This will not
11480  // clobber any other registers (other than the obvious lr).
11481 //
11482 // Although, technically, IP should be considered a register which may be
11483 // clobbered, the call itself will not touch it. Windows on ARM is a pure
11484 // thumb-2 environment, so there is no interworking required. As a result, we
11485 // do not expect a veneer to be emitted by the linker, clobbering IP.
11486 //
11487 // Each module receives its own copy of __chkstk, so no import thunk is
11488 // required, again, ensuring that IP is not clobbered.
11489 //
11490 // Finally, although some linkers may theoretically provide a trampoline for
11491 // out of range calls (which is quite common due to a 32M range limitation of
11492 // branches for Thumb), we can generate the long-call version via
11493 // -mcmodel=large, alleviating the need for the trampoline which may clobber
11494 // IP.
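  // Illustration (added annotation, not part of the original source): to grow
  // the stack by 4096 bytes, the caller loads R4 = 4096 / 4 = 1024 words before
  // the call; __chkstk returns the 4096-byte adjustment in R4, and the t2SUBrr
  // emitted below then performs SP = SP - R4.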
11495
11496 switch (TM.getCodeModel()) {
11497 case CodeModel::Tiny:
11498    llvm_unreachable("Tiny code model not available on ARM.");
11499 case CodeModel::Small:
11500 case CodeModel::Medium:
11501 case CodeModel::Kernel:
11502 BuildMI(*MBB, MI, DL, TII.get(ARM::tBL))
11503 .add(predOps(ARMCC::AL))
11504 .addExternalSymbol("__chkstk")
11505 .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
11506 .addReg(ARM::R4, RegState::Implicit | RegState::Define)
11507 .addReg(ARM::R12,
11508 RegState::Implicit | RegState::Define | RegState::Dead)
11509 .addReg(ARM::CPSR,
11510 RegState::Implicit | RegState::Define | RegState::Dead);
11511 break;
11512 case CodeModel::Large: {
11513 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
11514 Register Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11515
11516 BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg)
11517 .addExternalSymbol("__chkstk");
11518 BuildMI(*MBB, MI, DL, TII.get(gettBLXrOpcode(*MBB->getParent())))
11519 .add(predOps(ARMCC::AL))
11520 .addReg(Reg, RegState::Kill)
11521 .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
11522 .addReg(ARM::R4, RegState::Implicit | RegState::Define)
11523 .addReg(ARM::R12,
11524 RegState::Implicit | RegState::Define | RegState::Dead)
11525 .addReg(ARM::CPSR,
11526 RegState::Implicit | RegState::Define | RegState::Dead);
11527 break;
11528 }
11529 }
11530
11531 BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr), ARM::SP)
11532 .addReg(ARM::SP, RegState::Kill)
11533 .addReg(ARM::R4, RegState::Kill)
11534 .setMIFlags(MachineInstr::FrameSetup)
11535 .add(predOps(ARMCC::AL))
11536 .add(condCodeOp());
11537
11538 MI.eraseFromParent();
11539 return MBB;
11540}
11541
11542MachineBasicBlock *
11543ARMTargetLowering::EmitLowered__dbzchk(MachineInstr &MI,
11544 MachineBasicBlock *MBB) const {
11545 DebugLoc DL = MI.getDebugLoc();
11546 MachineFunction *MF = MBB->getParent();
11547 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
11548
11549 MachineBasicBlock *ContBB = MF->CreateMachineBasicBlock();
11550 MF->insert(++MBB->getIterator(), ContBB);
11551 ContBB->splice(ContBB->begin(), MBB,
11552 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
11553 ContBB->transferSuccessorsAndUpdatePHIs(MBB);
11554 MBB->addSuccessor(ContBB);
11555
11556 MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
11557 BuildMI(TrapBB, DL, TII->get(ARM::t__brkdiv0));
11558 MF->push_back(TrapBB);
11559 MBB->addSuccessor(TrapBB);
11560
11561 BuildMI(*MBB, MI, DL, TII->get(ARM::tCMPi8))
11562 .addReg(MI.getOperand(0).getReg())
11563 .addImm(0)
11564 .add(predOps(ARMCC::AL));
11565 BuildMI(*MBB, MI, DL, TII->get(ARM::t2Bcc))
11566 .addMBB(TrapBB)
11567 .addImm(ARMCC::EQ)
11568 .addReg(ARM::CPSR);
11569
11570 MI.eraseFromParent();
11571 return ContBB;
11572}
11573
11574// The CPSR operand of SelectItr might be missing a kill marker
11575// because there were multiple uses of CPSR, and ISel didn't know
11576// which to mark. Figure out whether SelectItr should have had a
11577// kill marker, and set it if it should. Returns the correct kill
11578// marker value.
11579static bool checkAndUpdateCPSRKill(MachineBasicBlock::iterator SelectItr,
11580 MachineBasicBlock* BB,
11581 const TargetRegisterInfo* TRI) {
11582 // Scan forward through BB for a use/def of CPSR.
11583 MachineBasicBlock::iterator miI(std::next(SelectItr));
11584 for (MachineBasicBlock::iterator miE = BB->end(); miI != miE; ++miI) {
11585 const MachineInstr& mi = *miI;
11586 if (mi.readsRegister(ARM::CPSR))
11587 return false;
11588 if (mi.definesRegister(ARM::CPSR))
11589 break; // Should have kill-flag - update below.
11590 }
11591
11592 // If we hit the end of the block, check whether CPSR is live into a
11593 // successor.
11594 if (miI == BB->end()) {
11595 for (MachineBasicBlock *Succ : BB->successors())
11596 if (Succ->isLiveIn(ARM::CPSR))
11597 return false;
11598 }
11599
11600 // We found a def, or hit the end of the basic block and CPSR wasn't live
11601 // out. SelectMI should have a kill flag on CPSR.
11602 SelectItr->addRegisterKilled(ARM::CPSR, TRI);
11603 return true;
11604}
11605
11606/// Adds logic in the loop entry MBB to calculate the loop iteration count and
11607/// adds t2WhileLoopSetup and t2WhileLoopStart to generate a WLS loop.
11608static Register genTPEntry(MachineBasicBlock *TpEntry,
11609 MachineBasicBlock *TpLoopBody,
11610 MachineBasicBlock *TpExit, Register OpSizeReg,
11611 const TargetInstrInfo *TII, DebugLoc Dl,
11612 MachineRegisterInfo &MRI) {
11613 // Calculates loop iteration count = ceil(n/16) = (n + 15) >> 4.
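  // Worked example (illustrative annotation, not part of the original source):
  // for n = 37 bytes, (37 + 15) >> 4 = 52 >> 4 = 3, i.e. three tail-predicated
  // iterations, each processing up to 16 bytes.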
11614 Register AddDestReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11615 BuildMI(TpEntry, Dl, TII->get(ARM::t2ADDri), AddDestReg)
11616 .addUse(OpSizeReg)
11617 .addImm(15)
11618 .add(predOps(ARMCC::AL))
11619 .addReg(0);
11620
11621 Register LsrDestReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11622 BuildMI(TpEntry, Dl, TII->get(ARM::t2LSRri), LsrDestReg)
11623 .addUse(AddDestReg, RegState::Kill)
11624 .addImm(4)
11625 .add(predOps(ARMCC::AL))
11626 .addReg(0);
11627
11628 Register TotalIterationsReg = MRI.createVirtualRegister(&ARM::GPRlrRegClass);
11629 BuildMI(TpEntry, Dl, TII->get(ARM::t2WhileLoopSetup), TotalIterationsReg)
11630 .addUse(LsrDestReg, RegState::Kill);
11631
11632 BuildMI(TpEntry, Dl, TII->get(ARM::t2WhileLoopStart))
11633 .addUse(TotalIterationsReg)
11634 .addMBB(TpExit);
11635
11636 BuildMI(TpEntry, Dl, TII->get(ARM::t2B))
11637 .addMBB(TpLoopBody)
11638 .add(predOps(ARMCC::AL));
11639
11640 return TotalIterationsReg;
11641}
11642
11643/// Adds logic in the loopBody MBB to generate MVE_VCTP, t2DoLoopDec and
11644/// t2DoLoopEnd. These are used by later passes to generate tail predicated
11645/// loops.
11646static void genTPLoopBody(MachineBasicBlock *TpLoopBody,
11647 MachineBasicBlock *TpEntry, MachineBasicBlock *TpExit,
11648 const TargetInstrInfo *TII, DebugLoc Dl,
11649 MachineRegisterInfo &MRI, Register OpSrcReg,
11650 Register OpDestReg, Register ElementCountReg,
11651 Register TotalIterationsReg, bool IsMemcpy) {
11652 // First insert 4 PHI nodes for: Current pointer to Src (if memcpy), Dest
11653 // array, loop iteration counter, predication counter.
11654
11655 Register SrcPhiReg, CurrSrcReg;
11656 if (IsMemcpy) {
11657 // Current position in the src array
11658 SrcPhiReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11659 CurrSrcReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11660 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), SrcPhiReg)
11661 .addUse(OpSrcReg)
11662 .addMBB(TpEntry)
11663 .addUse(CurrSrcReg)
11664 .addMBB(TpLoopBody);
11665 }
11666
11667 // Current position in the dest array
11668 Register DestPhiReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11669 Register CurrDestReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11670 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), DestPhiReg)
11671 .addUse(OpDestReg)
11672 .addMBB(TpEntry)
11673 .addUse(CurrDestReg)
11674 .addMBB(TpLoopBody);
11675
11676 // Current loop counter
11677 Register LoopCounterPhiReg = MRI.createVirtualRegister(&ARM::GPRlrRegClass);
11678 Register RemainingLoopIterationsReg =
11679 MRI.createVirtualRegister(&ARM::GPRlrRegClass);
11680 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), LoopCounterPhiReg)
11681 .addUse(TotalIterationsReg)
11682 .addMBB(TpEntry)
11683 .addUse(RemainingLoopIterationsReg)
11684 .addMBB(TpLoopBody);
11685
11686 // Predication counter
11687 Register PredCounterPhiReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11688 Register RemainingElementsReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11689 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), PredCounterPhiReg)
11690 .addUse(ElementCountReg)
11691 .addMBB(TpEntry)
11692 .addUse(RemainingElementsReg)
11693 .addMBB(TpLoopBody);
11694
11695 // Pass predication counter to VCTP
11696 Register VccrReg = MRI.createVirtualRegister(&ARM::VCCRRegClass);
11697 BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VCTP8), VccrReg)
11698 .addUse(PredCounterPhiReg)
11699 .addImm(ARMVCC::None)
11700 .addReg(0)
11701 .addReg(0);
11702
11703 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2SUBri), RemainingElementsReg)
11704 .addUse(PredCounterPhiReg)
11705 .addImm(16)
11706 .add(predOps(ARMCC::AL))
11707 .addReg(0);
11708
11709 // VLDRB (only if memcpy) and VSTRB instructions, predicated using VPR
11710 Register SrcValueReg;
11711 if (IsMemcpy) {
11712 SrcValueReg = MRI.createVirtualRegister(&ARM::MQPRRegClass);
11713 BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VLDRBU8_post))
11714 .addDef(CurrSrcReg)
11715 .addDef(SrcValueReg)
11716 .addReg(SrcPhiReg)
11717 .addImm(16)
11718 .addImm(ARMVCC::Then)
11719 .addUse(VccrReg)
11720 .addReg(0);
11721 } else
11722 SrcValueReg = OpSrcReg;
11723
11724 BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VSTRBU8_post))
11725 .addDef(CurrDestReg)
11726 .addUse(SrcValueReg)
11727 .addReg(DestPhiReg)
11728 .addImm(16)
11729 .addImm(ARMVCC::Then)
11730 .addUse(VccrReg)
11731 .addReg(0);
11732
11733 // Add the pseudoInstrs for decrementing the loop counter and marking the
11734 // end:t2DoLoopDec and t2DoLoopEnd
11735 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2LoopDec), RemainingLoopIterationsReg)
11736 .addUse(LoopCounterPhiReg)
11737 .addImm(1);
11738
11739 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2LoopEnd))
11740 .addUse(RemainingLoopIterationsReg)
11741 .addMBB(TpLoopBody);
11742
11743 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2B))
11744 .addMBB(TpExit)
11745 .add(predOps(ARMCC::AL));
11746}
11747
11748MachineBasicBlock *
11749ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
11750 MachineBasicBlock *BB) const {
11751 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
11752 DebugLoc dl = MI.getDebugLoc();
11753 bool isThumb2 = Subtarget->isThumb2();
11754 switch (MI.getOpcode()) {
11755 default: {
11756 MI.print(errs());
11757    llvm_unreachable("Unexpected instr type to insert");
11758 }
11759
11760 // Thumb1 post-indexed loads are really just single-register LDMs.
11761 case ARM::tLDR_postidx: {
11762 MachineOperand Def(MI.getOperand(1));
11763 BuildMI(*BB, MI, dl, TII->get(ARM::tLDMIA_UPD))
11764 .add(Def) // Rn_wb
11765 .add(MI.getOperand(2)) // Rn
11766 .add(MI.getOperand(3)) // PredImm
11767 .add(MI.getOperand(4)) // PredReg
11768 .add(MI.getOperand(0)) // Rt
11769 .cloneMemRefs(MI);
11770 MI.eraseFromParent();
11771 return BB;
11772 }
11773
11774 case ARM::MVE_MEMCPYLOOPINST:
11775 case ARM::MVE_MEMSETLOOPINST: {
11776
11777    // The transformation below expands the MVE_MEMCPYLOOPINST/MVE_MEMSETLOOPINST
11778    // pseudo into a Tail Predicated (TP) loop. It adds the instructions to calculate
11779    // the iteration count = ceil(size_in_bytes / 16) in the TP entry block and
11780 // adds the relevant instructions in the TP loop Body for generation of a
11781 // WLSTP loop.
11782
11783 // Below is relevant portion of the CFG after the transformation.
11784 // The Machine Basic Blocks are shown along with branch conditions (in
11785 // brackets). Note that TP entry/exit MBBs depict the entry/exit of this
11786 // portion of the CFG and may not necessarily be the entry/exit of the
11787 // function.
11788
11789 // (Relevant) CFG after transformation:
11790 // TP entry MBB
11791 // |
11792 // |-----------------|
11793 // (n <= 0) (n > 0)
11794 // | |
11795 // | TP loop Body MBB<--|
11796 // | | |
11797 // \ |___________|
11798 // \ /
11799 // TP exit MBB
11800
11801 MachineFunction *MF = BB->getParent();
11802 MachineFunctionProperties &Properties = MF->getProperties();
11803 MachineRegisterInfo &MRI = MF->getRegInfo();
11804
11805 Register OpDestReg = MI.getOperand(0).getReg();
11806 Register OpSrcReg = MI.getOperand(1).getReg();
11807 Register OpSizeReg = MI.getOperand(2).getReg();
11808
11809 // Allocate the required MBBs and add to parent function.
11810 MachineBasicBlock *TpEntry = BB;
11811 MachineBasicBlock *TpLoopBody = MF->CreateMachineBasicBlock();
11812 MachineBasicBlock *TpExit;
11813
11814 MF->push_back(TpLoopBody);
11815
11816 // If any instructions are present in the current block after
11817 // MVE_MEMCPYLOOPINST or MVE_MEMSETLOOPINST, split the current block and
11818 // move the instructions into the newly created exit block. If there are no
11819 // instructions add an explicit branch to the FallThrough block and then
11820 // split.
11821 //
11822 // The split is required for two reasons:
11823    // 1) A terminator (t2WhileLoopStart) will be placed at that site.
11824 // 2) Since a TPLoopBody will be added later, any phis in successive blocks
11825 // need to be updated. splitAt() already handles this.
11826 TpExit = BB->splitAt(MI, false);
11827 if (TpExit == BB) {
11828      assert(BB->canFallThrough() && "Exit Block must be Fallthrough of the "
11829                                     "block containing memcpy/memset Pseudo");
11830 TpExit = BB->getFallThrough();
11831 BuildMI(BB, dl, TII->get(ARM::t2B))
11832 .addMBB(TpExit)
11833 .add(predOps(ARMCC::AL));
11834 TpExit = BB->splitAt(MI, false);
11835 }
11836
11837 // Add logic for iteration count
11838 Register TotalIterationsReg =
11839 genTPEntry(TpEntry, TpLoopBody, TpExit, OpSizeReg, TII, dl, MRI);
11840
11841 // Add the vectorized (and predicated) loads/store instructions
11842 bool IsMemcpy = MI.getOpcode() == ARM::MVE_MEMCPYLOOPINST;
11843 genTPLoopBody(TpLoopBody, TpEntry, TpExit, TII, dl, MRI, OpSrcReg,
11844 OpDestReg, OpSizeReg, TotalIterationsReg, IsMemcpy);
11845
11846 // Required to avoid conflict with the MachineVerifier during testing.
11847 Properties.reset(MachineFunctionProperties::Property::NoPHIs);
11848
11849 // Connect the blocks
11850 TpEntry->addSuccessor(TpLoopBody);
11851 TpLoopBody->addSuccessor(TpLoopBody);
11852 TpLoopBody->addSuccessor(TpExit);
11853
11854 // Reorder for a more natural layout
11855 TpLoopBody->moveAfter(TpEntry);
11856 TpExit->moveAfter(TpLoopBody);
11857
11858    // Finally, remove the memcpy/memset pseudo instruction.
11859 MI.eraseFromParent();
11860
11861 // Return the exit block as it may contain other instructions requiring a
11862 // custom inserter
11863 return TpExit;
11864 }
11865
11866  // The Thumb2 pre-indexed stores have the same MI operands; they are just
11867  // defined differently in the .td files from the isel patterns, so
11868  // they need pseudos.
11869 case ARM::t2STR_preidx:
11870 MI.setDesc(TII->get(ARM::t2STR_PRE));
11871 return BB;
11872 case ARM::t2STRB_preidx:
11873 MI.setDesc(TII->get(ARM::t2STRB_PRE));
11874 return BB;
11875 case ARM::t2STRH_preidx:
11876 MI.setDesc(TII->get(ARM::t2STRH_PRE));
11877 return BB;
11878
11879 case ARM::STRi_preidx:
11880 case ARM::STRBi_preidx: {
11881 unsigned NewOpc = MI.getOpcode() == ARM::STRi_preidx ? ARM::STR_PRE_IMM
11882 : ARM::STRB_PRE_IMM;
11883 // Decode the offset.
11884 unsigned Offset = MI.getOperand(4).getImm();
11885 bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub;
11886 Offset = ARM_AM::getAM2Offset(Offset);
11887 if (isSub)
11888 Offset = -Offset;
11889
11890 MachineMemOperand *MMO = *MI.memoperands_begin();
11891 BuildMI(*BB, MI, dl, TII->get(NewOpc))
11892 .add(MI.getOperand(0)) // Rn_wb
11893 .add(MI.getOperand(1)) // Rt
11894 .add(MI.getOperand(2)) // Rn
11895 .addImm(Offset) // offset (skip GPR==zero_reg)
11896 .add(MI.getOperand(5)) // pred
11897 .add(MI.getOperand(6))
11898 .addMemOperand(MMO);
11899 MI.eraseFromParent();
11900 return BB;
11901 }
11902 case ARM::STRr_preidx:
11903 case ARM::STRBr_preidx:
11904 case ARM::STRH_preidx: {
11905 unsigned NewOpc;
11906 switch (MI.getOpcode()) {
11907    default: llvm_unreachable("unexpected opcode!");
11908 case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break;
11909 case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break;
11910 case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break;
11911 }
11912 MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
11913 for (const MachineOperand &MO : MI.operands())
11914 MIB.add(MO);
11915 MI.eraseFromParent();
11916 return BB;
11917 }
11918
11919 case ARM::tMOVCCr_pseudo: {
11920 // To "insert" a SELECT_CC instruction, we actually have to insert the
11921 // diamond control-flow pattern. The incoming instruction knows the
11922 // destination vreg to set, the condition code register to branch on, the
11923 // true/false values to select between, and a branch opcode to use.
11924 const BasicBlock *LLVM_BB = BB->getBasicBlock();
11925 MachineFunction::iterator It = ++BB->getIterator();
11926
11927 // thisMBB:
11928 // ...
11929 // TrueVal = ...
11930 // cmpTY ccX, r1, r2
11931 // bCC copy1MBB
11932 // fallthrough --> copy0MBB
11933 MachineBasicBlock *thisMBB = BB;
11934 MachineFunction *F = BB->getParent();
11935 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
11936 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
11937 F->insert(It, copy0MBB);
11938 F->insert(It, sinkMBB);
11939
11940 // Check whether CPSR is live past the tMOVCCr_pseudo.
11941 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
11942 if (!MI.killsRegister(ARM::CPSR) &&
11943 !checkAndUpdateCPSRKill(MI, thisMBB, TRI)) {
11944 copy0MBB->addLiveIn(ARM::CPSR);
11945 sinkMBB->addLiveIn(ARM::CPSR);
11946 }
11947
11948 // Transfer the remainder of BB and its successor edges to sinkMBB.
11949 sinkMBB->splice(sinkMBB->begin(), BB,
11950 std::next(MachineBasicBlock::iterator(MI)), BB->end());
11951 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
11952
11953 BB->addSuccessor(copy0MBB);
11954 BB->addSuccessor(sinkMBB);
11955
11956 BuildMI(BB, dl, TII->get(ARM::tBcc))
11957 .addMBB(sinkMBB)
11958 .addImm(MI.getOperand(3).getImm())
11959 .addReg(MI.getOperand(4).getReg());
11960
11961 // copy0MBB:
11962 // %FalseValue = ...
11963 // # fallthrough to sinkMBB
11964 BB = copy0MBB;
11965
11966 // Update machine-CFG edges
11967 BB->addSuccessor(sinkMBB);
11968
11969 // sinkMBB:
11970 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
11971 // ...
11972 BB = sinkMBB;
11973 BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), MI.getOperand(0).getReg())
11974 .addReg(MI.getOperand(1).getReg())
11975 .addMBB(copy0MBB)
11976 .addReg(MI.getOperand(2).getReg())
11977 .addMBB(thisMBB);
11978
11979 MI.eraseFromParent(); // The pseudo instruction is gone now.
11980 return BB;
11981 }
11982
11983 case ARM::BCCi64:
11984 case ARM::BCCZi64: {
11985 // If there is an unconditional branch to the other successor, remove it.
11986 BB->erase(std::next(MachineBasicBlock::iterator(MI)), BB->end());
11987
11988 // Compare both parts that make up the double comparison separately for
11989 // equality.
11990 bool RHSisZero = MI.getOpcode() == ARM::BCCZi64;
11991
11992 Register LHS1 = MI.getOperand(1).getReg();
11993 Register LHS2 = MI.getOperand(2).getReg();
11994 if (RHSisZero) {
11995 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
11996 .addReg(LHS1)
11997 .addImm(0)
11998 .add(predOps(ARMCC::AL));
11999 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12000 .addReg(LHS2).addImm(0)
12001 .addImm(ARMCC::EQ).addReg(ARM::CPSR);
12002 } else {
12003 Register RHS1 = MI.getOperand(3).getReg();
12004 Register RHS2 = MI.getOperand(4).getReg();
12005 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
12006 .addReg(LHS1)
12007 .addReg(RHS1)
12008 .add(predOps(ARMCC::AL));
12009 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
12010 .addReg(LHS2).addReg(RHS2)
12011 .addImm(ARMCC::EQ).addReg(ARM::CPSR);
12012 }
12013
12014 MachineBasicBlock *destMBB = MI.getOperand(RHSisZero ? 3 : 5).getMBB();
12015 MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);
12016 if (MI.getOperand(0).getImm() == ARMCC::NE)
12017 std::swap(destMBB, exitMBB);
12018
12019 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
12020 .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
12021 if (isThumb2)
12022 BuildMI(BB, dl, TII->get(ARM::t2B))
12023 .addMBB(exitMBB)
12024 .add(predOps(ARMCC::AL));
12025 else
12026 BuildMI(BB, dl, TII->get(ARM::B)) .addMBB(exitMBB);
12027
12028 MI.eraseFromParent(); // The pseudo instruction is gone now.
12029 return BB;
12030 }
12031
12032 case ARM::Int_eh_sjlj_setjmp:
12033 case ARM::Int_eh_sjlj_setjmp_nofp:
12034 case ARM::tInt_eh_sjlj_setjmp:
12035 case ARM::t2Int_eh_sjlj_setjmp:
12036 case ARM::t2Int_eh_sjlj_setjmp_nofp:
12037 return BB;
12038
12039 case ARM::Int_eh_sjlj_setup_dispatch:
12040 EmitSjLjDispatchBlock(MI, BB);
12041 return BB;
12042
12043 case ARM::ABS:
12044 case ARM::t2ABS: {
12045 // To insert an ABS instruction, we have to insert the
12046 // diamond control-flow pattern. The incoming instruction knows the
12047 // source vreg to test against 0, the destination vreg to set,
12048 // the condition code register to branch on, the
12049 // true/false values to select between, and a branch opcode to use.
12050 // It transforms
12051 // V1 = ABS V0
12052 // into
12053 // V2 = MOVS V0
12054 // BCC (branch to SinkBB if V0 >= 0)
12055 // RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0)
12056 // SinkBB: V1 = PHI(V2, V3)
12057 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12058 MachineFunction::iterator BBI = ++BB->getIterator();
12059 MachineFunction *Fn = BB->getParent();
12060 MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);
12061 MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB);
12062 Fn->insert(BBI, RSBBB);
12063 Fn->insert(BBI, SinkBB);
12064
12065 Register ABSSrcReg = MI.getOperand(1).getReg();
12066 Register ABSDstReg = MI.getOperand(0).getReg();
12067 bool ABSSrcKIll = MI.getOperand(1).isKill();
12068 bool isThumb2 = Subtarget->isThumb2();
12069 MachineRegisterInfo &MRI = Fn->getRegInfo();
12070 // In Thumb mode S must not be specified if source register is the SP or
12071 // PC and if destination register is the SP, so restrict register class
12072 Register NewRsbDstReg = MRI.createVirtualRegister(
12073 isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass);
12074
12075 // Transfer the remainder of BB and its successor edges to sinkMBB.
12076 SinkBB->splice(SinkBB->begin(), BB,
12077 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12078 SinkBB->transferSuccessorsAndUpdatePHIs(BB);
12079
12080 BB->addSuccessor(RSBBB);
12081 BB->addSuccessor(SinkBB);
12082
12083 // fall through to SinkMBB
12084 RSBBB->addSuccessor(SinkBB);
12085
12086 // insert a cmp at the end of BB
12087 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12088 .addReg(ABSSrcReg)
12089 .addImm(0)
12090 .add(predOps(ARMCC::AL));
12091
12092 // insert a bcc with opposite CC to ARMCC::MI at the end of BB
12093 BuildMI(BB, dl,
12094 TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB)
12095 .addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR);
12096
12097 // insert rsbri in RSBBB
12098 // Note: BCC and rsbri will be converted into predicated rsbmi
12099 // by if-conversion pass
12100 BuildMI(*RSBBB, RSBBB->begin(), dl,
12101 TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
12102 .addReg(ABSSrcReg, ABSSrcKIll ? RegState::Kill : 0)
12103 .addImm(0)
12104 .add(predOps(ARMCC::AL))
12105 .add(condCodeOp());
12106
12107 // insert PHI in SinkBB,
12108 // reuse ABSDstReg to not change uses of ABS instruction
12109 BuildMI(*SinkBB, SinkBB->begin(), dl,
12110 TII->get(ARM::PHI), ABSDstReg)
12111 .addReg(NewRsbDstReg).addMBB(RSBBB)
12112 .addReg(ABSSrcReg).addMBB(BB);
12113
12114 // remove ABS instruction
12115 MI.eraseFromParent();
12116
12117 // return last added BB
12118 return SinkBB;
12119 }
12120 case ARM::COPY_STRUCT_BYVAL_I32:
12121 ++NumLoopByVals;
12122 return EmitStructByval(MI, BB);
12123 case ARM::WIN__CHKSTK:
12124 return EmitLowered__chkstk(MI, BB);
12125 case ARM::WIN__DBZCHK:
12126 return EmitLowered__dbzchk(MI, BB);
12127 }
12128}
12129
12130/// Attaches vregs to MEMCPY that it will use as scratch registers
12131/// when it is expanded into LDM/STM. This is done as a post-isel lowering
12132/// instead of as a custom inserter because we need the use list from the SDNode.
12133static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget,
12134 MachineInstr &MI, const SDNode *Node) {
12135 bool isThumb1 = Subtarget->isThumb1Only();
12136
12137 DebugLoc DL = MI.getDebugLoc();
12138 MachineFunction *MF = MI.getParent()->getParent();
12139 MachineRegisterInfo &MRI = MF->getRegInfo();
12140 MachineInstrBuilder MIB(*MF, MI);
12141
12142 // If the new dst/src is unused mark it as dead.
12143 if (!Node->hasAnyUseOfValue(0)) {
12144 MI.getOperand(0).setIsDead(true);
12145 }
12146 if (!Node->hasAnyUseOfValue(1)) {
12147 MI.getOperand(1).setIsDead(true);
12148 }
12149
12150 // The MEMCPY both defines and kills the scratch registers.
12151 for (unsigned I = 0; I != MI.getOperand(4).getImm(); ++I) {
12152 Register TmpReg = MRI.createVirtualRegister(isThumb1 ? &ARM::tGPRRegClass
12153 : &ARM::GPRRegClass);
12154 MIB.addReg(TmpReg, RegState::Define|RegState::Dead);
12155 }
12156}
12157
12158void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
12159 SDNode *Node) const {
12160 if (MI.getOpcode() == ARM::MEMCPY) {
12161 attachMEMCPYScratchRegs(Subtarget, MI, Node);
12162 return;
12163 }
12164
12165 const MCInstrDesc *MCID = &MI.getDesc();
12166  // Adjust potentially 's'-setting instructions after isel, i.e. ADC, SBC, RSB,
12167 // RSC. Coming out of isel, they have an implicit CPSR def, but the optional
12168 // operand is still set to noreg. If needed, set the optional operand's
12169 // register to CPSR, and remove the redundant implicit def.
12170 //
12171 // e.g. ADCS (..., implicit-def CPSR) -> ADC (... opt:def CPSR).
12172
12173 // Rename pseudo opcodes.
12174 unsigned NewOpc = convertAddSubFlagsOpcode(MI.getOpcode());
12175 unsigned ccOutIdx;
12176 if (NewOpc) {
12177 const ARMBaseInstrInfo *TII = Subtarget->getInstrInfo();
12178 MCID = &TII->get(NewOpc);
12179
12180    assert(MCID->getNumOperands() ==
12181               MI.getDesc().getNumOperands() + 5 - MI.getDesc().getSize()
12182           && "converted opcode should be the same except for cc_out"
12183              " (and, on Thumb1, pred)");
12184
12185 MI.setDesc(*MCID);
12186
12187 // Add the optional cc_out operand
12188 MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/true));
12189
12190 // On Thumb1, move all input operands to the end, then add the predicate
12191 if (Subtarget->isThumb1Only()) {
12192 for (unsigned c = MCID->getNumOperands() - 4; c--;) {
12193 MI.addOperand(MI.getOperand(1));
12194 MI.removeOperand(1);
12195 }
12196
12197 // Restore the ties
12198 for (unsigned i = MI.getNumOperands(); i--;) {
12199 const MachineOperand& op = MI.getOperand(i);
12200 if (op.isReg() && op.isUse()) {
12201 int DefIdx = MCID->getOperandConstraint(i, MCOI::TIED_TO);
12202 if (DefIdx != -1)
12203 MI.tieOperands(DefIdx, i);
12204 }
12205 }
12206
12207 MI.addOperand(MachineOperand::CreateImm(ARMCC::AL));
12208 MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/false));
12209 ccOutIdx = 1;
12210 } else
12211 ccOutIdx = MCID->getNumOperands() - 1;
12212 } else
12213 ccOutIdx = MCID->getNumOperands() - 1;
12214
12215 // Any ARM instruction that sets the 's' bit should specify an optional
12216 // "cc_out" operand in the last operand position.
12217 if (!MI.hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) {
12218    assert(!NewOpc && "Optional cc_out operand required");
12219 return;
12220 }
12221 // Look for an implicit def of CPSR added by MachineInstr ctor. Remove it
12222 // since we already have an optional CPSR def.
12223 bool definesCPSR = false;
12224 bool deadCPSR = false;
12225 for (unsigned i = MCID->getNumOperands(), e = MI.getNumOperands(); i != e;
12226 ++i) {
12227 const MachineOperand &MO = MI.getOperand(i);
12228 if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) {
12229 definesCPSR = true;
12230 if (MO.isDead())
12231 deadCPSR = true;
12232 MI.removeOperand(i);
12233 break;
12234 }
12235 }
12236 if (!definesCPSR) {
12237    assert(!NewOpc && "Optional cc_out operand required");
12238 return;
12239 }
12240  assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag");
12241 if (deadCPSR) {
12242    assert(!MI.getOperand(ccOutIdx).getReg() &&
12243           "expect uninitialized optional cc_out operand");
12244 // Thumb1 instructions must have the S bit even if the CPSR is dead.
12245 if (!Subtarget->isThumb1Only())
12246 return;
12247 }
12248
12249 // If this instruction was defined with an optional CPSR def and its dag node
12250 // had a live implicit CPSR def, then activate the optional CPSR def.
12251 MachineOperand &MO = MI.getOperand(ccOutIdx);
12252 MO.setReg(ARM::CPSR);
12253 MO.setIsDef(true);
12254}
12255
12256//===----------------------------------------------------------------------===//
12257// ARM Optimization Hooks
12258//===----------------------------------------------------------------------===//
12259
12260// Helper function that checks if N is a null or all ones constant.
12261static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) {
12262 return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
12263}
12264
12265// Return true if N is conditionally 0 or all ones.
12266// Detects these expressions where cc is an i1 value:
12267//
12268// (select cc 0, y) [AllOnes=0]
12269// (select cc y, 0) [AllOnes=0]
12270// (zext cc) [AllOnes=0]
12271// (sext cc) [AllOnes=0/1]
12272// (select cc -1, y) [AllOnes=1]
12273// (select cc y, -1) [AllOnes=1]
12274//
12275// Invert is set when N is the null/all ones constant when CC is false.
12276// OtherOp is set to the alternative value of N.
12277static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes,
12278 SDValue &CC, bool &Invert,
12279 SDValue &OtherOp,
12280 SelectionDAG &DAG) {
12281 switch (N->getOpcode()) {
12282 default: return false;
12283 case ISD::SELECT: {
12284 CC = N->getOperand(0);
12285 SDValue N1 = N->getOperand(1);
12286 SDValue N2 = N->getOperand(2);
12287 if (isZeroOrAllOnes(N1, AllOnes)) {
12288 Invert = false;
12289 OtherOp = N2;
12290 return true;
12291 }
12292 if (isZeroOrAllOnes(N2, AllOnes)) {
12293 Invert = true;
12294 OtherOp = N1;
12295 return true;
12296 }
12297 return false;
12298 }
12299 case ISD::ZERO_EXTEND:
12300 // (zext cc) can never be the all ones value.
12301 if (AllOnes)
12302 return false;
12303 [[fallthrough]];
12304 case ISD::SIGN_EXTEND: {
12305 SDLoc dl(N);
12306 EVT VT = N->getValueType(0);
12307 CC = N->getOperand(0);
12308 if (CC.getValueType() != MVT::i1 || CC.getOpcode() != ISD::SETCC)
12309 return false;
12310 Invert = !AllOnes;
12311 if (AllOnes)
12312 // When looking for an AllOnes constant, N is an sext, and the 'other'
12313 // value is 0.
12314 OtherOp = DAG.getConstant(0, dl, VT);
12315 else if (N->getOpcode() == ISD::ZERO_EXTEND)
12316 // When looking for a 0 constant, N can be zext or sext.
12317 OtherOp = DAG.getConstant(1, dl, VT);
12318 else
12319 OtherOp = DAG.getAllOnesConstant(dl, VT);
12320 return true;
12321 }
12322 }
12323}
12324
12325// Combine a constant select operand into its use:
12326//
12327// (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
12328// (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
12329// (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) [AllOnes=1]
12330// (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
12331// (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
12332//
12333// The transform is rejected if the select doesn't have a constant operand that
12334// is null, or all ones when AllOnes is set.
12335//
12336// Also recognize sext/zext from i1:
12337//
12338// (add (zext cc), x) -> (select cc (add x, 1), x)
12339// (add (sext cc), x) -> (select cc (add x, -1), x)
12340//
12341// These transformations eventually create predicated instructions.
12342//
12343// @param N The node to transform.
12344// @param Slct The N operand that is a select.
12345// @param OtherOp The other N operand (x above).
12346// @param DCI Context.
12347// @param AllOnes Require the select constant to be all ones instead of null.
12348// @returns The new node, or SDValue() on failure.
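// Concrete instance (illustrative annotation, not part of the original source):
// with AllOnes = false, (add (select cc, 0, 7), x) is rewritten to
// (select cc, x, (add x, 7)); when cc is true the zero operand contributes
// nothing and x is used directly, and the add is only performed on the false
// path, which later lowers to a predicated ADD.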
12349static
12350SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
12351 TargetLowering::DAGCombinerInfo &DCI,
12352 bool AllOnes = false) {
12353 SelectionDAG &DAG = DCI.DAG;
12354 EVT VT = N->getValueType(0);
12355 SDValue NonConstantVal;
12356 SDValue CCOp;
12357 bool SwapSelectOps;
12358 if (!isConditionalZeroOrAllOnes(Slct.getNode(), AllOnes, CCOp, SwapSelectOps,
12359 NonConstantVal, DAG))
12360 return SDValue();
12361
12362  // Slct is now known to be the desired identity constant when CC is true.
12363 SDValue TrueVal = OtherOp;
12364 SDValue FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
12365 OtherOp, NonConstantVal);
12366 // Unless SwapSelectOps says CC should be false.
12367 if (SwapSelectOps)
12368 std::swap(TrueVal, FalseVal);
12369
12370 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
12371 CCOp, TrueVal, FalseVal);
12372}
12373
12374// Attempt combineSelectAndUse on each operand of a commutative operator N.
12375static
12376SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes,
12377 TargetLowering::DAGCombinerInfo &DCI) {
12378 SDValue N0 = N->getOperand(0);
12379 SDValue N1 = N->getOperand(1);
12380 if (N0.getNode()->hasOneUse())
12381 if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes))
12382 return Result;
12383 if (N1.getNode()->hasOneUse())
12384 if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes))
12385 return Result;
12386 return SDValue();
12387}
12388
12389static bool IsVUZPShuffleNode(SDNode *N) {
12390 // VUZP shuffle node.
12391 if (N->getOpcode() == ARMISD::VUZP)
12392 return true;
12393
12394 // "VUZP" on i32 is an alias for VTRN.
12395 if (N->getOpcode() == ARMISD::VTRN && N->getValueType(0) == MVT::v2i32)
12396 return true;
12397
12398 return false;
12399}
12400
12401static SDValue AddCombineToVPADD(SDNode *N, SDValue N0, SDValue N1,
12402 TargetLowering::DAGCombinerInfo &DCI,
12403 const ARMSubtarget *Subtarget) {
12404 // Look for ADD(VUZP.0, VUZP.1).
12405 if (!IsVUZPShuffleNode(N0.getNode()) || N0.getNode() != N1.getNode() ||
12406 N0 == N1)
12407 return SDValue();
12408
12409 // Make sure the ADD is a 64-bit add; there is no 128-bit VPADD.
12410 if (!N->getValueType(0).is64BitVector())
12411 return SDValue();
12412
12413 // Generate vpadd.
12414 SelectionDAG &DAG = DCI.DAG;
12415 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12416 SDLoc dl(N);
12417 SDNode *Unzip = N0.getNode();
12418 EVT VT = N->getValueType(0);
12419
12420 SmallVector<SDValue, 8> Ops;
12421 Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpadd, dl,
12422 TLI.getPointerTy(DAG.getDataLayout())));
12423 Ops.push_back(Unzip->getOperand(0));
12424 Ops.push_back(Unzip->getOperand(1));
12425
12426 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);
12427}
12428
12429static SDValue AddCombineVUZPToVPADDL(SDNode *N, SDValue N0, SDValue N1,
12430 TargetLowering::DAGCombinerInfo &DCI,
12431 const ARMSubtarget *Subtarget) {
12432 // Check for two extended operands.
12433 if (!(N0.getOpcode() == ISD::SIGN_EXTEND &&
12434 N1.getOpcode() == ISD::SIGN_EXTEND) &&
12435 !(N0.getOpcode() == ISD::ZERO_EXTEND &&
12436 N1.getOpcode() == ISD::ZERO_EXTEND))
12437 return SDValue();
12438
12439 SDValue N00 = N0.getOperand(0);
12440 SDValue N10 = N1.getOperand(0);
12441
12442 // Look for ADD(SEXT(VUZP.0), SEXT(VUZP.1))
12443 if (!IsVUZPShuffleNode(N00.getNode()) || N00.getNode() != N10.getNode() ||
12444 N00 == N10)
12445 return SDValue();
12446
12447 // We only recognize Q register paddl here; this can't be reached until
12448 // after type legalization.
12449 if (!N00.getValueType().is64BitVector() ||
12450 !N0.getValueType().is128BitVector())
12451 return SDValue();
12452
12453 // Generate vpaddl.
12454 SelectionDAG &DAG = DCI.DAG;
12455 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12456 SDLoc dl(N);
12457 EVT VT = N->getValueType(0);
12458
12459 SmallVector<SDValue, 8> Ops;
12460 // Form vpaddl.sN or vpaddl.uN depending on the kind of extension.
12461 unsigned Opcode;
12462 if (N0.getOpcode() == ISD::SIGN_EXTEND)
12463 Opcode = Intrinsic::arm_neon_vpaddls;
12464 else
12465 Opcode = Intrinsic::arm_neon_vpaddlu;
12466 Ops.push_back(DAG.getConstant(Opcode, dl,
12467 TLI.getPointerTy(DAG.getDataLayout())));
12468 EVT ElemTy = N00.getValueType().getVectorElementType();
12469 unsigned NumElts = VT.getVectorNumElements();
12470 EVT ConcatVT = EVT::getVectorVT(*DAG.getContext(), ElemTy, NumElts * 2);
12471 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), ConcatVT,
12472 N00.getOperand(0), N00.getOperand(1));
12473 Ops.push_back(Concat);
12474
12475 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);
12476}
12477
12478// FIXME: This function shouldn't be necessary; if we lower BUILD_VECTOR in
12479// an appropriate manner, we end up with ADD(VUZP(ZEXT(N))), which is
12480// much easier to match.
12481static SDValue
12482AddCombineBUILD_VECTORToVPADDL(SDNode *N, SDValue N0, SDValue N1,
12483 TargetLowering::DAGCombinerInfo &DCI,
12484 const ARMSubtarget *Subtarget) {
12485  // Only perform the optimization after legalization and if NEON is available.
12486  // We also expect both operands to be BUILD_VECTORs.
12487 if (DCI.isBeforeLegalize() || !Subtarget->hasNEON()
12488 || N0.getOpcode() != ISD::BUILD_VECTOR
12489 || N1.getOpcode() != ISD::BUILD_VECTOR)
12490 return SDValue();
12491
12492 // Check output type since VPADDL operand elements can only be 8, 16, or 32.
12493 EVT VT = N->getValueType(0);
12494 if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64)
12495 return SDValue();
12496
12497 // Check that the vector operands are of the right form.
12498 // N0 and N1 are BUILD_VECTOR nodes with N number of EXTRACT_VECTOR
12499 // operands, where N is the size of the formed vector.
12500 // Each EXTRACT_VECTOR should have the same input vector and odd or even
12501  // index such that we have a pairwise add pattern.
12502
12503 // Grab the vector that all EXTRACT_VECTOR nodes should be referencing.
12504 if (N0->getOperand(0)->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
12505 return SDValue();
12506 SDValue Vec = N0->getOperand(0)->getOperand(0);
12507 SDNode *V = Vec.getNode();
12508 unsigned nextIndex = 0;
12509
12510  // For each operand of the ADD that is a BUILD_VECTOR,
12511  // check whether each of its operands is an EXTRACT_VECTOR with
12512  // the same vector and the appropriate index.
12513 for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) {
12514 if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT
12515 && N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
12516
12517 SDValue ExtVec0 = N0->getOperand(i);
12518 SDValue ExtVec1 = N1->getOperand(i);
12519
12520      // The first operand is the vector; verify it is the same.
12521 if (V != ExtVec0->getOperand(0).getNode() ||
12522 V != ExtVec1->getOperand(0).getNode())
12523 return SDValue();
12524
12525      // The second operand is the constant; verify it is correct.
12526 ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1));
12527 ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1));
12528
12529      // For the constants, we want to see all the even or all the odd indices.
12530 if (!C0 || !C1 || C0->getZExtValue() != nextIndex
12531 || C1->getZExtValue() != nextIndex+1)
12532 return SDValue();
12533
12534 // Increment index.
12535 nextIndex+=2;
12536 } else
12537 return SDValue();
12538 }
12539
12540 // Don't generate vpaddl+vmovn; we'll match it to vpadd later. Also make sure
12541 // we're using the entire input vector, otherwise there's a size/legality
12542 // mismatch somewhere.
12543 if (nextIndex != Vec.getValueType().getVectorNumElements() ||
12544 Vec.getValueType().getVectorElementType() == VT.getVectorElementType())
12545 return SDValue();
12546
12547 // Create VPADDL node.
12548 SelectionDAG &DAG = DCI.DAG;
12549 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12550
12551 SDLoc dl(N);
12552
12553 // Build operand list.
12554 SmallVector<SDValue, 8> Ops;
12555 Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls, dl,
12556 TLI.getPointerTy(DAG.getDataLayout())));
12557
12558 // Input is the vector.
12559 Ops.push_back(Vec);
12560
12561 // Get widened type and narrowed type.
12562 MVT widenType;
12563 unsigned numElem = VT.getVectorNumElements();
12564
12565 EVT inputLaneType = Vec.getValueType().getVectorElementType();
12566 switch (inputLaneType.getSimpleVT().SimpleTy) {
12567 case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;
12568 case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;
12569 case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;
12570 default:
12571 llvm_unreachable("Invalid vector element type for padd optimization.");
12572 }
12573
12574 SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, widenType, Ops);
12575 unsigned ExtOp = VT.bitsGT(tmp.getValueType()) ? ISD::ANY_EXTEND : ISD::TRUNCATE;
12576 return DAG.getNode(ExtOp, dl, VT, tmp);
12577}
12578
12579static SDValue findMUL_LOHI(SDValue V) {
12580 if (V->getOpcode() == ISD::UMUL_LOHI ||
12581 V->getOpcode() == ISD::SMUL_LOHI)
12582 return V;
12583 return SDValue();
12584}
12585
12586static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode,
12587 TargetLowering::DAGCombinerInfo &DCI,
12588 const ARMSubtarget *Subtarget) {
12589 if (!Subtarget->hasBaseDSP())
12590 return SDValue();
12591
12592 // SMLALBB, SMLALBT, SMLALTB, SMLALTT multiply two 16-bit values and
12593 // accumulate the product into a 64-bit value. The 16-bit values will
12594 // be sign-extended somehow or SRA'd into 32-bit values
12595 // (addc (adde (mul 16bit, 16bit), lo), hi)
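// Illustrative sketch of the shape matched below (names are placeholders):
//   Mul  = mul (sext16 a), (sext16 b)
//   Addc = ARMISD::ADDC Mul, Lo
//   Adde = ARMISD::ADDE (sra Mul, 31), Hi, Addc:1
//   -->    ARMISD::SMLALBB a, b, Lo, Hi   (BT/TB/TT when an operand is sra'd by 16)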
12596 SDValue Mul = AddcNode->getOperand(0);
12597 SDValue Lo = AddcNode->getOperand(1);
12598 if (Mul.getOpcode() != ISD::MUL) {
12599 Lo = AddcNode->getOperand(0);
12600 Mul = AddcNode->getOperand(1);
12601 if (Mul.getOpcode() != ISD::MUL)
12602 return SDValue();
12603 }
12604
12605 SDValue SRA = AddeNode->getOperand(0);
12606 SDValue Hi = AddeNode->getOperand(1);
12607 if (SRA.getOpcode() != ISD::SRA) {
12608 SRA = AddeNode->getOperand(1);
12609 Hi = AddeNode->getOperand(0);
12610 if (SRA.getOpcode() != ISD::SRA)
12611 return SDValue();
12612 }
12613 if (auto Const = dyn_cast<ConstantSDNode>(SRA.getOperand(1))) {
12614 if (Const->getZExtValue() != 31)
12615 return SDValue();
12616 } else
12617 return SDValue();
12618
12619 if (SRA.getOperand(0) != Mul)
12620 return SDValue();
12621
12622 SelectionDAG &DAG = DCI.DAG;
12623 SDLoc dl(AddcNode);
12624 unsigned Opcode = 0;
12625 SDValue Op0;
12626 SDValue Op1;
12627
12628 if (isS16(Mul.getOperand(0), DAG) && isS16(Mul.getOperand(1), DAG)) {
12629 Opcode = ARMISD::SMLALBB;
12630 Op0 = Mul.getOperand(0);
12631 Op1 = Mul.getOperand(1);
12632 } else if (isS16(Mul.getOperand(0), DAG) && isSRA16(Mul.getOperand(1))) {
12633 Opcode = ARMISD::SMLALBT;
12634 Op0 = Mul.getOperand(0);
12635 Op1 = Mul.getOperand(1).getOperand(0);
12636 } else if (isSRA16(Mul.getOperand(0)) && isS16(Mul.getOperand(1), DAG)) {
12637 Opcode = ARMISD::SMLALTB;
12638 Op0 = Mul.getOperand(0).getOperand(0);
12639 Op1 = Mul.getOperand(1);
12640 } else if (isSRA16(Mul.getOperand(0)) && isSRA16(Mul.getOperand(1))) {
12641 Opcode = ARMISD::SMLALTT;
12642 Op0 = Mul->getOperand(0).getOperand(0);
12643 Op1 = Mul->getOperand(1).getOperand(0);
12644 }
12645
12646 if (!Op0 || !Op1)
12647 return SDValue();
12648
12649 SDValue SMLAL = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
12650 Op0, Op1, Lo, Hi);
12651 // Replace the ADD nodes' uses with the SMLAL node's values.
12652 SDValue HiMLALResult(SMLAL.getNode(), 1);
12653 SDValue LoMLALResult(SMLAL.getNode(), 0);
12654
12655 DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult);
12656 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult);
12657
12658 // Return original node to notify the driver to stop replacing.
12659 SDValue resNode(AddcNode, 0);
12660 return resNode;
12661}
12662
12663static SDValue AddCombineTo64bitMLAL(SDNode *AddeSubeNode,
12664 TargetLowering::DAGCombinerInfo &DCI,
12665 const ARMSubtarget *Subtarget) {
12666 // Look for multiply add opportunities.
12667 // The pattern is an ISD::UMUL_LOHI followed by two add nodes, where
12668 // each add node consumes a value from ISD::UMUL_LOHI and there is
12669 // a glue link from the first add to the second add.
12670 // If we find this pattern, we can replace the U/SMUL_LOHI, ADDC, and ADDE by
12671 // a S/UMLAL instruction.
12672 // UMUL_LOHI
12673 // / :lo \ :hi
12674 // V \ [no multiline comment]
12675 // loAdd -> ADDC |
12676 // \ :carry /
12677 // V V
12678 // ADDE <- hiAdd
12679 //
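// Illustrative sketch (names are placeholders):
//   t0:lo,hi = UMUL_LOHI a, b
//   t1       = ARMISD::ADDC t0:lo, x
//   t2       = ARMISD::ADDE t0:hi, y, t1:1
//   -->        ARMISD::UMLAL a, b, x, y   (SMLAL for the signed case)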
12680 // In the special case where only the higher part of a signed result is used
12681 // and the add to the low part of the result of ISD::UMUL_LOHI adds or subtracts
12682 // a constant with the exact value of 0x80000000, we recognize we are dealing
12683 // with a "rounded multiply and add" (or subtract) and transform it into
12684 // either an ARMISD::SMMLAR or an ARMISD::SMMLSR, respectively.
12685
12686 assert((AddeSubeNode->getOpcode() == ARMISD::ADDE ||
12687 AddeSubeNode->getOpcode() == ARMISD::SUBE) &&
12688 "Expect an ADDE or SUBE");
12689
12690 assert(AddeSubeNode->getNumOperands() == 3 &&
12691 AddeSubeNode->getOperand(2).getValueType() == MVT::i32 &&
12692 "ADDE node has the wrong inputs");
12693
12694 // Check that we are chained to the right ADDC or SUBC node.
12695 SDNode *AddcSubcNode = AddeSubeNode->getOperand(2).getNode();
12696 if ((AddeSubeNode->getOpcode() == ARMISD::ADDE &&
12697 AddcSubcNode->getOpcode() != ARMISD::ADDC) ||
12698 (AddeSubeNode->getOpcode() == ARMISD::SUBE &&
12699 AddcSubcNode->getOpcode() != ARMISD::SUBC))
12700 return SDValue();
12701
12702 SDValue AddcSubcOp0 = AddcSubcNode->getOperand(0);
12703 SDValue AddcSubcOp1 = AddcSubcNode->getOperand(1);
12704
12705 // Check if the two operands are from the same mul_lohi node.
12706 if (AddcSubcOp0.getNode() == AddcSubcOp1.getNode())
12707 return SDValue();
12708
12709 assert(AddcSubcNode->getNumValues() == 2 &&
12710 AddcSubcNode->getValueType(0) == MVT::i32 &&
12711 "Expect ADDC with two result values. First: i32");
12712
12713 // Check that the ADDC adds the low result of the S/UMUL_LOHI. If not, it
12714 // may be an SMLAL which multiplies two 16-bit values.
12715 if (AddeSubeNode->getOpcode() == ARMISD::ADDE &&
12716 AddcSubcOp0->getOpcode() != ISD::UMUL_LOHI &&
12717 AddcSubcOp0->getOpcode() != ISD::SMUL_LOHI &&
12718 AddcSubcOp1->getOpcode() != ISD::UMUL_LOHI &&
12719 AddcSubcOp1->getOpcode() != ISD::SMUL_LOHI)
12720 return AddCombineTo64BitSMLAL16(AddcSubcNode, AddeSubeNode, DCI, Subtarget);
12721
12722 // Check for the triangle shape.
12723 SDValue AddeSubeOp0 = AddeSubeNode->getOperand(0);
12724 SDValue AddeSubeOp1 = AddeSubeNode->getOperand(1);
12725
12726 // Make sure that the ADDE/SUBE operands are not coming from the same node.
12727 if (AddeSubeOp0.getNode() == AddeSubeOp1.getNode())
12728 return SDValue();
12729
12730 // Find the MUL_LOHI node walking up ADDE/SUBE's operands.
12731 bool IsLeftOperandMUL = false;
12732 SDValue MULOp = findMUL_LOHI(AddeSubeOp0);
12733 if (MULOp == SDValue())
12734 MULOp = findMUL_LOHI(AddeSubeOp1);
12735 else
12736 IsLeftOperandMUL = true;
12737 if (MULOp == SDValue())
12738 return SDValue();
12739
12740 // Figure out the right opcode.
12741 unsigned Opc = MULOp->getOpcode();
12742 unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL;
12743
12744 // Figure out the high and low input values to the MLAL node.
12745 SDValue *HiAddSub = nullptr;
12746 SDValue *LoMul = nullptr;
12747 SDValue *LowAddSub = nullptr;
12748
12749 // Ensure that ADDE/SUBE is from high result of ISD::xMUL_LOHI.
12750 if ((AddeSubeOp0 != MULOp.getValue(1)) && (AddeSubeOp1 != MULOp.getValue(1)))
12751 return SDValue();
12752
12753 if (IsLeftOperandMUL)
12754 HiAddSub = &AddeSubeOp1;
12755 else
12756 HiAddSub = &AddeSubeOp0;
12757
12758 // Ensure that LoMul and LowAddSub are taken from the correct ISD::SMUL_LOHI node
12759 // whose low result is fed to the ADDC/SUBC we are checking.
12760
12761 if (AddcSubcOp0 == MULOp.getValue(0)) {
12762 LoMul = &AddcSubcOp0;
12763 LowAddSub = &AddcSubcOp1;
12764 }
12765 if (AddcSubcOp1 == MULOp.getValue(0)) {
12766 LoMul = &AddcSubcOp1;
12767 LowAddSub = &AddcSubcOp0;
12768 }
12769
12770 if (!LoMul)
12771 return SDValue();
12772
12773 // If HiAddSub is the same node as ADDC/SUBC or is a predecessor of ADDC/SUBC
12774 // the replacement below will create a cycle.
12775 if (AddcSubcNode == HiAddSub->getNode() ||
12776 AddcSubcNode->isPredecessorOf(HiAddSub->getNode()))
12777 return SDValue();
12778
12779 // Create the merged node.
12780 SelectionDAG &DAG = DCI.DAG;
12781
12782 // Start building operand list.
12783 SmallVector<SDValue, 8> Ops;
12784 Ops.push_back(LoMul->getOperand(0));
12785 Ops.push_back(LoMul->getOperand(1));
12786
12787 // Check whether we can use SMMLAR, SMMLSR or SMMULR instead. For this to be
12788 // the case, we must be doing signed multiplication and only use the higher
12789 // part of the result of the MLAL; furthermore, the LowAddSub must be a
12790 // constant addition or subtraction with the value 0x80000000.
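// Illustrative sketch (a, b, Hi are placeholders): with LowAddSub == 0x80000000
// and only the high half of the result used,
//   SMLAL a, b, 0x80000000, Hi  -->  SMMLAR a, b, Hi   (SMMLSR for the SUBC form)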
12791 if (Subtarget->hasV6Ops() && Subtarget->hasDSP() && Subtarget->useMulOps() &&
12792 FinalOpc == ARMISD::SMLAL && !AddeSubeNode->hasAnyUseOfValue(1) &&
12793 LowAddSub->getNode()->getOpcode() == ISD::Constant &&
12794 static_cast<ConstantSDNode *>(LowAddSub->getNode())->getZExtValue() ==
12795 0x80000000) {
12796 Ops.push_back(*HiAddSub);
12797 if (AddcSubcNode->getOpcode() == ARMISD::SUBC) {
12798 FinalOpc = ARMISD::SMMLSR;
12799 } else {
12800 FinalOpc = ARMISD::SMMLAR;
12801 }
12802 SDValue NewNode = DAG.getNode(FinalOpc, SDLoc(AddcSubcNode), MVT::i32, Ops);
12803 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeSubeNode, 0), NewNode);
12804
12805 return SDValue(AddeSubeNode, 0);
12806 } else if (AddcSubcNode->getOpcode() == ARMISD::SUBC)
12807 // SMMLS is generated during instruction selection and the rest of this
12808 // function cannot handle the case where AddcSubcNode is a SUBC.
12809 return SDValue();
12810
12811 // Finish building the operand list for {U/S}MLAL
12812 Ops.push_back(*LowAddSub);
12813 Ops.push_back(*HiAddSub);
12814
12815 SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcSubcNode),
12816 DAG.getVTList(MVT::i32, MVT::i32), Ops);
12817
12818 // Replace the ADD nodes' uses with the MLAL node's values.
12819 SDValue HiMLALResult(MLALNode.getNode(), 1);
12820 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeSubeNode, 0), HiMLALResult);
12821
12822 SDValue LoMLALResult(MLALNode.getNode(), 0);
12823 DAG.ReplaceAllUsesOfValueWith(SDValue(AddcSubcNode, 0), LoMLALResult);
12824
12825 // Return original node to notify the driver to stop replacing.
12826 return SDValue(AddeSubeNode, 0);
12827}
12828
12829static SDValue AddCombineTo64bitUMAAL(SDNode *AddeNode,
12830 TargetLowering::DAGCombinerInfo &DCI,
12831 const ARMSubtarget *Subtarget) {
12832 // UMAAL is similar to UMLAL except that it adds two unsigned values.
12833 // While trying to combine for the other MLAL nodes, first search for the
12834 // chance to use UMAAL. Check if Addc uses a node which has already
12835 // been combined into a UMLAL. The other pattern is UMLAL using Addc/Adde
12836 // as the addend, and it's handled in PerformUMLALCombine.
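// Illustrative sketch of the shape matched below (names are placeholders):
//   Umlal = ARMISD::UMLAL a, b, Lo, 0
//   Addc  = ARMISD::ADDC Umlal, AddHi
//   Adde  = ARMISD::ADDE Umlal, 0, Addc:1
//   -->     ARMISD::UMAAL a, b, Lo, AddHi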
12837
12838 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
12839 return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);
12840
12841 // Check that we have a glued ADDC node.
12842 SDNode* AddcNode = AddeNode->getOperand(2).getNode();
12843 if (AddcNode->getOpcode() != ARMISD::ADDC)
12844 return SDValue();
12845
12846 // Find the converted UMAAL or quit if it doesn't exist.
12847 SDNode *UmlalNode = nullptr;
12848 SDValue AddHi;
12849 if (AddcNode->getOperand(0).getOpcode() == ARMISD::UMLAL) {
12850 UmlalNode = AddcNode->getOperand(0).getNode();
12851 AddHi = AddcNode->getOperand(1);
12852 } else if (AddcNode->getOperand(1).getOpcode() == ARMISD::UMLAL) {
12853 UmlalNode = AddcNode->getOperand(1).getNode();
12854 AddHi = AddcNode->getOperand(0);
12855 } else {
12856 return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);
12857 }
12858
12859 // The ADDC should be glued to an ADDE node, which uses the same UMLAL as
12860 // the ADDC as well as Zero.
12861 if (!isNullConstant(UmlalNode->getOperand(3)))
12862 return SDValue();
12863
12864 if ((isNullConstant(AddeNode->getOperand(0)) &&
12865 AddeNode->getOperand(1).getNode() == UmlalNode) ||
12866 (AddeNode->getOperand(0).getNode() == UmlalNode &&
12867 isNullConstant(AddeNode->getOperand(1)))) {
12868 SelectionDAG &DAG = DCI.DAG;
12869 SDValue Ops[] = { UmlalNode->getOperand(0), UmlalNode->getOperand(1),
12870 UmlalNode->getOperand(2), AddHi };
12871 SDValue UMAAL = DAG.getNode(ARMISD::UMAAL, SDLoc(AddcNode),
12872 DAG.getVTList(MVT::i32, MVT::i32), Ops);
12873
12874 // Replace the ADD nodes' uses with the UMAAL node's values.
12875 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), SDValue(UMAAL.getNode(), 1));
12876 DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), SDValue(UMAAL.getNode(), 0));
12877
12878 // Return original node to notify the driver to stop replacing.
12879 return SDValue(AddeNode, 0);
12880 }
12881 return SDValue();
12882}
12883
12884static SDValue PerformUMLALCombine(SDNode *N, SelectionDAG &DAG,
12885 const ARMSubtarget *Subtarget) {
12886 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
12887 return SDValue();
12888
12889 // Check that we have a pair of ADDC and ADDE as operands.
12890 // Both addends of the ADDE must be zero.
12891 SDNode* AddcNode = N->getOperand(2).getNode();
12892 SDNode* AddeNode = N->getOperand(3).getNode();
12893 if ((AddcNode->getOpcode() == ARMISD::ADDC) &&
12894 (AddeNode->getOpcode() == ARMISD::ADDE) &&
12895 isNullConstant(AddeNode->getOperand(0)) &&
12896 isNullConstant(AddeNode->getOperand(1)) &&
12897 (AddeNode->getOperand(2).getNode() == AddcNode))
12898 return DAG.getNode(ARMISD::UMAAL, SDLoc(N),
12899 DAG.getVTList(MVT::i32, MVT::i32),
12900 {N->getOperand(0), N->getOperand(1),
12901 AddcNode->getOperand(0), AddcNode->getOperand(1)});
12902 else
12903 return SDValue();
12904}
12905
12906static SDValue PerformAddcSubcCombine(SDNode *N,
12907 TargetLowering::DAGCombinerInfo &DCI,
12908 const ARMSubtarget *Subtarget) {
12909 SelectionDAG &DAG(DCI.DAG);
12910
12911 if (N->getOpcode() == ARMISD::SUBC && N->hasAnyUseOfValue(1)) {
12912 // (SUBC (ADDE 0, 0, C), 1) -> C
12913 SDValue LHS = N->getOperand(0);
12914 SDValue RHS = N->getOperand(1);
12915 if (LHS->getOpcode() == ARMISD::ADDE &&
12916 isNullConstant(LHS->getOperand(0)) &&
12917 isNullConstant(LHS->getOperand(1)) && isOneConstant(RHS)) {
12918 return DCI.CombineTo(N, SDValue(N, 0), LHS->getOperand(2));
12919 }
12920 }
12921
12922 if (Subtarget->isThumb1Only()) {
12923 SDValue RHS = N->getOperand(1);
12924 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
12925 int32_t imm = C->getSExtValue();
12926 if (imm < 0 && imm > std::numeric_limits<int>::min()) {
12927 SDLoc DL(N);
12928 RHS = DAG.getConstant(-imm, DL, MVT::i32);
12929 unsigned Opcode = (N->getOpcode() == ARMISD::ADDC) ? ARMISD::SUBC
12930 : ARMISD::ADDC;
12931 return DAG.getNode(Opcode, DL, N->getVTList(), N->getOperand(0), RHS);
12932 }
12933 }
12934 }
12935
12936 return SDValue();
12937}
12938
12939static SDValue PerformAddeSubeCombine(SDNode *N,
12940 TargetLowering::DAGCombinerInfo &DCI,
12941 const ARMSubtarget *Subtarget) {
12942 if (Subtarget->isThumb1Only()) {
12943 SelectionDAG &DAG = DCI.DAG;
12944 SDValue RHS = N->getOperand(1);
12945 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
12946 int64_t imm = C->getSExtValue();
12947 if (imm < 0) {
12948 SDLoc DL(N);
12949
12950 // The with-carry-in form matches bitwise not instead of the negation.
12951 // Effectively, the inverse interpretation of the carry flag already
12952 // accounts for part of the negation.
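// For example (illustrative): ADDE x, -5, carry becomes SUBE x, 4, carry,
// since ~(-5) == 4 and the inverted carry supplies the remaining -1.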
12953 RHS = DAG.getConstant(~imm, DL, MVT::i32);
12954
12955 unsigned Opcode = (N->getOpcode() == ARMISD::ADDE) ? ARMISD::SUBE
12956 : ARMISD::ADDE;
12957 return DAG.getNode(Opcode, DL, N->getVTList(),
12958 N->getOperand(0), RHS, N->getOperand(2));
12959 }
12960 }
12961 } else if (N->getOperand(1)->getOpcode() == ISD::SMUL_LOHI) {
12962 return AddCombineTo64bitMLAL(N, DCI, Subtarget);
12963 }
12964 return SDValue();
12965}
12966
12967static SDValue PerformSELECTCombine(SDNode *N,
12968 TargetLowering::DAGCombinerInfo &DCI,
12969 const ARMSubtarget *Subtarget) {
12970 if (!Subtarget->hasMVEIntegerOps())
12971 return SDValue();
12972
12973 SDLoc dl(N);
12974 SDValue SetCC;
12975 SDValue LHS;
12976 SDValue RHS;
12977 ISD::CondCode CC;
12978 SDValue TrueVal;
12979 SDValue FalseVal;
12980
12981 if (N->getOpcode() == ISD::SELECT &&
12982 N->getOperand(0)->getOpcode() == ISD::SETCC) {
12983 SetCC = N->getOperand(0);
12984 LHS = SetCC->getOperand(0);
12985 RHS = SetCC->getOperand(1);
12986 CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
12987 TrueVal = N->getOperand(1);
12988 FalseVal = N->getOperand(2);
12989 } else if (N->getOpcode() == ISD::SELECT_CC) {
12990 LHS = N->getOperand(0);
12991 RHS = N->getOperand(1);
12992 CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
12993 TrueVal = N->getOperand(2);
12994 FalseVal = N->getOperand(3);
12995 } else {
12996 return SDValue();
12997 }
12998
12999 unsigned int Opcode = 0;
13000 if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMIN ||
13001 FalseVal->getOpcode() == ISD::VECREDUCE_UMIN) &&
13002 (CC == ISD::SETULT || CC == ISD::SETUGT)) {
13003 Opcode = ARMISD::VMINVu;
13004 if (CC == ISD::SETUGT)
13005 std::swap(TrueVal, FalseVal);
13006 } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMIN ||
13007 FalseVal->getOpcode() == ISD::VECREDUCE_SMIN) &&
13008 (CC == ISD::SETLT || CC == ISD::SETGT)) {
13009 Opcode = ARMISD::VMINVs;
13010 if (CC == ISD::SETGT)
13011 std::swap(TrueVal, FalseVal);
13012 } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMAX ||
13013 FalseVal->getOpcode() == ISD::VECREDUCE_UMAX) &&
13014 (CC == ISD::SETUGT || CC == ISD::SETULT)) {
13015 Opcode = ARMISD::VMAXVu;
13016 if (CC == ISD::SETULT)
13017 std::swap(TrueVal, FalseVal);
13018 } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMAX ||
13019 FalseVal->getOpcode() == ISD::VECREDUCE_SMAX) &&
13020 (CC == ISD::SETGT || CC == ISD::SETLT)) {
13021 Opcode = ARMISD::VMAXVs;
13022 if (CC == ISD::SETLT)
13023 std::swap(TrueVal, FalseVal);
13024 } else
13025 return SDValue();
13026
13027 // Normalise to the right hand side being the vector reduction
13028 switch (TrueVal->getOpcode()) {
13029 case ISD::VECREDUCE_UMIN:
13030 case ISD::VECREDUCE_SMIN:
13031 case ISD::VECREDUCE_UMAX:
13032 case ISD::VECREDUCE_SMAX:
13033 std::swap(LHS, RHS);
13034 std::swap(TrueVal, FalseVal);
13035 break;
13036 }
13037
13038 EVT VectorType = FalseVal->getOperand(0).getValueType();
13039
13040 if (VectorType != MVT::v16i8 && VectorType != MVT::v8i16 &&
13041 VectorType != MVT::v4i32)
13042 return SDValue();
13043
13044 EVT VectorScalarType = VectorType.getVectorElementType();
13045
13046 // The values being selected must also be the ones being compared
13047 if (TrueVal != LHS || FalseVal != RHS)
13048 return SDValue();
13049
13050 EVT LeftType = LHS->getValueType(0);
13051 EVT RightType = RHS->getValueType(0);
13052
13053 // The types must match the reduced type too
13054 if (LeftType != VectorScalarType || RightType != VectorScalarType)
13055 return SDValue();
13056
13057 // Legalise the scalar to an i32
13058 if (VectorScalarType != MVT::i32)
13059 LHS = DCI.DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS);
13060
13061 // Generate the reduction as an i32 for legalisation purposes
13062 auto Reduction =
13063 DCI.DAG.getNode(Opcode, dl, MVT::i32, LHS, RHS->getOperand(0));
13064
13065 // The result isn't actually an i32 so truncate it back to its original type
13066 if (VectorScalarType != MVT::i32)
13067 Reduction = DCI.DAG.getNode(ISD::TRUNCATE, dl, VectorScalarType, Reduction);
13068
13069 return Reduction;
13070}
13071
13072 // A special combine for the vqdmulh family of instructions. This is one of the
13073 // potential set of patterns that could match this instruction. The base pattern
13074 // you would expect is min(max(ashr(mul(mul(sext(x), 2), sext(y)), 16))).
13075 // This matches the slightly different min(max(ashr(mul(mul(sext(x), sext(y)), 2), 16))),
13076 // which llvm will have optimized to min(ashr(mul(sext(x), sext(y)), 15)) as
13077 // the max is unnecessary.
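// Illustrative sketch for v8i16 inputs x and y (names are placeholders):
//   smin (sra (mul (sext x), (sext y)), splat(15)), splat(32767)
//   -->  sext (ARMISD::VQDMULH x, y)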
13078static SDValue PerformVQDMULHCombine(SDNode *N, SelectionDAG &DAG) {
13079 EVT VT = N->getValueType(0);
13080 SDValue Shft;
13081 ConstantSDNode *Clamp;
13082
13083 if (!VT.isVector() || VT.getScalarSizeInBits() > 64)
13084 return SDValue();
13085
13086 if (N->getOpcode() == ISD::SMIN) {
13087 Shft = N->getOperand(0);
13088 Clamp = isConstOrConstSplat(N->getOperand(1));
13089 } else if (N->getOpcode() == ISD::VSELECT) {
13090 // Detect a SMIN, which for an i64 node will be a vselect/setcc, not a smin.
13091 SDValue Cmp = N->getOperand(0);
13092 if (Cmp.getOpcode() != ISD::SETCC ||
13093 cast<CondCodeSDNode>(Cmp.getOperand(2))->get() != ISD::SETLT ||
13094 Cmp.getOperand(0) != N->getOperand(1) ||
13095 Cmp.getOperand(1) != N->getOperand(2))
13096 return SDValue();
13097 Shft = N->getOperand(1);
13098 Clamp = isConstOrConstSplat(N->getOperand(2));
13099 } else
13100 return SDValue();
13101
13102 if (!Clamp)
13103 return SDValue();
13104
13105 MVT ScalarType;
13106 int ShftAmt = 0;
13107 switch (Clamp->getSExtValue()) {
13108 case (1 << 7) - 1:
13109 ScalarType = MVT::i8;
13110 ShftAmt = 7;
13111 break;
13112 case (1 << 15) - 1:
13113 ScalarType = MVT::i16;
13114 ShftAmt = 15;
13115 break;
13116 case (1ULL << 31) - 1:
13117 ScalarType = MVT::i32;
13118 ShftAmt = 31;
13119 break;
13120 default:
13121 return SDValue();
13122 }
13123
13124 if (Shft.getOpcode() != ISD::SRA)
13125 return SDValue();
13126 ConstantSDNode *N1 = isConstOrConstSplat(Shft.getOperand(1));
13127 if (!N1 || N1->getSExtValue() != ShftAmt)
13128 return SDValue();
13129
13130 SDValue Mul = Shft.getOperand(0);
13131 if (Mul.getOpcode() != ISD::MUL)
13132 return SDValue();
13133
13134 SDValue Ext0 = Mul.getOperand(0);
13135 SDValue Ext1 = Mul.getOperand(1);
13136 if (Ext0.getOpcode() != ISD::SIGN_EXTEND ||
13137 Ext1.getOpcode() != ISD::SIGN_EXTEND)
13138 return SDValue();
13139 EVT VecVT = Ext0.getOperand(0).getValueType();
13140 if (!VecVT.isPow2VectorType() || VecVT.getVectorNumElements() == 1)
13141 return SDValue();
13142 if (Ext1.getOperand(0).getValueType() != VecVT ||
13143 VecVT.getScalarType() != ScalarType ||
13144 VT.getScalarSizeInBits() < ScalarType.getScalarSizeInBits() * 2)
13145 return SDValue();
13146
13147 SDLoc DL(Mul);
13148 unsigned LegalLanes = 128 / (ShftAmt + 1);
13149 EVT LegalVecVT = MVT::getVectorVT(ScalarType, LegalLanes);
13150 // For types smaller than legal vectors, extend to be legal and use only the
13151 // needed lanes.
13152 if (VecVT.getSizeInBits() < 128) {
13153 EVT ExtVecVT =
13154 MVT::getVectorVT(MVT::getIntegerVT(128 / VecVT.getVectorNumElements()),
13155 VecVT.getVectorNumElements());
13156 SDValue Inp0 =
13157 DAG.getNode(ISD::ANY_EXTEND, DL, ExtVecVT, Ext0.getOperand(0));
13158 SDValue Inp1 =
13159 DAG.getNode(ISD::ANY_EXTEND, DL, ExtVecVT, Ext1.getOperand(0));
13160 Inp0 = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, LegalVecVT, Inp0);
13161 Inp1 = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, LegalVecVT, Inp1);
13162 SDValue VQDMULH = DAG.getNode(ARMISD::VQDMULH, DL, LegalVecVT, Inp0, Inp1);
13163 SDValue Trunc = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, ExtVecVT, VQDMULH);
13164 Trunc = DAG.getNode(ISD::TRUNCATE, DL, VecVT, Trunc);
13165 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Trunc);
13166 }
13167
13168 // For larger types, split into legal sized chunks.
13169 assert(VecVT.getSizeInBits() % 128 == 0 && "Expected a power2 type");
13170 unsigned NumParts = VecVT.getSizeInBits() / 128;
13171 SmallVector<SDValue> Parts;
13172 for (unsigned I = 0; I < NumParts; ++I) {
13173 SDValue Inp0 =
13174 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LegalVecVT, Ext0.getOperand(0),
13175 DAG.getVectorIdxConstant(I * LegalLanes, DL));
13176 SDValue Inp1 =
13177 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LegalVecVT, Ext1.getOperand(0),
13178 DAG.getVectorIdxConstant(I * LegalLanes, DL));
13179 SDValue VQDMULH = DAG.getNode(ARMISD::VQDMULH, DL, LegalVecVT, Inp0, Inp1);
13180 Parts.push_back(VQDMULH);
13181 }
13182 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT,
13183 DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Parts));
13184}
13185
13186static SDValue PerformVSELECTCombine(SDNode *N,
13187 TargetLowering::DAGCombinerInfo &DCI,
13188 const ARMSubtarget *Subtarget) {
13189 if (!Subtarget->hasMVEIntegerOps())
13190 return SDValue();
13191
13192 if (SDValue V = PerformVQDMULHCombine(N, DCI.DAG))
13193 return V;
13194
13195 // Transforms vselect(not(cond), lhs, rhs) into vselect(cond, rhs, lhs).
13196 //
13197 // We need to re-implement this optimization here as the implementation in the
13198 // Target-Independent DAGCombiner does not handle the kind of constant we make
13199 // (it calls isConstOrConstSplat with AllowTruncation set to false - and for
13200 // good reason, allowing truncation there would break other targets).
13201 //
13202 // Currently, this is only done for MVE, as it's the only target that benefits
13203 // from this transformation (e.g. VPNOT+VPSEL becomes a single VPSEL).
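// Illustrative sketch: vselect (xor cond, splat(1)), lhs, rhs
//                 -->  vselect cond, rhs, lhs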
13204 if (N->getOperand(0).getOpcode() != ISD::XOR)
13205 return SDValue();
13206 SDValue XOR = N->getOperand(0);
13207
13208 // Check if the XOR's RHS is either a 1, or a BUILD_VECTOR of 1s.
13209 // It is important to check with truncation allowed as the BUILD_VECTORs we
13210 // generate in those situations will truncate their operands.
13211 ConstantSDNode *Const =
13212 isConstOrConstSplat(XOR->getOperand(1), /*AllowUndefs*/ false,
13213 /*AllowTruncation*/ true);
13214 if (!Const || !Const->isOne())
13215 return SDValue();
13216
13217 // Rewrite into vselect(cond, rhs, lhs).
13218 SDValue Cond = XOR->getOperand(0);
13219 SDValue LHS = N->getOperand(1);
13220 SDValue RHS = N->getOperand(2);
13221 EVT Type = N->getValueType(0);
13222 return DCI.DAG.getNode(ISD::VSELECT, SDLoc(N), Type, Cond, RHS, LHS);
13223}
13224
13225// Convert vsetcc([0,1,2,..], splat(n), ult) -> vctp n
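// Illustrative sketch for a v4i1 result (n is a placeholder scalar):
//   setcc <0,1,2,3>, splat(n), setult  -->  arm_mve_vctp32(n)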
13226static SDValue PerformVSetCCToVCTPCombine(SDNode *N,
13227 TargetLowering::DAGCombinerInfo &DCI,
13228 const ARMSubtarget *Subtarget) {
13229 SDValue Op0 = N->getOperand(0);
13230 SDValue Op1 = N->getOperand(1);
13231 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
13232 EVT VT = N->getValueType(0);
13233
13234 if (!Subtarget->hasMVEIntegerOps() ||
13235 !DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
13236 return SDValue();
13237
13238 if (CC == ISD::SETUGE) {
13239 std::swap(Op0, Op1);
13240 CC = ISD::SETULT;
13241 }
13242
13243 if (CC != ISD::SETULT || VT.getScalarSizeInBits() != 1 ||
13244 Op0.getOpcode() != ISD::BUILD_VECTOR)
13245 return SDValue();
13246
13247 // Check first operand is BuildVector of 0,1,2,...
13248 for (unsigned I = 0; I < VT.getVectorNumElements(); I++) {
13249 if (!Op0.getOperand(I).isUndef() &&
13250 !(isa<ConstantSDNode>(Op0.getOperand(I)) &&
13251 Op0.getConstantOperandVal(I) == I))
13252 return SDValue();
13253 }
13254
13255 // The second operand must be a splat; get the splatted scalar, Op1S.
13256 SDValue Op1S = DCI.DAG.getSplatValue(Op1);
13257 if (!Op1S)
13258 return SDValue();
13259
13260 unsigned Opc;
13261 switch (VT.getVectorNumElements()) {
13262 case 2:
13263 Opc = Intrinsic::arm_mve_vctp64;
13264 break;
13265 case 4:
13266 Opc = Intrinsic::arm_mve_vctp32;
13267 break;
13268 case 8:
13269 Opc = Intrinsic::arm_mve_vctp16;
13270 break;
13271 case 16:
13272 Opc = Intrinsic::arm_mve_vctp8;
13273 break;
13274 default:
13275 return SDValue();
13276 }
13277
13278 SDLoc DL(N);
13279 return DCI.DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
13280 DCI.DAG.getConstant(Opc, DL, MVT::i32),
13281 DCI.DAG.getZExtOrTrunc(Op1S, DL, MVT::i32));
13282}
13283
13284static SDValue PerformABSCombine(SDNode *N,
13285 TargetLowering::DAGCombinerInfo &DCI,
13286 const ARMSubtarget *Subtarget) {
13287 SelectionDAG &DAG = DCI.DAG;
13288 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13289
13290 if (TLI.isOperationLegal(N->getOpcode(), N->getValueType(0)))
13291 return SDValue();
13292
13293 return TLI.expandABS(N, DAG);
13294}
13295
13296/// PerformADDECombine - Target-specific dag combine transform from
13297/// ARMISD::ADDC, ARMISD::ADDE, and ISD::MUL_LOHI to MLAL or
13298/// ARMISD::ADDC, ARMISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL
13299static SDValue PerformADDECombine(SDNode *N,
13300 TargetLowering::DAGCombinerInfo &DCI,
13301 const ARMSubtarget *Subtarget) {
13302 // Only ARM and Thumb2 support UMLAL/SMLAL.
13303 if (Subtarget->isThumb1Only())
13304 return PerformAddeSubeCombine(N, DCI, Subtarget);
13305
13306 // Only perform the checks after legalize when the pattern is available.
13307 if (DCI.isBeforeLegalize()) return SDValue();
13308
13309 return AddCombineTo64bitUMAAL(N, DCI, Subtarget);
13310}
13311
13312/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
13313/// operands N0 and N1. This is a helper for PerformADDCombine that is
13314/// called with the default operands, and if that fails, with commuted
13315/// operands.
13316static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
13317 TargetLowering::DAGCombinerInfo &DCI,
13318 const ARMSubtarget *Subtarget){
13319 // Attempt to create vpadd for this add.
13320 if (SDValue Result = AddCombineToVPADD(N, N0, N1, DCI, Subtarget))
13321 return Result;
13322
13323 // Attempt to create vpaddl for this add.
13324 if (SDValue Result = AddCombineVUZPToVPADDL(N, N0, N1, DCI, Subtarget))
13325 return Result;
13326 if (SDValue Result = AddCombineBUILD_VECTORToVPADDL(N, N0, N1, DCI,
13327 Subtarget))
13328 return Result;
13329
13330 // fold (add (select cc, 0, c), x) -> (select cc, x, (add x, c))
13331 if (N0.getNode()->hasOneUse())
13332 if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI))
13333 return Result;
13334 return SDValue();
13335}
13336
13337static SDValue TryDistrubutionADDVecReduce(SDNode *N, SelectionDAG &DAG) {
13338 EVT VT = N->getValueType(0);
13339 SDValue N0 = N->getOperand(0);
13340 SDValue N1 = N->getOperand(1);
13341 SDLoc dl(N);
13342
13343 auto IsVecReduce = [](SDValue Op) {
13344 switch (Op.getOpcode()) {
13345 case ISD::VECREDUCE_ADD:
13346 case ARMISD::VADDVs:
13347 case ARMISD::VADDVu:
13348 case ARMISD::VMLAVs:
13349 case ARMISD::VMLAVu:
13350 return true;
13351 }
13352 return false;
13353 };
13354
13355 auto DistrubuteAddAddVecReduce = [&](SDValue N0, SDValue N1) {
13356 // Distribute add(X, add(vecreduce(Y), vecreduce(Z))) ->
13357 // add(add(X, vecreduce(Y)), vecreduce(Z))
13358 // to make better use of vaddva style instructions.
13359 if (VT == MVT::i32 && N1.getOpcode() == ISD::ADD && !IsVecReduce(N0) &&
13360 IsVecReduce(N1.getOperand(0)) && IsVecReduce(N1.getOperand(1)) &&
13361 !isa<ConstantSDNode>(N0) && N1->hasOneUse()) {
13362 SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, N0, N1.getOperand(0));
13363 return DAG.getNode(ISD::ADD, dl, VT, Add0, N1.getOperand(1));
13364 }
13365 // And turn add(add(A, reduce(B)), add(C, reduce(D))) ->
13366 // add(add(add(A, C), reduce(B)), reduce(D))
13367 if (VT == MVT::i32 && N0.getOpcode() == ISD::ADD &&
13368 N1.getOpcode() == ISD::ADD && N0->hasOneUse() && N1->hasOneUse()) {
13369 unsigned N0RedOp = 0;
13370 if (!IsVecReduce(N0.getOperand(N0RedOp))) {
13371 N0RedOp = 1;
13372 if (!IsVecReduce(N0.getOperand(N0RedOp)))
13373 return SDValue();
13374 }
13375
13376 unsigned N1RedOp = 0;
13377 if (!IsVecReduce(N1.getOperand(N1RedOp)))
13378 N1RedOp = 1;
13379 if (!IsVecReduce(N1.getOperand(N1RedOp)))
13380 return SDValue();
13381
13382 SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, N0.getOperand(1 - N0RedOp),
13383 N1.getOperand(1 - N1RedOp));
13384 SDValue Add1 =
13385 DAG.getNode(ISD::ADD, dl, VT, Add0, N0.getOperand(N0RedOp));
13386 return DAG.getNode(ISD::ADD, dl, VT, Add1, N1.getOperand(N1RedOp));
13387 }
13388 return SDValue();
13389 };
13390 if (SDValue R = DistrubuteAddAddVecReduce(N0, N1))
13391 return R;
13392 if (SDValue R = DistrubuteAddAddVecReduce(N1, N0))
13393 return R;
13394
13395 // Distribute add(vecreduce(load(Y)), vecreduce(load(Z)))
13396 // Or add(add(X, vecreduce(load(Y))), vecreduce(load(Z)))
13397 // by ascending load offsets. This can help cores prefetch if the order of
13398 // loads is more predictable.
13399 auto DistrubuteVecReduceLoad = [&](SDValue N0, SDValue N1, bool IsForward) {
13400 // Check if two reductions are known to load data where one is before/after
13401 // another. Return negative if N0 loads data before N1, positive if N1 loads
13402 // before N0, and 0 if nothing is known.
13403 auto IsKnownOrderedLoad = [&](SDValue N0, SDValue N1) {
13404 // Look through to the first operand of a MUL, for the VMLA case.
13405 // Currently only looks at the first operand, in the hope they are equal.
13406 if (N0.getOpcode() == ISD::MUL)
13407 N0 = N0.getOperand(0);
13408 if (N1.getOpcode() == ISD::MUL)
13409 N1 = N1.getOperand(0);
13410
13411 // Return true if the two operands are loads to the same object and the
13412 // offset of the first is known to be less than the offset of the second.
13413 LoadSDNode *Load0 = dyn_cast<LoadSDNode>(N0);
13414 LoadSDNode *Load1 = dyn_cast<LoadSDNode>(N1);
13415 if (!Load0 || !Load1 || Load0->getChain() != Load1->getChain() ||
13416 !Load0->isSimple() || !Load1->isSimple() || Load0->isIndexed() ||
13417 Load1->isIndexed())
13418 return 0;
13419
13420 auto BaseLocDecomp0 = BaseIndexOffset::match(Load0, DAG);
13421 auto BaseLocDecomp1 = BaseIndexOffset::match(Load1, DAG);
13422
13423 if (!BaseLocDecomp0.getBase() ||
13424 BaseLocDecomp0.getBase() != BaseLocDecomp1.getBase() ||
13425 !BaseLocDecomp0.hasValidOffset() || !BaseLocDecomp1.hasValidOffset())
13426 return 0;
13427 if (BaseLocDecomp0.getOffset() < BaseLocDecomp1.getOffset())
13428 return -1;
13429 if (BaseLocDecomp0.getOffset() > BaseLocDecomp1.getOffset())
13430 return 1;
13431 return 0;
13432 };
13433
13434 SDValue X;
13435 if (N0.getOpcode() == ISD::ADD && N0->hasOneUse()) {
13436 if (IsVecReduce(N0.getOperand(0)) && IsVecReduce(N0.getOperand(1))) {
13437 int IsBefore = IsKnownOrderedLoad(N0.getOperand(0).getOperand(0),
13438 N0.getOperand(1).getOperand(0));
13439 if (IsBefore < 0) {
13440 X = N0.getOperand(0);
13441 N0 = N0.getOperand(1);
13442 } else if (IsBefore > 0) {
13443 X = N0.getOperand(1);
13444 N0 = N0.getOperand(0);
13445 } else
13446 return SDValue();
13447 } else if (IsVecReduce(N0.getOperand(0))) {
13448 X = N0.getOperand(1);
13449 N0 = N0.getOperand(0);
13450 } else if (IsVecReduce(N0.getOperand(1))) {
13451 X = N0.getOperand(0);
13452 N0 = N0.getOperand(1);
13453 } else
13454 return SDValue();
13455 } else if (IsForward && IsVecReduce(N0) && IsVecReduce(N1) &&
13456 IsKnownOrderedLoad(N0.getOperand(0), N1.getOperand(0)) < 0) {
13457 // Note this is backward to how you would expect. We create
13458 // add(reduce(load + 16), reduce(load + 0)) so that the
13459 // add(reduce(load+16), X) is combined into VADDVA(X, load+16), leaving
13460 // the X as VADDV(load + 0).
13461 return DAG.getNode(ISD::ADD, dl, VT, N1, N0);
13462 } else
13463 return SDValue();
13464
13465 if (!IsVecReduce(N0) || !IsVecReduce(N1))
13466 return SDValue();
13467
13468 if (IsKnownOrderedLoad(N1.getOperand(0), N0.getOperand(0)) >= 0)
13469 return SDValue();
13470
13471 // Switch from add(add(X, N0), N1) to add(add(X, N1), N0)
13472 SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, X, N1);
13473 return DAG.getNode(ISD::ADD, dl, VT, Add0, N0);
13474 };
13475 if (SDValue R = DistrubuteVecReduceLoad(N0, N1, true))
13476 return R;
13477 if (SDValue R = DistrubuteVecReduceLoad(N1, N0, false))
13478 return R;
13479 return SDValue();
13480}
13481
13482static SDValue PerformADDVecReduce(SDNode *N, SelectionDAG &DAG,
13483 const ARMSubtarget *Subtarget) {
13484 if (!Subtarget->hasMVEIntegerOps())
13485 return SDValue();
13486
13487 if (SDValue R = TryDistrubutionADDVecReduce(N, DAG))
13488 return R;
13489
13490 EVT VT = N->getValueType(0);
13491 SDValue N0 = N->getOperand(0);
13492 SDValue N1 = N->getOperand(1);
13493 SDLoc dl(N);
13494
13495 if (VT != MVT::i64)
13496 return SDValue();
13497
13498 // We are looking for an i64 add of a VADDLVx. Due to these being i64s, this
13499 // will look like:
13500 // t1: i32,i32 = ARMISD::VADDLVs x
13501 // t2: i64 = build_pair t1, t1:1
13502 // t3: i64 = add t2, y
13503 // Otherwise we try to push the add up above VADDLVAx, to potentially allow
13504 // the add to be simplified separately.
13505 // We also need to check for sext / zext and commutative adds.
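// The add is then folded into the accumulating form, roughly (illustrative
// sketch, y_lo/y_hi being the two i32 halves of y):
//   t4: i32,i32 = ARMISD::VADDLVAs y_lo, y_hi, x
//   t5: i64 = build_pair t4, t4:1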
13506 auto MakeVecReduce = [&](unsigned Opcode, unsigned OpcodeA, SDValue NA,
13507 SDValue NB) {
13508 if (NB->getOpcode() != ISD::BUILD_PAIR)
13509 return SDValue();
13510 SDValue VecRed = NB->getOperand(0);
13511 if ((VecRed->getOpcode() != Opcode && VecRed->getOpcode() != OpcodeA) ||
13512 VecRed.getResNo() != 0 ||
13513 NB->getOperand(1) != SDValue(VecRed.getNode(), 1))
13514 return SDValue();
13515
13516 if (VecRed->getOpcode() == OpcodeA) {
13517 // add(NA, VADDLVA(Inp), Y) -> VADDLVA(add(NA, Inp), Y)
13518 SDValue Inp = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64,
13519 VecRed.getOperand(0), VecRed.getOperand(1));
13520 NA = DAG.getNode(ISD::ADD, dl, MVT::i64, Inp, NA);
13521 }
13522
13523 SmallVector<SDValue, 4> Ops;
13524 Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, NA,
13525 DAG.getConstant(0, dl, MVT::i32)));
13526 Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, NA,
13527 DAG.getConstant(1, dl, MVT::i32)));
13528 unsigned S = VecRed->getOpcode() == OpcodeA ? 2 : 0;
13529 for (unsigned I = S, E = VecRed.getNumOperands(); I < E; I++)
13530 Ops.push_back(VecRed->getOperand(I));
13531 SDValue Red =
13532 DAG.getNode(OpcodeA, dl, DAG.getVTList({MVT::i32, MVT::i32}), Ops);
13533 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Red,
13534 SDValue(Red.getNode(), 1));
13535 };
13536
13537 if (SDValue M = MakeVecReduce(ARMISD::VADDLVs, ARMISD::VADDLVAs, N0, N1))
13538 return M;
13539 if (SDValue M = MakeVecReduce(ARMISD::VADDLVu, ARMISD::VADDLVAu, N0, N1))
13540 return M;
13541 if (SDValue M = MakeVecReduce(ARMISD::VADDLVs, ARMISD::VADDLVAs, N1, N0))
13542 return M;
13543 if (SDValue M = MakeVecReduce(ARMISD::VADDLVu, ARMISD::VADDLVAu, N1, N0))
13544 return M;
13545 if (SDValue M = MakeVecReduce(ARMISD::VADDLVps, ARMISD::VADDLVAps, N0, N1))
13546 return M;
13547 if (SDValue M = MakeVecReduce(ARMISD::VADDLVpu, ARMISD::VADDLVApu, N0, N1))
13548 return M;
13549 if (SDValue M = MakeVecReduce(ARMISD::VADDLVps, ARMISD::VADDLVAps, N1, N0))
13550 return M;
13551 if (SDValue M = MakeVecReduce(ARMISD::VADDLVpu, ARMISD::VADDLVApu, N1, N0))
13552 return M;
13553 if (SDValue M = MakeVecReduce(ARMISD::VMLALVs, ARMISD::VMLALVAs, N0, N1))
13554 return M;
13555 if (SDValue M = MakeVecReduce(ARMISD::VMLALVu, ARMISD::VMLALVAu, N0, N1))
13556 return M;
13557 if (SDValue M = MakeVecReduce(ARMISD::VMLALVs, ARMISD::VMLALVAs, N1, N0))
13558 return M;
13559 if (SDValue M = MakeVecReduce(ARMISD::VMLALVu, ARMISD::VMLALVAu, N1, N0))
13560 return M;
13561 if (SDValue M = MakeVecReduce(ARMISD::VMLALVps, ARMISD::VMLALVAps, N0, N1))
13562 return M;
13563 if (SDValue M = MakeVecReduce(ARMISD::VMLALVpu, ARMISD::VMLALVApu, N0, N1))
13564 return M;
13565 if (SDValue M = MakeVecReduce(ARMISD::VMLALVps, ARMISD::VMLALVAps, N1, N0))
13566 return M;
13567 if (SDValue M = MakeVecReduce(ARMISD::VMLALVpu, ARMISD::VMLALVApu, N1, N0))
13568 return M;
13569 return SDValue();
13570}
13571
13572bool
13573ARMTargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
13574 CombineLevel Level) const {
13575 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
13576 N->getOpcode() == ISD::SRL) &&
13577 "Expected shift op");
13578
13579 if (Level == BeforeLegalizeTypes)
13580 return true;
13581
13582 if (N->getOpcode() != ISD::SHL)
13583 return true;
13584
13585 if (Subtarget->isThumb1Only()) {
13586 // Avoid making expensive immediates by commuting shifts. (This logic
13587 // only applies to Thumb1 because ARM and Thumb2 immediates can be shifted
13588 // for free.)
13589 if (N->getOpcode() != ISD::SHL)
13590 return true;
13591 SDValue N1 = N->getOperand(0);
13592 if (N1->getOpcode() != ISD::ADD && N1->getOpcode() != ISD::AND &&
13593 N1->getOpcode() != ISD::OR && N1->getOpcode() != ISD::XOR)
13594 return true;
13595 if (auto *Const = dyn_cast<ConstantSDNode>(N1->getOperand(1))) {
13596 if (Const->getAPIntValue().ult(256))
13597 return false;
13598 if (N1->getOpcode() == ISD::ADD && Const->getAPIntValue().slt(0) &&
13599 Const->getAPIntValue().sgt(-256))
13600 return false;
13601 }
13602 return true;
13603 }
13604
13605 // Turn off commute-with-shift transform after legalization, so it doesn't
13606 // conflict with PerformSHLSimplify. (We could try to detect when
13607 // PerformSHLSimplify would trigger more precisely, but it isn't
13608 // really necessary.)
13609 return false;
13610}
13611
13612bool ARMTargetLowering::isDesirableToCommuteXorWithShift(
13613 const SDNode *N) const {
13614 assert(N->getOpcode() == ISD::XOR &&
13615 (N->getOperand(0).getOpcode() == ISD::SHL ||
13616 N->getOperand(0).getOpcode() == ISD::SRL) &&
13617 "Expected XOR(SHIFT) pattern");
13618
13619 // Only commute if the entire NOT mask is a hidden shifted mask.
13620 auto *XorC = dyn_cast<ConstantSDNode>(N->getOperand(1));
13621 auto *ShiftC = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1));
13622 if (XorC && ShiftC) {
13623 unsigned MaskIdx, MaskLen;
13624 if (XorC->getAPIntValue().isShiftedMask(MaskIdx, MaskLen)) {
13625 unsigned ShiftAmt = ShiftC->getZExtValue();
13626 unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
13627 if (N->getOperand(0).getOpcode() == ISD::SHL)
13628 return MaskIdx == ShiftAmt && MaskLen == (BitWidth - ShiftAmt);
13629 return MaskIdx == 0 && MaskLen == (BitWidth - ShiftAmt);
13630 }
13631 }
13632
13633 return false;
13634}
13635
13636bool ARMTargetLowering::shouldFoldConstantShiftPairToMask(
13637 const SDNode *N, CombineLevel Level) const {
13638 assert(((N->getOpcode() == ISD::SHL &&
13639 N->getOperand(0).getOpcode() == ISD::SRL) ||
13640 (N->getOpcode() == ISD::SRL &&
13641 N->getOperand(0).getOpcode() == ISD::SHL)) &&
13642 "Expected shift-shift mask");
13643
13644 if (!Subtarget->isThumb1Only())
13645 return true;
13646
13647 if (Level == BeforeLegalizeTypes)
13648 return true;
13649
13650 return false;
13651}
13652
13653bool ARMTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
13654 if (!Subtarget->hasNEON()) {
13655 if (Subtarget->isThumb1Only())
13656 return VT.getScalarSizeInBits() <= 32;
13657 return true;
13658 }
13659 return VT.isScalarInteger();
13660}
13661
13662bool ARMTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
13663 EVT VT) const {
13664 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
13665 return false;
13666
13667 switch (FPVT.getSimpleVT().SimpleTy) {
13668 case MVT::f16:
13669 return Subtarget->hasVFP2Base();
13670 case MVT::f32:
13671 return Subtarget->hasVFP2Base();
13672 case MVT::f64:
13673 return Subtarget->hasFP64();
13674 case MVT::v4f32:
13675 case MVT::v8f16:
13676 return Subtarget->hasMVEFloatOps();
13677 default:
13678 return false;
13679 }
13680}
13681
13682static SDValue PerformSHLSimplify(SDNode *N,
13683 TargetLowering::DAGCombinerInfo &DCI,
13684 const ARMSubtarget *ST) {
13685 // Allow the generic combiner to identify potential bswaps.
13686 if (DCI.isBeforeLegalize())
13687 return SDValue();
13688
13689 // DAG combiner will fold:
13690 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
13691 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
13692 // Other code patterns that can also be modified have the following form:
13693 // b + ((a << 1) | 510)
13694 // b + ((a << 1) & 510)
13695 // b + ((a << 1) ^ 510)
13696 // b + ((a << 1) + 510)
13697
13698 // Many instructions can perform the shift for free, but it requires both
13699 // the operands to be registers. If c1 << c2 is too large, a mov immediate
13700 // instruction will be needed. So, unfold back to the original pattern if:
13701 // - c1 and c2 are small enough that they don't require mov imms.
13702 // - the user(s) of the node can perform a shl
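// For example (illustrative): add (shl x, 1), 510 is unfolded back to
//   shl (add x, 255), 1
// since both 255 and 1 still fit the rotated 8-bit immediate encoding.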
13703
13704 // No shifted operands for 16-bit instructions.
13705 if (ST->isThumb() && ST->isThumb1Only())
13706 return SDValue();
13707
13708 // Check that all the users could perform the shl themselves.
13709 for (auto *U : N->uses()) {
13710 switch(U->getOpcode()) {
13711 default:
13712 return SDValue();
13713 case ISD::SUB:
13714 case ISD::ADD:
13715 case ISD::AND:
13716 case ISD::OR:
13717 case ISD::XOR:
13718 case ISD::SETCC:
13719 case ARMISD::CMP:
13720 // Check that the user isn't already using a constant because there
13721 // aren't any instructions that support an immediate operand and a
13722 // shifted operand.
13723 if (isa<ConstantSDNode>(U->getOperand(0)) ||
13724 isa<ConstantSDNode>(U->getOperand(1)))
13725 return SDValue();
13726
13727 // Check that it's not already using a shift.
13728 if (U->getOperand(0).getOpcode() == ISD::SHL ||
13729 U->getOperand(1).getOpcode() == ISD::SHL)
13730 return SDValue();
13731 break;
13732 }
13733 }
13734
13735 if (N->getOpcode() != ISD::ADD && N->getOpcode() != ISD::OR &&
13736 N->getOpcode() != ISD::XOR && N->getOpcode() != ISD::AND)
13737 return SDValue();
13738
13739 if (N->getOperand(0).getOpcode() != ISD::SHL)
13740 return SDValue();
13741
13742 SDValue SHL = N->getOperand(0);
13743
13744 auto *C1ShlC2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
13745 auto *C2 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
13746 if (!C1ShlC2 || !C2)
13747 return SDValue();
13748
13749 APInt C2Int = C2->getAPIntValue();
13750 APInt C1Int = C1ShlC2->getAPIntValue();
13751
13752 // Check that performing a lshr will not lose any information.
13753 APInt Mask = APInt::getHighBitsSet(C2Int.getBitWidth(),
13754 C2Int.getBitWidth() - C2->getZExtValue());
13755 if ((C1Int & Mask) != C1Int)
13756 return SDValue();
13757
13758 // Shift the first constant.
13759 C1Int.lshrInPlace(C2Int);
13760
13761 // The immediates are encoded as an 8-bit value that can be rotated.
13762 auto LargeImm = [](const APInt &Imm) {
13763 unsigned Zeros = Imm.countLeadingZeros() + Imm.countTrailingZeros();
13764 return Imm.getBitWidth() - Zeros > 8;
13765 };
13766
13767 if (LargeImm(C1Int) || LargeImm(C2Int))
13768 return SDValue();
13769
13770 SelectionDAG &DAG = DCI.DAG;
13771 SDLoc dl(N);
13772 SDValue X = SHL.getOperand(0);
13773 SDValue BinOp = DAG.getNode(N->getOpcode(), dl, MVT::i32, X,
13774 DAG.getConstant(C1Int, dl, MVT::i32));
13775 // Shift left to compensate for the lshr of C1Int.
13776 SDValue Res = DAG.getNode(ISD::SHL, dl, MVT::i32, BinOp, SHL.getOperand(1));
13777
13778 LLVM_DEBUG(dbgs() << "Simplify shl use:\n"; SHL.getOperand(0).dump();
13779 SHL.dump(); N->dump());
13780 LLVM_DEBUG(dbgs() << "Into:\n"; X.dump(); BinOp.dump(); Res.dump());
13781 return Res;
13782}
13783
13784
13785/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
13786///
13787static SDValue PerformADDCombine(SDNode *N,
13788 TargetLowering::DAGCombinerInfo &DCI,
13789 const ARMSubtarget *Subtarget) {
13790 SDValue N0 = N->getOperand(0);
13791 SDValue N1 = N->getOperand(1);
13792
13793 // Only works one way, because it needs an immediate operand.
13794 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
13795 return Result;
13796
13797 if (SDValue Result = PerformADDVecReduce(N, DCI.DAG, Subtarget))
13798 return Result;
13799
13800 // First try with the default operand order.
13801 if (SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget))
13802 return Result;
13803
13804 // If that didn't work, try again with the operands commuted.
13805 return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget);
13806}
13807
13808// Combine (sub 0, (csinc X, Y, CC)) -> (csinv -X, Y, CC)
13809// providing -X is as cheap as X (currently, just a constant).
13810static SDValue PerformSubCSINCCombine(SDNode *N, SelectionDAG &DAG) {
13811 if (N->getValueType(0) != MVT::i32 || !isNullConstant(N->getOperand(0)))
13812 return SDValue();
13813 SDValue CSINC = N->getOperand(1);
13814 if (CSINC.getOpcode() != ARMISD::CSINC || !CSINC.hasOneUse())
13815 return SDValue();
13816
13817 ConstantSDNode *X = dyn_cast<ConstantSDNode>(CSINC.getOperand(0));
13818 if (!X)
13819 return SDValue();
13820
13821 return DAG.getNode(ARMISD::CSINV, SDLoc(N), MVT::i32,
13822 DAG.getNode(ISD::SUB, SDLoc(N), MVT::i32, N->getOperand(0),
13823 CSINC.getOperand(0)),
13824 CSINC.getOperand(1), CSINC.getOperand(2),
13825 CSINC.getOperand(3));
13826}
13827
13828/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
13829///
13830static SDValue PerformSUBCombine(SDNode *N,
13831 TargetLowering::DAGCombinerInfo &DCI,
13832 const ARMSubtarget *Subtarget) {
13833 SDValue N0 = N->getOperand(0);
13834 SDValue N1 = N->getOperand(1);
13835
13836 // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
13837 if (N1.getNode()->hasOneUse())
13838 if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI))
13839 return Result;
13840
13841 if (SDValue R = PerformSubCSINCCombine(N, DCI.DAG))
13842 return R;
13843
13844 if (!Subtarget->hasMVEIntegerOps() || !N->getValueType(0).isVector())
13845 return SDValue();
13846
13847 // Fold (sub (ARMvmovImm 0), (ARMvdup x)) -> (ARMvdup (sub 0, x))
13848 // so that we can readily pattern match more mve instructions which can use
13849 // a scalar operand.
13850 SDValue VDup = N->getOperand(1);
13851 if (VDup->getOpcode() != ARMISD::VDUP)
13852 return SDValue();
13853
13854 SDValue VMov = N->getOperand(0);
13855 if (VMov->getOpcode() == ISD::BITCAST)
13856 VMov = VMov->getOperand(0);
13857
13858 if (VMov->getOpcode() != ARMISD::VMOVIMM || !isZeroVector(VMov))
13859 return SDValue();
13860
13861 SDLoc dl(N);
13862 SDValue Negate = DCI.DAG.getNode(ISD::SUB, dl, MVT::i32,
13863 DCI.DAG.getConstant(0, dl, MVT::i32),
13864 VDup->getOperand(0));
13865 return DCI.DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0), Negate);
13866}
13867
13868/// PerformVMULCombine
13869/// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the
13870/// special multiplier accumulator forwarding.
13871/// vmul d3, d0, d2
13872/// vmla d3, d1, d2
13873/// is faster than
13874/// vadd d3, d0, d1
13875/// vmul d3, d3, d2
13876// However, for (A + B) * (A + B),
13877// vadd d2, d0, d1
13878// vmul d3, d0, d2
13879// vmla d3, d1, d2
13880// is slower than
13881// vadd d2, d0, d1
13882// vmul d3, d2, d2
13883static SDValue PerformVMULCombine(SDNode *N,
13884 TargetLowering::DAGCombinerInfo &DCI,
13885 const ARMSubtarget *Subtarget) {
13886 if (!Subtarget->hasVMLxForwarding())
13887 return SDValue();
13888
13889 SelectionDAG &DAG = DCI.DAG;
13890 SDValue N0 = N->getOperand(0);
13891 SDValue N1 = N->getOperand(1);
13892 unsigned Opcode = N0.getOpcode();
13893 if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
13894 Opcode != ISD::FADD && Opcode != ISD::FSUB) {
13895 Opcode = N1.getOpcode();
13896 if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
13897 Opcode != ISD::FADD && Opcode != ISD::FSUB)
13898 return SDValue();
13899 std::swap(N0, N1);
13900 }
13901
13902 if (N0 == N1)
13903 return SDValue();
13904
13905 EVT VT = N->getValueType(0);
13906 SDLoc DL(N);
13907 SDValue N00 = N0->getOperand(0);
13908 SDValue N01 = N0->getOperand(1);
13909 return DAG.getNode(Opcode, DL, VT,
13910 DAG.getNode(ISD::MUL, DL, VT, N00, N1),
13911 DAG.getNode(ISD::MUL, DL, VT, N01, N1));
13912}
13913
13914static SDValue PerformMVEVMULLCombine(SDNode *N, SelectionDAG &DAG,
13915 const ARMSubtarget *Subtarget) {
13916 EVT VT = N->getValueType(0);
13917 if (VT != MVT::v2i64)
13918 return SDValue();
13919
13920 SDValue N0 = N->getOperand(0);
13921 SDValue N1 = N->getOperand(1);
13922
13923 auto IsSignExt = [&](SDValue Op) {
13924 if (Op->getOpcode() != ISD::SIGN_EXTEND_INREG)
13925 return SDValue();
13926 EVT VT = cast<VTSDNode>(Op->getOperand(1))->getVT();
13927 if (VT.getScalarSizeInBits() == 32)
13928 return Op->getOperand(0);
13929 return SDValue();
13930 };
13931 auto IsZeroExt = [&](SDValue Op) {
13932 // Zero extends are a little more awkward. At the point we are matching
13933 // this, we are looking for an AND with a (-1, 0, -1, 0) buildvector mask.
13934 // That might be before or after a bitcast depending on how the and is
13935 // placed. Because this has to look through bitcasts, it is currently only
13936 // supported on LE.
13937 if (!Subtarget->isLittle())
13938 return SDValue();
13939
13940 SDValue And = Op;
13941 if (And->getOpcode() == ISD::BITCAST)
13942 And = And->getOperand(0);
13943 if (And->getOpcode() != ISD::AND)
13944 return SDValue();
13945 SDValue Mask = And->getOperand(1);
13946 if (Mask->getOpcode() == ISD::BITCAST)
13947 Mask = Mask->getOperand(0);
13948
13949 if (Mask->getOpcode() != ISD::BUILD_VECTOR ||
13950 Mask.getValueType() != MVT::v4i32)
13951 return SDValue();
13952 if (isAllOnesConstant(Mask->getOperand(0)) &&
13953 isNullConstant(Mask->getOperand(1)) &&
13954 isAllOnesConstant(Mask->getOperand(2)) &&
13955 isNullConstant(Mask->getOperand(3)))
13956 return And->getOperand(0);
13957 return SDValue();
13958 };
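// For example (assuming the usual little-endian lane layout), a zero-extended
// v2i64 operand typically reaches this code as
// (and X, (bitcast (build_vector -1, 0, -1, 0 : v4i32))), i.e. only the low
// 32 bits of each 64-bit lane survive, and IsZeroExt returns X.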
13959
13960 SDLoc dl(N);
13961 if (SDValue Op0 = IsSignExt(N0)) {
13962 if (SDValue Op1 = IsSignExt(N1)) {
13963 SDValue New0a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op0);
13964 SDValue New1a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op1);
13965 return DAG.getNode(ARMISD::VMULLs, dl, VT, New0a, New1a);
13966 }
13967 }
13968 if (SDValue Op0 = IsZeroExt(N0)) {
13969 if (SDValue Op1 = IsZeroExt(N1)) {
13970 SDValue New0a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op0);
13971 SDValue New1a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op1);
13972 return DAG.getNode(ARMISD::VMULLu, dl, VT, New0a, New1a);
13973 }
13974 }
13975
13976 return SDValue();
13977}
13978
13979static SDValue PerformMULCombine(SDNode *N,
13980 TargetLowering::DAGCombinerInfo &DCI,
13981 const ARMSubtarget *Subtarget) {
13982 SelectionDAG &DAG = DCI.DAG;
13983
13984 EVT VT = N->getValueType(0);
13985 if (Subtarget->hasMVEIntegerOps() && VT == MVT::v2i64)
13986 return PerformMVEVMULLCombine(N, DAG, Subtarget);
13987
13988 if (Subtarget->isThumb1Only())
13989 return SDValue();
13990
13991 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
13992 return SDValue();
13993
13994 if (VT.is64BitVector() || VT.is128BitVector())
13995 return PerformVMULCombine(N, DCI, Subtarget);
13996 if (VT != MVT::i32)
13997 return SDValue();
13998
13999 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
14000 if (!C)
14001 return SDValue();
14002
14003 int64_t MulAmt = C->getSExtValue();
14004 unsigned ShiftAmt = countTrailingZeros<uint64_t>(MulAmt);
14005
14006 ShiftAmt = ShiftAmt & (32 - 1);
14007 SDValue V = N->getOperand(0);
14008 SDLoc DL(N);
14009
14010 SDValue Res;
14011 MulAmt >>= ShiftAmt;
14012
14013 if (MulAmt >= 0) {
14014 if (isPowerOf2_32(MulAmt - 1)) {
14015 // (mul x, 2^N + 1) => (add (shl x, N), x)
14016 Res = DAG.getNode(ISD::ADD, DL, VT,
14017 V,
14018 DAG.getNode(ISD::SHL, DL, VT,
14019 V,
14020 DAG.getConstant(Log2_32(MulAmt - 1), DL,
14021 MVT::i32)));
14022 } else if (isPowerOf2_32(MulAmt + 1)) {
14023 // (mul x, 2^N - 1) => (sub (shl x, N), x)
14024 Res = DAG.getNode(ISD::SUB, DL, VT,
14025 DAG.getNode(ISD::SHL, DL, VT,
14026 V,
14027 DAG.getConstant(Log2_32(MulAmt + 1), DL,
14028 MVT::i32)),
14029 V);
14030 } else
14031 return SDValue();
14032 } else {
14033 uint64_t MulAmtAbs = -MulAmt;
14034 if (isPowerOf2_32(MulAmtAbs + 1)) {
14035 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
14036 Res = DAG.getNode(ISD::SUB, DL, VT,
14037 V,
14038 DAG.getNode(ISD::SHL, DL, VT,
14039 V,
14040 DAG.getConstant(Log2_32(MulAmtAbs + 1), DL,
14041 MVT::i32)));
14042 } else if (isPowerOf2_32(MulAmtAbs - 1)) {
14043 // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
14044 Res = DAG.getNode(ISD::ADD, DL, VT,
14045 V,
14046 DAG.getNode(ISD::SHL, DL, VT,
14047 V,
14048 DAG.getConstant(Log2_32(MulAmtAbs - 1), DL,
14049 MVT::i32)));
14050 Res = DAG.getNode(ISD::SUB, DL, VT,
14051 DAG.getConstant(0, DL, MVT::i32), Res);
14052 } else
14053 return SDValue();
14054 }
14055
14056 if (ShiftAmt != 0)
14057 Res = DAG.getNode(ISD::SHL, DL, VT,
14058 Res, DAG.getConstant(ShiftAmt, DL, MVT::i32));
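// Worked example (tracing the code above): for (mul x, 10), ShiftAmt = 1 and
// MulAmt becomes 5 = 2^2 + 1, so Res = (add x, (shl x, 2)) and the final shl
// by 1 restores the factor of 2, giving 10*x overall.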
14059
14060 // Do not add new nodes to DAG combiner worklist.
14061 DCI.CombineTo(N, Res, false);
14062 return SDValue();
14063}
14064
14065static SDValue CombineANDShift(SDNode *N,
14066 TargetLowering::DAGCombinerInfo &DCI,
14067 const ARMSubtarget *Subtarget) {
14068 // Allow DAGCombine to pattern-match before we touch the canonical form.
14069 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
14070 return SDValue();
14071
14072 if (N->getValueType(0) != MVT::i32)
14073 return SDValue();
14074
14075 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
14076 if (!N1C)
14077 return SDValue();
14078
14079 uint32_t C1 = (uint32_t)N1C->getZExtValue();
14080 // Don't transform uxtb/uxth.
14081 if (C1 == 255 || C1 == 65535)
14082 return SDValue();
14083
14084 SDNode *N0 = N->getOperand(0).getNode();
14085 if (!N0->hasOneUse())
14086 return SDValue();
14087
14088 if (N0->getOpcode() != ISD::SHL && N0->getOpcode() != ISD::SRL)
14089 return SDValue();
14090
14091 bool LeftShift = N0->getOpcode() == ISD::SHL;
14092
14093 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
14094 if (!N01C)
14095 return SDValue();
14096
14097 uint32_t C2 = (uint32_t)N01C->getZExtValue();
14098 if (!C2 || C2 >= 32)
14099 return SDValue();
14100
14101 // Clear irrelevant bits in the mask.
14102 if (LeftShift)
14103 C1 &= (-1U << C2);
14104 else
14105 C1 &= (-1U >> C2);
14106
14107 SelectionDAG &DAG = DCI.DAG;
14108 SDLoc DL(N);
14109
14110 // We have a pattern of the form "(and (shl x, c2) c1)" or
14111 // "(and (srl x, c2) c1)", where c1 is a shifted mask. Try to
14112 // transform to a pair of shifts, to save materializing c1.
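// For example, (and (srl x, 1), 0x3fffff) matches the first pattern below
// with c2 = 1 and c3 = 10, and becomes (srl (shl x, 9), 10), avoiding a
// separate mov to materialize the 0x3fffff mask.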
14113
14114 // First pattern: right shift, then mask off leading bits.
14115 // FIXME: Use demanded bits?
14116 if (!LeftShift && isMask_32(C1)) {
14117 uint32_t C3 = countLeadingZeros(C1);
14118 if (C2 < C3) {
14119 SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
14120 DAG.getConstant(C3 - C2, DL, MVT::i32));
14121 return DAG.getNode(ISD::SRL, DL, MVT::i32, SHL,
14122 DAG.getConstant(C3, DL, MVT::i32));
14123 }
14124 }
14125
14126 // First pattern, reversed: left shift, then mask off trailing bits.
14127 if (LeftShift && isMask_32(~C1)) {
14128 uint32_t C3 = countTrailingZeros(C1);
14129 if (C2 < C3) {
14130 SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
14131 DAG.getConstant(C3 - C2, DL, MVT::i32));
14132 return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,
14133 DAG.getConstant(C3, DL, MVT::i32));
14134 }
14135 }
14136
14137 // Second pattern: left shift, then mask off leading bits.
14138 // FIXME: Use demanded bits?
14139 if (LeftShift && isShiftedMask_32(C1)) {
14140 uint32_t Trailing = countTrailingZeros(C1);
14141 uint32_t C3 = countLeadingZeros(C1);
14142 if (Trailing == C2 && C2 + C3 < 32) {
14143 SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
14144 DAG.getConstant(C2 + C3, DL, MVT::i32));
14145 return DAG.getNode(ISD::SRL, DL, MVT::i32, SHL,
14146 DAG.getConstant(C3, DL, MVT::i32));
14147 }
14148 }
14149
14150 // Second pattern, reversed: right shift, then mask off trailing bits.
14151 // FIXME: Handle other patterns of known/demanded bits.
14152 if (!LeftShift && isShiftedMask_32(C1)) {
14153 uint32_t Leading = countLeadingZeros(C1);
14154 uint32_t C3 = countTrailingZeros(C1);
14155 if (Leading == C2 && C2 + C3 < 32) {
14156 SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
14157 DAG.getConstant(C2 + C3, DL, MVT::i32));
14158 return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,
14159 DAG.getConstant(C3, DL, MVT::i32));
14160 }
14161 }
14162
14163 // FIXME: Transform "(and (shl x, c2) c1)" ->
14164 // "(shl (and x, c1>>c2), c2)" if "c1 >> c2" is a cheaper immediate than
14165 // c1.
14166 return SDValue();
14167}
14168
14169static SDValue PerformANDCombine(SDNode *N,
14170 TargetLowering::DAGCombinerInfo &DCI,
14171 const ARMSubtarget *Subtarget) {
14172 // Attempt to use immediate-form VBIC
14173 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
14174 SDLoc dl(N);
14175 EVT VT = N->getValueType(0);
14176 SelectionDAG &DAG = DCI.DAG;
14177
14178 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT) || VT == MVT::v2i1 ||
14179 VT == MVT::v4i1 || VT == MVT::v8i1 || VT == MVT::v16i1)
14180 return SDValue();
14181
14182 APInt SplatBits, SplatUndef;
14183 unsigned SplatBitSize;
14184 bool HasAnyUndefs;
14185 if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) &&
14186 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
14187 if (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32 ||
14188 SplatBitSize == 64) {
14189 EVT VbicVT;
14190 SDValue Val = isVMOVModifiedImm((~SplatBits).getZExtValue(),
14191 SplatUndef.getZExtValue(), SplatBitSize,
14192 DAG, dl, VbicVT, VT, OtherModImm);
14193 if (Val.getNode()) {
14194 SDValue Input =
14195 DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0));
14196 SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val);
14197 return DAG.getNode(ISD::BITCAST, dl, VT, Vbic);
14198 }
14199 }
14200 }
14201
14202 if (!Subtarget->isThumb1Only()) {
14203 // fold (and (select cc, -1, c), x) -> (select cc, x, (and, x, c))
14204 if (SDValue Result = combineSelectAndUseCommutative(N, true, DCI))
14205 return Result;
14206
14207 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
14208 return Result;
14209 }
14210
14211 if (Subtarget->isThumb1Only())
14212 if (SDValue Result = CombineANDShift(N, DCI, Subtarget))
14213 return Result;
14214
14215 return SDValue();
14216}
14217
14218// Try combining OR nodes to SMULWB, SMULWT.
14219static SDValue PerformORCombineToSMULWBT(SDNode *OR,
14220 TargetLowering::DAGCombinerInfo &DCI,
14221 const ARMSubtarget *Subtarget) {
14222 if (!Subtarget->hasV6Ops() ||
14223 (Subtarget->isThumb() &&
14224 (!Subtarget->hasThumb2() || !Subtarget->hasDSP())))
14225 return SDValue();
14226
14227 SDValue SRL = OR->getOperand(0);
14228 SDValue SHL = OR->getOperand(1);
14229
14230 if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) {
14231 SRL = OR->getOperand(1);
14232 SHL = OR->getOperand(0);
14233 }
14234 if (!isSRL16(SRL) || !isSHL16(SHL))
14235 return SDValue();
14236
14237 // The first operands to the shifts need to be the two results from the
14238 // same smul_lohi node.
14239 if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) ||
14240 SRL.getOperand(0).getOpcode() != ISD::SMUL_LOHI)
14241 return SDValue();
14242
14243 SDNode *SMULLOHI = SRL.getOperand(0).getNode();
14244 if (SRL.getOperand(0) != SDValue(SMULLOHI, 0) ||
14245 SHL.getOperand(0) != SDValue(SMULLOHI, 1))
14246 return SDValue();
14247
14248 // Now we have:
14249 // (or (srl (smul_lohi ?, ?), 16), (shl (smul_lohi ?, ?), 16))
14250 // For SMULW[B|T] smul_lohi will take a 32-bit and a 16-bit argument.
14251 // For SMULWB the 16-bit value will be sign extended somehow.
14252 // For SMULWT only the SRA is required.
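// (Roughly, smulwb/smulwt multiply a 32-bit register by the sign-extended
// bottom/top halfword of another register and keep the top 32 bits of the
// 48-bit product, which is what the srl/shl-by-16 pair above reconstructs.)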
14253 // Check both sides of SMUL_LOHI
14254 SDValue OpS16 = SMULLOHI->getOperand(0);
14255 SDValue OpS32 = SMULLOHI->getOperand(1);
14256
14257 SelectionDAG &DAG = DCI.DAG;
14258 if (!isS16(OpS16, DAG) && !isSRA16(OpS16)) {
14259 OpS16 = OpS32;
14260 OpS32 = SMULLOHI->getOperand(0);
14261 }
14262
14263 SDLoc dl(OR);
14264 unsigned Opcode = 0;
14265 if (isS16(OpS16, DAG))
14266 Opcode = ARMISD::SMULWB;
14267 else if (isSRA16(OpS16)) {
14268 Opcode = ARMISD::SMULWT;
14269 OpS16 = OpS16->getOperand(0);
14270 }
14271 else
14272 return SDValue();
14273
14274 SDValue Res = DAG.getNode(Opcode, dl, MVT::i32, OpS32, OpS16);
14275 DAG.ReplaceAllUsesOfValueWith(SDValue(OR, 0), Res);
14276 return SDValue(OR, 0);
14277}
14278
14279static SDValue PerformORCombineToBFI(SDNode *N,
14280 TargetLowering::DAGCombinerInfo &DCI,
14281 const ARMSubtarget *Subtarget) {
14282 // BFI is only available on V6T2+
14283 if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
14284 return SDValue();
14285
14286 EVT VT = N->getValueType(0);
14287 SDValue N0 = N->getOperand(0);
14288 SDValue N1 = N->getOperand(1);
14289 SelectionDAG &DAG = DCI.DAG;
14290 SDLoc DL(N);
14291 // 1) or (and A, mask), val => ARMbfi A, val, mask
14292 // iff (val & mask) == val
14293 //
14294 // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
14295 // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
14296 // && mask == ~mask2
14297 // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
14298 // && ~mask == mask2
14299 // (i.e., copy a bitfield value into another bitfield of the same width)
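// Example of case (1): with mask = 0xffff00ff and val = 0x2a00,
// (val & ~mask) == val and ~mask is a contiguous field, so the combine emits
// ARMbfi A, 0x2a, 0xffff00ff, inserting 0x2a into bits [15:8] of A.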
14300
14301 if (VT != MVT::i32)
14302 return SDValue();
14303
14304 SDValue N00 = N0.getOperand(0);
14305
14306 // The value and the mask need to be constants so we can verify this is
14307 // actually a bitfield set. If the mask is 0xffff, we can do better
14308 // via a movt instruction, so don't use BFI in that case.
14309 SDValue MaskOp = N0.getOperand(1);
14310 ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp);
14311 if (!MaskC)
14312 return SDValue();
14313 unsigned Mask = MaskC->getZExtValue();
14314 if (Mask == 0xffff)
14315 return SDValue();
14316 SDValue Res;
14317 // Case (1): or (and A, mask), val => ARMbfi A, val, mask
14318 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
14319 if (N1C) {
14320 unsigned Val = N1C->getZExtValue();
14321 if ((Val & ~Mask) != Val)
14322 return SDValue();
14323
14324 if (ARM::isBitFieldInvertedMask(Mask)) {
14325 Val >>= countTrailingZeros(~Mask);
14326
14327 Res = DAG.getNode(ARMISD::BFI, DL, VT, N00,
14328 DAG.getConstant(Val, DL, MVT::i32),
14329 DAG.getConstant(Mask, DL, MVT::i32));
14330
14331 DCI.CombineTo(N, Res, false);
14332 // Return value from the original node to inform the combiner that N is
14333 // now dead.
14334 return SDValue(N, 0);
14335 }
14336 } else if (N1.getOpcode() == ISD::AND) {
14337 // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
14338 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
14339 if (!N11C)
14340 return SDValue();
14341 unsigned Mask2 = N11C->getZExtValue();
14342
14343 // Mask and ~Mask2 (or the reverse) must be equivalent for the BFI pattern
14344 // to match as-is.
14345 if (ARM::isBitFieldInvertedMask(Mask) &&
14346 (Mask == ~Mask2)) {
14347 // The pack halfword instruction works better for masks that fit it,
14348 // so use that when it's available.
14349 if (Subtarget->hasDSP() &&
14350 (Mask == 0xffff || Mask == 0xffff0000))
14351 return SDValue();
14352 // 2a
14353 unsigned amt = countTrailingZeros(Mask2);
14354 Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
14355 DAG.getConstant(amt, DL, MVT::i32));
14356 Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
14357 DAG.getConstant(Mask, DL, MVT::i32));
14358 DCI.CombineTo(N, Res, false);
14359 // Return value from the original node to inform the combiner that N is
14360 // now dead.
14361 return SDValue(N, 0);
14362 } else if (ARM::isBitFieldInvertedMask(~Mask) &&
14363 (~Mask == Mask2)) {
14364 // The pack halfword instruction works better for masks that fit it,
14365 // so use that when it's available.
14366 if (Subtarget->hasDSP() &&
14367 (Mask2 == 0xffff || Mask2 == 0xffff0000))
14368 return SDValue();
14369 // 2b
14370 unsigned lsb = countTrailingZeros(Mask);
14371 Res = DAG.getNode(ISD::SRL, DL, VT, N00,
14372 DAG.getConstant(lsb, DL, MVT::i32));
14373 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
14374 DAG.getConstant(Mask2, DL, MVT::i32));
14375 DCI.CombineTo(N, Res, false);
14376 // Return value from the original node to inform the combiner that N is
14377 // now dead.
14378 return SDValue(N, 0);
14379 }
14380 }
14381
14382 if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) &&
14383 N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) &&
14384 ARM::isBitFieldInvertedMask(~Mask)) {
14385 // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask
14386 // where lsb(mask) == #shamt and masked bits of B are known zero.
14387 SDValue ShAmt = N00.getOperand(1);
14388 unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue();
14389 unsigned LSB = countTrailingZeros(Mask);
14390 if (ShAmtC != LSB)
14391 return SDValue();
14392
14393 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0),
14394 DAG.getConstant(~Mask, DL, MVT::i32));
14395
14396 DCI.CombineTo(N, Res, false);
14397 // Return value from the original node to inform the combiner that N is
14398 // now dead.
14399 return SDValue(N, 0);
14400 }
14401
14402 return SDValue();
14403}
14404
14405static bool isValidMVECond(unsigned CC, bool IsFloat) {
14406 switch (CC) {
14407 case ARMCC::EQ:
14408 case ARMCC::NE:
14409 case ARMCC::LE:
14410 case ARMCC::GT:
14411 case ARMCC::GE:
14412 case ARMCC::LT:
14413 return true;
14414 case ARMCC::HS:
14415 case ARMCC::HI:
14416 return !IsFloat;
14417 default:
14418 return false;
14419 };
14420}
14421
14422static ARMCC::CondCodes getVCMPCondCode(SDValue N) {
14423 if (N->getOpcode() == ARMISD::VCMP)
14424 return (ARMCC::CondCodes)N->getConstantOperandVal(2);
14425 else if (N->getOpcode() == ARMISD::VCMPZ)
14426 return (ARMCC::CondCodes)N->getConstantOperandVal(1);
14427 else
14428 llvm_unreachable("Not a VCMP/VCMPZ!")::llvm::llvm_unreachable_internal("Not a VCMP/VCMPZ!", "llvm/lib/Target/ARM/ARMISelLowering.cpp"
, 14428)
;
14429}
14430
14431static bool CanInvertMVEVCMP(SDValue N) {
14432 ARMCC::CondCodes CC = ARMCC::getOppositeCondition(getVCMPCondCode(N));
14433 return isValidMVECond(CC, N->getOperand(0).getValueType().isFloatingPoint());
14434}
14435
14436static SDValue PerformORCombine_i1(SDNode *N, SelectionDAG &DAG,
14437 const ARMSubtarget *Subtarget) {
14438 // Try to invert "or A, B" -> "and ~A, ~B", as the "and" is easier to chain
14439 // together with predicates
14440 EVT VT = N->getValueType(0);
14441 SDLoc DL(N);
14442 SDValue N0 = N->getOperand(0);
14443 SDValue N1 = N->getOperand(1);
14444
14445 auto IsFreelyInvertable = [&](SDValue V) {
14446 if (V->getOpcode() == ARMISD::VCMP || V->getOpcode() == ARMISD::VCMPZ)
14447 return CanInvertMVEVCMP(V);
14448 return false;
14449 };
14450
14451 // At least one operand must be freely invertable.
14452 if (!(IsFreelyInvertable(N0) || IsFreelyInvertable(N1)))
14453 return SDValue();
14454
14455 SDValue NewN0 = DAG.getLogicalNOT(DL, N0, VT);
14456 SDValue NewN1 = DAG.getLogicalNOT(DL, N1, VT);
14457 SDValue And = DAG.getNode(ISD::AND, DL, VT, NewN0, NewN1);
14458 return DAG.getLogicalNOT(DL, And, VT);
14459}
14460
14461/// PerformORCombine - Target-specific dag combine xforms for ISD::OR
14462static SDValue PerformORCombine(SDNode *N,
14463 TargetLowering::DAGCombinerInfo &DCI,
14464 const ARMSubtarget *Subtarget) {
14465 // Attempt to use immediate-form VORR
14466 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
14467 SDLoc dl(N);
14468 EVT VT = N->getValueType(0);
14469 SelectionDAG &DAG = DCI.DAG;
14470
14471 if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
14472 return SDValue();
14473
14474 if (Subtarget->hasMVEIntegerOps() && (VT == MVT::v2i1 || VT == MVT::v4i1 ||
14475 VT == MVT::v8i1 || VT == MVT::v16i1))
14476 return PerformORCombine_i1(N, DAG, Subtarget);
14477
14478 APInt SplatBits, SplatUndef;
14479 unsigned SplatBitSize;
14480 bool HasAnyUndefs;
14481 if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) &&
14482 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
14483 if (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32 ||
14484 SplatBitSize == 64) {
14485 EVT VorrVT;
14486 SDValue Val =
14487 isVMOVModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
14488 SplatBitSize, DAG, dl, VorrVT, VT, OtherModImm);
14489 if (Val.getNode()) {
14490 SDValue Input =
14491 DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0));
14492 SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val);
14493 return DAG.getNode(ISD::BITCAST, dl, VT, Vorr);
14494 }
14495 }
14496 }
14497
14498 if (!Subtarget->isThumb1Only()) {
14499 // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
14500 if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
14501 return Result;
14502 if (SDValue Result = PerformORCombineToSMULWBT(N, DCI, Subtarget))
14503 return Result;
14504 }
14505
14506 SDValue N0 = N->getOperand(0);
14507 SDValue N1 = N->getOperand(1);
14508
14509 // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
14510 if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() &&
14511 DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
14512
14513 // The code below optimizes (or (and X, Y), Z).
14514 // The AND operand needs to have a single user to make these optimizations
14515 // profitable.
14516 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
14517 return SDValue();
14518
14519 APInt SplatUndef;
14520 unsigned SplatBitSize;
14521 bool HasAnyUndefs;
14522
14523 APInt SplatBits0, SplatBits1;
14524 BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
14525 BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
14526 // Ensure that the second operand of each AND is a constant
14527 if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
14528 HasAnyUndefs) && !HasAnyUndefs) {
14529 if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
14530 HasAnyUndefs) && !HasAnyUndefs) {
14531 // Ensure that the bit widths of the constants are the same and that
14532 // the splat arguments are logical inverses as per the pattern we
14533 // are trying to simplify.
14534 if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() &&
14535 SplatBits0 == ~SplatBits1) {
14536 // Canonicalize the vector type to make instruction selection
14537 // simpler.
14538 EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
14539 SDValue Result = DAG.getNode(ARMISD::VBSP, dl, CanonicalVT,
14540 N0->getOperand(1),
14541 N0->getOperand(0),
14542 N1->getOperand(0));
14543 return DAG.getNode(ISD::BITCAST, dl, VT, Result);
14544 }
14545 }
14546 }
14547 }
14548
14549 // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
14550 // reasonable.
14551 if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
14552 if (SDValue Res = PerformORCombineToBFI(N, DCI, Subtarget))
14553 return Res;
14554 }
14555
14556 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
14557 return Result;
14558
14559 return SDValue();
14560}
14561
14562static SDValue PerformXORCombine(SDNode *N,
14563 TargetLowering::DAGCombinerInfo &DCI,
14564 const ARMSubtarget *Subtarget) {
14565 EVT VT = N->getValueType(0);
14566 SelectionDAG &DAG = DCI.DAG;
14567
14568 if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
14569 return SDValue();
14570
14571 if (!Subtarget->isThumb1Only()) {
14572 // fold (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
14573 if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
14574 return Result;
14575
14576 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
14577 return Result;
14578 }
14579
14580 if (Subtarget->hasMVEIntegerOps()) {
14581 // fold (xor(vcmp/z, 1)) into a vcmp with the opposite condition.
14582 SDValue N0 = N->getOperand(0);
14583 SDValue N1 = N->getOperand(1);
14584 const TargetLowering *TLI = Subtarget->getTargetLowering();
14585 if (TLI->isConstTrueVal(N1) &&
14586 (N0->getOpcode() == ARMISD::VCMP || N0->getOpcode() == ARMISD::VCMPZ)) {
14587 if (CanInvertMVEVCMP(N0)) {
14588 SDLoc DL(N0);
14589 ARMCC::CondCodes CC = ARMCC::getOppositeCondition(getVCMPCondCode(N0));
14590
14591 SmallVector<SDValue, 4> Ops;
14592 Ops.push_back(N0->getOperand(0));
14593 if (N0->getOpcode() == ARMISD::VCMP)
14594 Ops.push_back(N0->getOperand(1));
14595 Ops.push_back(DAG.getConstant(CC, DL, MVT::i32));
14596 return DAG.getNode(N0->getOpcode(), DL, N0->getValueType(0), Ops);
14597 }
14598 }
14599 }
14600
14601 return SDValue();
14602}
14603
14604// ParseBFI - given a BFI instruction in N, extract the "from" value (Rn) and return it,
14605// and fill in FromMask and ToMask with (consecutive) bits in "from" to be extracted and
14606// their position in "to" (Rd).
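// For example, for a BFI whose mask operand is 0xffff00ff, ToMask becomes
// ~0xffff00ff = 0x0000ff00 (the bits written in Rd) and FromMask becomes
// 0x000000ff (the bits read from Rn); if Rn itself is (srl X, 8), FromMask
// is shifted up to 0x0000ff00 and X is returned instead.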
14607static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask) {
14608 assert(N->getOpcode() == ARMISD::BFI);
14609
14610 SDValue From = N->getOperand(1);
14611 ToMask = ~cast<ConstantSDNode>(N->getOperand(2))->getAPIntValue();
14612 FromMask = APInt::getLowBitsSet(ToMask.getBitWidth(), ToMask.countPopulation());
14613
14614 // If the Base came from a SHR #C, we can deduce that it is really testing bit
14615 // #C in the base of the SHR.
14616 if (From->getOpcode() == ISD::SRL &&
14617 isa<ConstantSDNode>(From->getOperand(1))) {
14618 APInt Shift = cast<ConstantSDNode>(From->getOperand(1))->getAPIntValue();
14619 assert(Shift.getLimitedValue() < 32 && "Shift too large!");
14620 FromMask <<= Shift.getLimitedValue(31);
14621 From = From->getOperand(0);
14622 }
14623
14624 return From;
14625}
14626
14627// If A and B contain one contiguous set of bits, does A | B == A . B?
14628//
14629 // Neither A nor B may be zero.
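// For example, A = 0b111000 and B = 0b000111 concatenate properly (A's lowest
// set bit sits directly above B's highest set bit), while A = 0b110000 and
// B = 0b000111 do not.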
14630static bool BitsProperlyConcatenate(const APInt &A, const APInt &B) {
14631 unsigned LastActiveBitInA = A.countTrailingZeros();
14632 unsigned FirstActiveBitInB = B.getBitWidth() - B.countLeadingZeros() - 1;
14633 return LastActiveBitInA - 1 == FirstActiveBitInB;
14634}
14635
14636static SDValue FindBFIToCombineWith(SDNode *N) {
14637 // We have a BFI in N. Find a BFI it can combine with, if one exists.
14638 APInt ToMask, FromMask;
14639 SDValue From = ParseBFI(N, ToMask, FromMask);
14640 SDValue To = N->getOperand(0);
14641
14642 SDValue V = To;
14643 if (V.getOpcode() != ARMISD::BFI)
14644 return SDValue();
14645
14646 APInt NewToMask, NewFromMask;
14647 SDValue NewFrom = ParseBFI(V.getNode(), NewToMask, NewFromMask);
14648 if (NewFrom != From)
14649 return SDValue();
14650
14651 // Do the written bits conflict with any we've seen so far?
14652 if ((NewToMask & ToMask).getBoolValue())
14653 // Conflicting bits.
14654 return SDValue();
14655
14656 // Are the new bits contiguous when combined with the old bits?
14657 if (BitsProperlyConcatenate(ToMask, NewToMask) &&
14658 BitsProperlyConcatenate(FromMask, NewFromMask))
14659 return V;
14660 if (BitsProperlyConcatenate(NewToMask, ToMask) &&
14661 BitsProperlyConcatenate(NewFromMask, FromMask))
14662 return V;
14663
14664 return SDValue();
14665}
14666
14667static SDValue PerformBFICombine(SDNode *N, SelectionDAG &DAG) {
14668 SDValue N0 = N->getOperand(0);
14669 SDValue N1 = N->getOperand(1);
14670
14671 if (N1.getOpcode() == ISD::AND) {
14672 // (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
14673 // the bits being cleared by the AND are not demanded by the BFI.
14674 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
14675 if (!N11C)
14676 return SDValue();
14677 unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
14678 unsigned LSB = countTrailingZeros(~InvMask);
14679 unsigned Width = (32 - countLeadingZeros(~InvMask)) - LSB;
14680 assert(Width <
14681            static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
14682        "undefined behavior");
14683 unsigned Mask = (1u << Width) - 1;
14684 unsigned Mask2 = N11C->getZExtValue();
14685 if ((Mask & (~Mask2)) == 0)
14686 return DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0),
14687 N->getOperand(0), N1.getOperand(0), N->getOperand(2));
14688 return SDValue();
14689 }
14690
14691 // Look for another BFI to combine with.
14692 if (SDValue CombineBFI = FindBFIToCombineWith(N)) {
14693 // We've found a BFI.
14694 APInt ToMask1, FromMask1;
14695 SDValue From1 = ParseBFI(N, ToMask1, FromMask1);
14696
14697 APInt ToMask2, FromMask2;
14698 SDValue From2 = ParseBFI(CombineBFI.getNode(), ToMask2, FromMask2);
14699 assert(From1 == From2);
14700 (void)From2;
14701
14702 // Create a new BFI, combining the two together.
14703 APInt NewFromMask = FromMask1 | FromMask2;
14704 APInt NewToMask = ToMask1 | ToMask2;
14705
14706 EVT VT = N->getValueType(0);
14707 SDLoc dl(N);
14708
14709 if (NewFromMask[0] == 0)
14710 From1 = DAG.getNode(
14711 ISD::SRL, dl, VT, From1,
14712 DAG.getConstant(NewFromMask.countTrailingZeros(), dl, VT));
14713 return DAG.getNode(ARMISD::BFI, dl, VT, CombineBFI.getOperand(0), From1,
14714 DAG.getConstant(~NewToMask, dl, VT));
14715 }
14716
14717 // Reassociate BFI(BFI (A, B, M1), C, M2) to BFI(BFI (A, C, M2), B, M1) so
14718 // that lower bit insertions are performed first, provided that M1 and M2
14719 // do not overlap. This can allow multiple BFI instructions to be combined
14720 // together by the other folds above.
14721 if (N->getOperand(0).getOpcode() == ARMISD::BFI) {
14722 APInt ToMask1 = ~N->getConstantOperandAPInt(2);
14723 APInt ToMask2 = ~N0.getConstantOperandAPInt(2);
14724
14725 if (!N0.hasOneUse() || (ToMask1 & ToMask2) != 0 ||
14726 ToMask1.countLeadingZeros() < ToMask2.countLeadingZeros())
14727 return SDValue();
14728
14729 EVT VT = N->getValueType(0);
14730 SDLoc dl(N);
14731 SDValue BFI1 = DAG.getNode(ARMISD::BFI, dl, VT, N0.getOperand(0),
14732 N->getOperand(1), N->getOperand(2));
14733 return DAG.getNode(ARMISD::BFI, dl, VT, BFI1, N0.getOperand(1),
14734 N0.getOperand(2));
14735 }
14736
14737 return SDValue();
14738}
14739
14740// Check that N is CMPZ(CSINC(0, 0, CC, X)),
14741 // or CMPZ(CMOV(1, 0, CC, $cpsr, X));
14742 // return X if valid.
14743static SDValue IsCMPZCSINC(SDNode *Cmp, ARMCC::CondCodes &CC) {
14744 if (Cmp->getOpcode() != ARMISD::CMPZ || !isNullConstant(Cmp->getOperand(1)))
14745 return SDValue();
14746 SDValue CSInc = Cmp->getOperand(0);
14747
14748 // Ignore any `And 1` nodes that may not yet have been removed. We are
14749 // looking for a value that produces 1/0, so these have no effect on the
14750 // code.
14751 while (CSInc.getOpcode() == ISD::AND &&
14752 isa<ConstantSDNode>(CSInc.getOperand(1)) &&
14753 CSInc.getConstantOperandVal(1) == 1 && CSInc->hasOneUse())
14754 CSInc = CSInc.getOperand(0);
14755
14756 if (CSInc.getOpcode() == ARMISD::CSINC &&
14757 isNullConstant(CSInc.getOperand(0)) &&
14758 isNullConstant(CSInc.getOperand(1)) && CSInc->hasOneUse()) {
14759 CC = (ARMCC::CondCodes)CSInc.getConstantOperandVal(2);
14760 return CSInc.getOperand(3);
14761 }
14762 if (CSInc.getOpcode() == ARMISD::CMOV && isOneConstant(CSInc.getOperand(0)) &&
14763 isNullConstant(CSInc.getOperand(1)) && CSInc->hasOneUse()) {
14764 CC = (ARMCC::CondCodes)CSInc.getConstantOperandVal(2);
14765 return CSInc.getOperand(4);
14766 }
14767 if (CSInc.getOpcode() == ARMISD::CMOV && isOneConstant(CSInc.getOperand(1)) &&
14768 isNullConstant(CSInc.getOperand(0)) && CSInc->hasOneUse()) {
14769 CC = ARMCC::getOppositeCondition(
14770 (ARMCC::CondCodes)CSInc.getConstantOperandVal(2));
14771 return CSInc.getOperand(4);
14772 }
14773 return SDValue();
14774}
14775
14776static SDValue PerformCMPZCombine(SDNode *N, SelectionDAG &DAG) {
14777 // Given CMPZ(CSINC(C, 0, 0, EQ), 0), we can just use C directly. As in
14778 // t92: glue = ARMISD::CMPZ t74, 0
14779 // t93: i32 = ARMISD::CSINC 0, 0, 1, t92
14780 // t96: glue = ARMISD::CMPZ t93, 0
14781 // t114: i32 = ARMISD::CSINV 0, 0, 0, t96
14782 ARMCC::CondCodes Cond;
14783 if (SDValue C = IsCMPZCSINC(N, Cond))
14784 if (Cond == ARMCC::EQ)
14785 return C;
14786 return SDValue();
14787}
14788
14789static SDValue PerformCSETCombine(SDNode *N, SelectionDAG &DAG) {
14790 // Fold away an unnecessary CMPZ/CSINC
14791 // CSXYZ A, B, C1 (CMPZ (CSINC 0, 0, C2, D), 0) ->
14792 // if C1==EQ -> CSXYZ A, B, C2, D
14793 // if C1==NE -> CSXYZ A, B, NOT(C2), D
14794 ARMCC::CondCodes Cond;
14795 if (SDValue C = IsCMPZCSINC(N->getOperand(3).getNode(), Cond)) {
14796 if (N->getConstantOperandVal(2) == ARMCC::EQ)
14797 return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
14798 N->getOperand(1),
14799 DAG.getConstant(Cond, SDLoc(N), MVT::i32), C);
14800 if (N->getConstantOperandVal(2) == ARMCC::NE)
14801 return DAG.getNode(
14802 N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
14803 N->getOperand(1),
14804 DAG.getConstant(ARMCC::getOppositeCondition(Cond), SDLoc(N), MVT::i32), C);
14805 }
14806 return SDValue();
14807}
14808
14809/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
14810/// ARMISD::VMOVRRD.
14811static SDValue PerformVMOVRRDCombine(SDNode *N,
14812 TargetLowering::DAGCombinerInfo &DCI,
14813 const ARMSubtarget *Subtarget) {
14814 // vmovrrd(vmovdrr x, y) -> x,y
14815 SDValue InDouble = N->getOperand(0);
14816 if (InDouble.getOpcode() == ARMISD::VMOVDRR && Subtarget->hasFP64())
14817 return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
14818
14819 // vmovrrd(load f64) -> (load i32), (load i32)
14820 SDNode *InNode = InDouble.getNode();
14821 if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() &&
14822 InNode->getValueType(0) == MVT::f64 &&
14823 InNode->getOperand(1).getOpcode() == ISD::FrameIndex &&
14824 !cast<LoadSDNode>(InNode)->isVolatile()) {
14825 // TODO: Should this be done for non-FrameIndex operands?
14826 LoadSDNode *LD = cast<LoadSDNode>(InNode);
14827
14828 SelectionDAG &DAG = DCI.DAG;
14829 SDLoc DL(LD);
14830 SDValue BasePtr = LD->getBasePtr();
14831 SDValue NewLD1 =
14832 DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, LD->getPointerInfo(),
14833 LD->getAlign(), LD->getMemOperand()->getFlags());
14834
14835 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
14836 DAG.getConstant(4, DL, MVT::i32));
14837
14838 SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, LD->getChain(), OffsetPtr,
14839 LD->getPointerInfo().getWithOffset(4),
14840 commonAlignment(LD->getAlign(), 4),
14841 LD->getMemOperand()->getFlags());
14842
14843 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
14844 if (DCI.DAG.getDataLayout().isBigEndian())
14845 std::swap (NewLD1, NewLD2);
14846 SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2);
14847 return Result;
14848 }
14849
14850 // VMOVRRD(extract(..(build_vector(a, b, c, d)))) -> a,b or c,d
14851 // VMOVRRD(extract(insert_vector(insert_vector(.., a, l1), b, l2))) -> a,b
14852 if (InDouble.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
14853 isa<ConstantSDNode>(InDouble.getOperand(1))) {
14854 SDValue BV = InDouble.getOperand(0);
14855 // Look up through any nop bitcasts and vector_reg_casts. bitcasts may
14856 // change lane order under big endian.
14857 bool BVSwap = BV.getOpcode() == ISD::BITCAST;
14858 while (
14859 (BV.getOpcode() == ISD::BITCAST ||
14860 BV.getOpcode() == ARMISD::VECTOR_REG_CAST) &&
14861 (BV.getValueType() == MVT::v2f64 || BV.getValueType() == MVT::v2i64)) {
14862 BVSwap = BV.getOpcode() == ISD::BITCAST;
14863 BV = BV.getOperand(0);
14864 }
14865 if (BV.getValueType() != MVT::v4i32)
14866 return SDValue();
14867
14868 // Handle buildvectors, pulling out the correct lane depending on
14869 // endianness.
14870 unsigned Offset = InDouble.getConstantOperandVal(1) == 1 ? 2 : 0;
14871 if (BV.getOpcode() == ISD::BUILD_VECTOR) {
14872 SDValue Op0 = BV.getOperand(Offset);
14873 SDValue Op1 = BV.getOperand(Offset + 1);
14874 if (!Subtarget->isLittle() && BVSwap)
14875 std::swap(Op0, Op1);
14876
14877 return DCI.DAG.getMergeValues({Op0, Op1}, SDLoc(N));
14878 }
14879
14880 // A chain of insert_vectors, grabbing the correct value of the chain of
14881 // inserts.
14882 SDValue Op0, Op1;
14883 while (BV.getOpcode() == ISD::INSERT_VECTOR_ELT) {
14884 if (isa<ConstantSDNode>(BV.getOperand(2))) {
14885 if (BV.getConstantOperandVal(2) == Offset)
14886 Op0 = BV.getOperand(1);
14887 if (BV.getConstantOperandVal(2) == Offset + 1)
14888 Op1 = BV.getOperand(1);
14889 }
14890 BV = BV.getOperand(0);
14891 }
14892 if (!Subtarget->isLittle() && BVSwap)
14893 std::swap(Op0, Op1);
14894 if (Op0 && Op1)
14895 return DCI.DAG.getMergeValues({Op0, Op1}, SDLoc(N));
14896 }
14897
14898 return SDValue();
14899}
14900
14901/// PerformVMOVDRRCombine - Target-specific dag combine xforms for
14902/// ARMISD::VMOVDRR. This is also used for BUILD_VECTORs with 2 operands.
14903static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) {
14904 // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X)
14905 SDValue Op0 = N->getOperand(0);
14906 SDValue Op1 = N->getOperand(1);
14907 if (Op0.getOpcode() == ISD::BITCAST)
14908 Op0 = Op0.getOperand(0);
14909 if (Op1.getOpcode() == ISD::BITCAST)
14910 Op1 = Op1.getOperand(0);
14911 if (Op0.getOpcode() == ARMISD::VMOVRRD &&
14912 Op0.getNode() == Op1.getNode() &&
14913 Op0.getResNo() == 0 && Op1.getResNo() == 1)
14914 return DAG.getNode(ISD::BITCAST, SDLoc(N),
14915 N->getValueType(0), Op0.getOperand(0));
14916 return SDValue();
14917}
14918
14919static SDValue PerformVMOVhrCombine(SDNode *N,
14920 TargetLowering::DAGCombinerInfo &DCI) {
14921 SDValue Op0 = N->getOperand(0);
14922
14923 // VMOVhr (VMOVrh (X)) -> X
14924 if (Op0->getOpcode() == ARMISD::VMOVrh)
14925 return Op0->getOperand(0);
14926
14927 // FullFP16: half values are passed in S-registers, and we don't
14928 // need any of the bitcast and moves:
14929 //
14930 // t2: f32,ch = CopyFromReg t0, Register:f32 %0
14931 // t5: i32 = bitcast t2
14932 // t18: f16 = ARMISD::VMOVhr t5
14933 if (Op0->getOpcode() == ISD::BITCAST) {
14934 SDValue Copy = Op0->getOperand(0);
14935 if (Copy.getValueType() == MVT::f32 &&
14936 Copy->getOpcode() == ISD::CopyFromReg) {
14937 SDValue Ops[] = {Copy->getOperand(0), Copy->getOperand(1)};
14938 SDValue NewCopy =
14939 DCI.DAG.getNode(ISD::CopyFromReg, SDLoc(N), N->getValueType(0), Ops);
14940 return NewCopy;
14941 }
14942 }
14943
14944 // fold (VMOVhr (load x)) -> (load (f16*)x)
14945 if (LoadSDNode *LN0 = dyn_cast<LoadSDNode>(Op0)) {
14946 if (LN0->hasOneUse() && LN0->isUnindexed() &&
14947 LN0->getMemoryVT() == MVT::i16) {
14948 SDValue Load =
14949 DCI.DAG.getLoad(N->getValueType(0), SDLoc(N), LN0->getChain(),
14950 LN0->getBasePtr(), LN0->getMemOperand());
14951 DCI.DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Load.getValue(0));
14952 DCI.DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1));
14953 return Load;
14954 }
14955 }
14956
14957 // Only the bottom 16 bits of the source register are used.
14958 APInt DemandedMask = APInt::getLowBitsSet(32, 16);
14959 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
14960 if (TLI.SimplifyDemandedBits(Op0, DemandedMask, DCI))
14961 return SDValue(N, 0);
14962
14963 return SDValue();
14964}
14965
14966static SDValue PerformVMOVrhCombine(SDNode *N, SelectionDAG &DAG) {
14967 SDValue N0 = N->getOperand(0);
14968 EVT VT = N->getValueType(0);
14969
14970 // fold (VMOVrh (fpconst x)) -> const x
14971 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N0)) {
14972 APFloat V = C->getValueAPF();
14973 return DAG.getConstant(V.bitcastToAPInt().getZExtValue(), SDLoc(N), VT);
14974 }
14975
14976 // fold (VMOVrh (load x)) -> (zextload (i16*)x)
14977 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse()) {
14978 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14979
14980 SDValue Load =
14981 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, LN0->getChain(),
14982 LN0->getBasePtr(), MVT::i16, LN0->getMemOperand());
14983 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Load.getValue(0));
14984 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
14985 return Load;
14986 }
14987
14988 // Fold VMOVrh(extract(x, n)) -> vgetlaneu(x, n)
14989 if (N0->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
14990 isa<ConstantSDNode>(N0->getOperand(1)))
14991 return DAG.getNode(ARMISD::VGETLANEu, SDLoc(N), VT, N0->getOperand(0),
14992 N0->getOperand(1));
14993
14994 return SDValue();
14995}
14996
14997/// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node
14998/// are normal, non-volatile loads. If so, it is profitable to bitcast an
14999/// i64 vector to have f64 elements, since the value can then be loaded
15000/// directly into a VFP register.
15001static bool hasNormalLoadOperand(SDNode *N) {
15002 unsigned NumElts = N->getValueType(0).getVectorNumElements();
15003 for (unsigned i = 0; i < NumElts; ++i) {
15004 SDNode *Elt = N->getOperand(i).getNode();
15005 if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile())
15006 return true;
15007 }
15008 return false;
15009}
15010
15011/// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
15012/// ISD::BUILD_VECTOR.
15013static SDValue PerformBUILD_VECTORCombine(SDNode *N,
15014 TargetLowering::DAGCombinerInfo &DCI,
15015 const ARMSubtarget *Subtarget) {
15016 // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
15017 // VMOVRRD is introduced when legalizing i64 types. It forces the i64 value
15018 // into a pair of GPRs, which is fine when the value is used as a scalar,
15019 // but if the i64 value is converted to a vector, we need to undo the VMOVRRD.
15020 SelectionDAG &DAG = DCI.DAG;
15021 if (N->getNumOperands() == 2)
15022 if (SDValue RV = PerformVMOVDRRCombine(N, DAG))
15023 return RV;
15024
15025 // Load i64 elements as f64 values so that type legalization does not split
15026 // them up into i32 values.
15027 EVT VT = N->getValueType(0);
15028 if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N))
15029 return SDValue();
15030 SDLoc dl(N);
15031 SmallVector<SDValue, 8> Ops;
15032 unsigned NumElts = VT.getVectorNumElements();
15033 for (unsigned i = 0; i < NumElts; ++i) {
15034 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i));
15035 Ops.push_back(V);
15036 // Make the DAGCombiner fold the bitcast.
15037 DCI.AddToWorklist(V.getNode());
15038 }
15039 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts);
15040 SDValue BV = DAG.getBuildVector(FloatVT, dl, Ops);
15041 return DAG.getNode(ISD::BITCAST, dl, VT, BV);
15042}
15043
15044/// Target-specific dag combine xforms for ARMISD::BUILD_VECTOR.
15045static SDValue
15046PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
15047 // ARMISD::BUILD_VECTOR is introduced when legalizing ISD::BUILD_VECTOR.
15048 // At that time, we may have inserted bitcasts from integer to float.
15049 // If these bitcasts have survived DAGCombine, change the lowering of this
15050 // BUILD_VECTOR into something more vector friendly, i.e., one that does not
15051 // force the use of floating point types.
15052
15053 // Make sure we can change the type of the vector.
15054 // This is possible iff:
15055 // 1. The vector is only used in a bitcast to an integer type. I.e.,
15056 // 1.1. Vector is used only once.
15057 // 1.2. Use is a bit convert to an integer type.
15058 // 2. The size of its operands is 32 bits (64-bit operands are not legal).
15059 EVT VT = N->getValueType(0);
15060 EVT EltVT = VT.getVectorElementType();
15061
15062 // Check 1.1. and 2.
15063 if (EltVT.getSizeInBits() != 32 || !N->hasOneUse())
15064 return SDValue();
15065
15066 // By construction, the input type must be float.
15067 assert(EltVT == MVT::f32 && "Unexpected type!");
15068
15069 // Check 1.2.
15070 SDNode *Use = *N->use_begin();
15071 if (Use->getOpcode() != ISD::BITCAST ||
15072 Use->getValueType(0).isFloatingPoint())
15073 return SDValue();
15074
15075 // Check profitability.
15076 // Model is, if more than half of the relevant operands are bitcast from
15077 // i32, turn the build_vector into a sequence of insert_vector_elt.
15078 // Relevant operands are everything that is not statically
15079 // (i.e., at compile time) bitcasted.
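// For example, in a 4-element build_vector with one constant lane and three
// lanes that are (bitcast i32 ...), NumOfRelevantElts is 3 and
// NumOfBitCastedElts is 3, so the rewrite below is considered profitable.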
15080 unsigned NumOfBitCastedElts = 0;
15081 unsigned NumElts = VT.getVectorNumElements();
15082 unsigned NumOfRelevantElts = NumElts;
15083 for (unsigned Idx = 0; Idx < NumElts; ++Idx) {
15084 SDValue Elt = N->getOperand(Idx);
15085 if (Elt->getOpcode() == ISD::BITCAST) {
15086 // Assume only bit cast to i32 will go away.
15087 if (Elt->getOperand(0).getValueType() == MVT::i32)
15088 ++NumOfBitCastedElts;
15089 } else if (Elt.isUndef() || isa<ConstantSDNode>(Elt))
15090 // Constants are statically casted, thus do not count them as
15091 // relevant operands.
15092 --NumOfRelevantElts;
15093 }
15094
15095 // Check if more than half of the elements require a non-free bitcast.
15096 if (NumOfBitCastedElts <= NumOfRelevantElts / 2)
15097 return SDValue();
15098
15099 SelectionDAG &DAG = DCI.DAG;
15100 // Create the new vector type.
15101 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
15102 // Check if the type is legal.
15103 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15104 if (!TLI.isTypeLegal(VecVT))
15105 return SDValue();
15106
15107 // Combine:
15108 // ARMISD::BUILD_VECTOR E1, E2, ..., EN.
15109 // => BITCAST INSERT_VECTOR_ELT
15110 // (INSERT_VECTOR_ELT (...), (BITCAST EN-1), N-1),
15111 // (BITCAST EN), N.
15112 SDValue Vec = DAG.getUNDEF(VecVT);
15113 SDLoc dl(N);
15114 for (unsigned Idx = 0 ; Idx < NumElts; ++Idx) {
15115 SDValue V = N->getOperand(Idx);
15116 if (V.isUndef())
15117 continue;
15118 if (V.getOpcode() == ISD::BITCAST &&
15119 V->getOperand(0).getValueType() == MVT::i32)
15120 // Fold obvious case.
15121 V = V.getOperand(0);
15122 else {
15123 V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V);
15124 // Make the DAGCombiner fold the bitcasts.
15125 DCI.AddToWorklist(V.getNode());
15126 }
15127 SDValue LaneIdx = DAG.getConstant(Idx, dl, MVT::i32);
15128 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Vec, V, LaneIdx);
15129 }
15130 Vec = DAG.getNode(ISD::BITCAST, dl, VT, Vec);
15131 // Make the DAGCombiner fold the bitcasts.
15132 DCI.AddToWorklist(Vec.getNode());
15133 return Vec;
15134}
15135
15136static SDValue
15137PerformPREDICATE_CASTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
15138 EVT VT = N->getValueType(0);
15139 SDValue Op = N->getOperand(0);
15140 SDLoc dl(N);
15141
15142 // PREDICATE_CAST(PREDICATE_CAST(x)) == PREDICATE_CAST(x)
15143 if (Op->getOpcode() == ARMISD::PREDICATE_CAST) {
15144 // If the valuetypes are the same, we can remove the cast entirely.
15145 if (Op->getOperand(0).getValueType() == VT)
15146 return Op->getOperand(0);
15147 return DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, Op->getOperand(0));
15148 }
15149
15150 // Turn pred_cast(xor x, -1) into xor(pred_cast x, -1), in order to produce
15151 // more VPNOT which might get folded as else predicates.
15152 if (Op.getValueType() == MVT::i32 && isBitwiseNot(Op)) {
15153 SDValue X =
15154 DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, Op->getOperand(0));
15155 SDValue C = DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT,
15156 DCI.DAG.getConstant(65535, dl, MVT::i32));
15157 return DCI.DAG.getNode(ISD::XOR, dl, VT, X, C);
15158 }
15159
15160 // Only the bottom 16 bits of the source register are used.
15161 if (Op.getValueType() == MVT::i32) {
15162 APInt DemandedMask = APInt::getLowBitsSet(32, 16);
15163 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
15164 if (TLI.SimplifyDemandedBits(Op, DemandedMask, DCI))
15165 return SDValue(N, 0);
15166 }
15167 return SDValue();
15168}
15169
15170static SDValue PerformVECTOR_REG_CASTCombine(SDNode *N, SelectionDAG &DAG,
15171 const ARMSubtarget *ST) {
15172 EVT VT = N->getValueType(0);
15173 SDValue Op = N->getOperand(0);
15174 SDLoc dl(N);
15175
15176 // Under Little endian, a VECTOR_REG_CAST is equivalent to a BITCAST
15177 if (ST->isLittle())
15178 return DAG.getNode(ISD::BITCAST, dl, VT, Op);
15179
15180 // VECTOR_REG_CAST undef -> undef
15181 if (Op.isUndef())
15182 return DAG.getUNDEF(VT);
15183
15184 // VECTOR_REG_CAST(VECTOR_REG_CAST(x)) == VECTOR_REG_CAST(x)
15185 if (Op->getOpcode() == ARMISD::VECTOR_REG_CAST) {
15186 // If the valuetypes are the same, we can remove the cast entirely.
15187 if (Op->getOperand(0).getValueType() == VT)
15188 return Op->getOperand(0);
15189 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Op->getOperand(0));
15190 }
15191
15192 return SDValue();
15193}
15194
15195static SDValue PerformVCMPCombine(SDNode *N, SelectionDAG &DAG,
15196 const ARMSubtarget *Subtarget) {
15197 if (!Subtarget->hasMVEIntegerOps())
15198 return SDValue();
15199
15200 EVT VT = N->getValueType(0);
15201 SDValue Op0 = N->getOperand(0);
15202 SDValue Op1 = N->getOperand(1);
15203 ARMCC::CondCodes Cond =
15204 (ARMCC::CondCodes)cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
15205 SDLoc dl(N);
15206
15207 // vcmp X, 0, cc -> vcmpz X, cc
15208 if (isZeroVector(Op1))
15209 return DAG.getNode(ARMISD::VCMPZ, dl, VT, Op0, N->getOperand(2));
15210
15211 unsigned SwappedCond = getSwappedCondition(Cond);
15212 if (isValidMVECond(SwappedCond, VT.isFloatingPoint())) {
15213 // vcmp 0, X, cc -> vcmpz X, reversed(cc)
15214 if (isZeroVector(Op0))
15215 return DAG.getNode(ARMISD::VCMPZ, dl, VT, Op1,
15216 DAG.getConstant(SwappedCond, dl, MVT::i32));
15217 // vcmp vdup(Y), X, cc -> vcmp X, vdup(Y), reversed(cc)
15218 if (Op0->getOpcode() == ARMISD::VDUP && Op1->getOpcode() != ARMISD::VDUP)
15219 return DAG.getNode(ARMISD::VCMP, dl, VT, Op1, Op0,
15220 DAG.getConstant(SwappedCond, dl, MVT::i32));
15221 }
15222
15223 return SDValue();
15224}
15225
15226/// PerformInsertEltCombine - Target-specific dag combine xforms for
15227/// ISD::INSERT_VECTOR_ELT.
15228static SDValue PerformInsertEltCombine(SDNode *N,
15229 TargetLowering::DAGCombinerInfo &DCI) {
15230 // Bitcast an i64 load inserted into a vector to f64.
15231 // Otherwise, the i64 value will be legalized to a pair of i32 values.
15232 EVT VT = N->getValueType(0);
15233 SDNode *Elt = N->getOperand(1).getNode();
15234 if (VT.getVectorElementType() != MVT::i64 ||
15235 !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile())
15236 return SDValue();
15237
15238 SelectionDAG &DAG = DCI.DAG;
15239 SDLoc dl(N);
15240 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
15241 VT.getVectorNumElements());
15242 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0));
15243 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1));
15244 // Make the DAGCombiner fold the bitcasts.
15245 DCI.AddToWorklist(Vec.getNode());
15246 DCI.AddToWorklist(V.getNode());
15247 SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT,
15248 Vec, V, N->getOperand(2));
15249 return DAG.getNode(ISD::BITCAST, dl, VT, InsElt);
15250}
15251
15252// Convert a pair of extracts from the same base vector to a VMOVRRD. Either
15253// directly or bitcast to an integer if the original is a float vector.
15254// extract(x, n); extract(x, n+1) -> VMOVRRD(extract v2f64 x, n/2)
15255// bitcast(extract(x, n)); bitcast(extract(x, n+1)) -> VMOVRRD(extract x, n/2)
15256static SDValue
15257PerformExtractEltToVMOVRRD(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
15258 EVT VT = N->getValueType(0);
15259 SDLoc dl(N);
15260
15261 if (!DCI.isAfterLegalizeDAG() || VT != MVT::i32 ||
15262 !DCI.DAG.getTargetLoweringInfo().isTypeLegal(MVT::f64))
15263 return SDValue();
15264
15265 SDValue Ext = SDValue(N, 0);
15266 if (Ext.getOpcode() == ISD::BITCAST &&
15267 Ext.getOperand(0).getValueType() == MVT::f32)
15268 Ext = Ext.getOperand(0);
15269 if (Ext.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
15270 !isa<ConstantSDNode>(Ext.getOperand(1)) ||
15271 Ext.getConstantOperandVal(1) % 2 != 0)
15272 return SDValue();
15273 if (Ext->use_size() == 1 &&
15274 (Ext->use_begin()->getOpcode() == ISD::SINT_TO_FP ||
15275 Ext->use_begin()->getOpcode() == ISD::UINT_TO_FP))
15276 return SDValue();
15277
15278 SDValue Op0 = Ext.getOperand(0);
15279 EVT VecVT = Op0.getValueType();
15280 unsigned ResNo = Op0.getResNo();
15281 unsigned Lane = Ext.getConstantOperandVal(1);
15282 if (VecVT.getVectorNumElements() != 4)
15283 return SDValue();
15284
15285 // Find another extract, of Lane + 1
15286 auto OtherIt = find_if(Op0->uses(), [&](SDNode *V) {
15287 return V->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15288 isa<ConstantSDNode>(V->getOperand(1)) &&
15289 V->getConstantOperandVal(1) == Lane + 1 &&
15290 V->getOperand(0).getResNo() == ResNo;
15291 });
15292 if (OtherIt == Op0->uses().end())
15293 return SDValue();
15294
15295 // For float extracts, we need to be converting to a i32 for both vector
15296 // lanes.
15297 SDValue OtherExt(*OtherIt, 0);
15298 if (OtherExt.getValueType() != MVT::i32) {
15299 if (OtherExt->use_size() != 1 ||
15300 OtherExt->use_begin()->getOpcode() != ISD::BITCAST ||
15301 OtherExt->use_begin()->getValueType(0) != MVT::i32)
15302 return SDValue();
15303 OtherExt = SDValue(*OtherExt->use_begin(), 0);
15304 }
15305
15306 // Convert the type to a f64 and extract with a VMOVRRD.
15307 SDValue F64 = DCI.DAG.getNode(
15308 ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
15309 DCI.DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v2f64, Op0),
15310 DCI.DAG.getConstant(Ext.getConstantOperandVal(1) / 2, dl, MVT::i32));
15311 SDValue VMOVRRD =
15312 DCI.DAG.getNode(ARMISD::VMOVRRD, dl, {MVT::i32, MVT::i32}, F64);
15313
15314 DCI.CombineTo(OtherExt.getNode(), SDValue(VMOVRRD.getNode(), 1));
15315 return VMOVRRD;
15316}
15317
15318static SDValue PerformExtractEltCombine(SDNode *N,
15319 TargetLowering::DAGCombinerInfo &DCI,
15320 const ARMSubtarget *ST) {
15321 SDValue Op0 = N->getOperand(0);
15322 EVT VT = N->getValueType(0);
15323 SDLoc dl(N);
15324
15325 // extract (vdup x) -> x
15326 if (Op0->getOpcode() == ARMISD::VDUP) {
15327 SDValue X = Op0->getOperand(0);
15328 if (VT == MVT::f16 && X.getValueType() == MVT::i32)
15329 return DCI.DAG.getNode(ARMISD::VMOVhr, dl, VT, X);
15330 if (VT == MVT::i32 && X.getValueType() == MVT::f16)
15331 return DCI.DAG.getNode(ARMISD::VMOVrh, dl, VT, X);
15332 if (VT == MVT::f32 && X.getValueType() == MVT::i32)
15333 return DCI.DAG.getNode(ISD::BITCAST, dl, VT, X);
15334
15335 while (X.getValueType() != VT && X->getOpcode() == ISD::BITCAST)
15336 X = X->getOperand(0);
15337 if (X.getValueType() == VT)
15338 return X;
15339 }
15340
15341 // extract ARM_BUILD_VECTOR -> x
15342 if (Op0->getOpcode() == ARMISD::BUILD_VECTOR &&
15343 isa<ConstantSDNode>(N->getOperand(1)) &&
15344 N->getConstantOperandVal(1) < Op0.getNumOperands()) {
15345 return Op0.getOperand(N->getConstantOperandVal(1));
15346 }
15347
15348 // extract(bitcast(BUILD_VECTOR(VMOVDRR(a, b), ..))) -> a or b
15349 if (Op0.getValueType() == MVT::v4i32 &&
15350 isa<ConstantSDNode>(N->getOperand(1)) &&
15351 Op0.getOpcode() == ISD::BITCAST &&
15352 Op0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
15353 Op0.getOperand(0).getValueType() == MVT::v2f64) {
15354 SDValue BV = Op0.getOperand(0);
15355 unsigned Offset = N->getConstantOperandVal(1);
15356 SDValue MOV = BV.getOperand(Offset < 2 ? 0 : 1);
15357 if (MOV.getOpcode() == ARMISD::VMOVDRR)
15358 return MOV.getOperand(ST->isLittle() ? Offset % 2 : 1 - Offset % 2);
15359 }
15360
15361 // extract x, n; extract x, n+1 -> VMOVRRD x
15362 if (SDValue R = PerformExtractEltToVMOVRRD(N, DCI))
15363 return R;
15364
15365 // extract (MVETrunc(x)) -> extract x
15366 if (Op0->getOpcode() == ARMISD::MVETRUNC) {
15367 unsigned Idx = N->getConstantOperandVal(1);
15368 unsigned Vec =
15369 Idx / Op0->getOperand(0).getValueType().getVectorNumElements();
15370 unsigned SubIdx =
15371 Idx % Op0->getOperand(0).getValueType().getVectorNumElements();
15372 return DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Op0.getOperand(Vec),
15373 DCI.DAG.getConstant(SubIdx, dl, MVT::i32));
15374 }
15375
15376 return SDValue();
15377}
15378
15379static SDValue PerformSignExtendInregCombine(SDNode *N, SelectionDAG &DAG) {
15380 SDValue Op = N->getOperand(0);
15381 EVT VT = N->getValueType(0);
15382
15383 // sext_inreg(VGETLANEu) -> VGETLANEs
15384 if (Op.getOpcode() == ARMISD::VGETLANEu &&
15385 cast<VTSDNode>(N->getOperand(1))->getVT() ==
15386 Op.getOperand(0).getValueType().getScalarType())
15387 return DAG.getNode(ARMISD::VGETLANEs, SDLoc(N), VT, Op.getOperand(0),
15388 Op.getOperand(1));
15389
15390 return SDValue();
15391}
15392
15393// When lowering complex nodes that we recognize, like VQDMULH and MULH, we
15394// can end up with shuffle(binop(shuffle, shuffle)), that can be simplified to
15395// binop as the shuffles cancel out.
15396static SDValue FlattenVectorShuffle(ShuffleVectorSDNode *N, SelectionDAG &DAG) {
15397 EVT VT = N->getValueType(0);
15398 if (!N->getOperand(1).isUndef() || N->getOperand(0).getValueType() != VT)
15399 return SDValue();
15400 SDValue Op = N->getOperand(0);
15401
15402 // Looking for binary operators that will have been folded from
15403 // truncates/extends.
15404 switch (Op.getOpcode()) {
15405 case ARMISD::VQDMULH:
15406 case ISD::MULHS:
15407 case ISD::MULHU:
15408 case ISD::ABDS:
15409 case ISD::ABDU:
15410 case ISD::AVGFLOORS:
15411 case ISD::AVGFLOORU:
15412 case ISD::AVGCEILS:
15413 case ISD::AVGCEILU:
15414 break;
15415 default:
15416 return SDValue();
15417 }
15418
15419 ShuffleVectorSDNode *Op0 = dyn_cast<ShuffleVectorSDNode>(Op.getOperand(0));
15420 ShuffleVectorSDNode *Op1 = dyn_cast<ShuffleVectorSDNode>(Op.getOperand(1));
15421 if (!Op0 || !Op1 || !Op0->getOperand(1).isUndef() ||
15422 !Op1->getOperand(1).isUndef() || Op0->getMask() != Op1->getMask() ||
15423 Op0->getOperand(0).getValueType() != VT)
15424 return SDValue();
15425
15426 // Check the mask turns into an identity shuffle.
15427 ArrayRef<int> NMask = N->getMask();
15428 ArrayRef<int> OpMask = Op0->getMask();
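// Illustrative example: if both masks are <3,2,1,0>, then OpMask[NMask[i]] == i
// for every lane, so the outer shuffle undoes the inner ones and the composed
// shuffle is an identity.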
15429 for (int i = 0, e = NMask.size(); i != e; i++) {
15430 if (NMask[i] > 0 && OpMask[NMask[i]] > 0 && OpMask[NMask[i]] != i)
15431 return SDValue();
15432 }
15433
15434 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
15435 Op0->getOperand(0), Op1->getOperand(0));
15436}
15437
15438static SDValue
15439PerformInsertSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
15440 SDValue Vec = N->getOperand(0);
15441 SDValue SubVec = N->getOperand(1);
15442 uint64_t IdxVal = N->getConstantOperandVal(2);
15443 EVT VecVT = Vec.getValueType();
15444 EVT SubVT = SubVec.getValueType();
15445
15446 // Only do this for legal fixed vector types.
15447 if (!VecVT.isFixedLengthVector() ||
15448 !DCI.DAG.getTargetLoweringInfo().isTypeLegal(VecVT) ||
15449 !DCI.DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
15450 return SDValue();
15451
15452 // Ignore widening patterns.
15453 if (IdxVal == 0 && Vec.isUndef())
15454 return SDValue();
15455
15456 // Subvector must be half the width and an "aligned" insertion.
15457 unsigned NumSubElts = SubVT.getVectorNumElements();
15458 if ((SubVT.getSizeInBits() * 2) != VecVT.getSizeInBits() ||
15459 (IdxVal != 0 && IdxVal != NumSubElts))
15460 return SDValue();
15461
15462 // Fold insert_subvector -> concat_vectors
15463 // insert_subvector(Vec,Sub,lo) -> concat_vectors(Sub,extract(Vec,hi))
15464 // insert_subvector(Vec,Sub,hi) -> concat_vectors(extract(Vec,lo),Sub)
15465 SDLoc DL(N);
15466 SDValue Lo, Hi;
15467 if (IdxVal == 0) {
15468 Lo = SubVec;
15469 Hi = DCI.DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,
15470 DCI.DAG.getVectorIdxConstant(NumSubElts, DL));
15471 } else {
15472 Lo = DCI.DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,
15473 DCI.DAG.getVectorIdxConstant(0, DL));
15474 Hi = SubVec;
15475 }
15476 return DCI.DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Lo, Hi);
15477}
15478
15479// shuffle(MVETrunc(x, y)) -> VMOVN(x, y)
15480static SDValue PerformShuffleVMOVNCombine(ShuffleVectorSDNode *N,
15481 SelectionDAG &DAG) {
15482 SDValue Trunc = N->getOperand(0);
15483 EVT VT = Trunc.getValueType();
15484 if (Trunc.getOpcode() != ARMISD::MVETRUNC || !N->getOperand(1).isUndef())
15485 return SDValue();
15486
15487 SDLoc DL(Trunc);
15488 if (isVMOVNTruncMask(N->getMask(), VT, false))
15489 return DAG.getNode(
15490 ARMISD::VMOVN, DL, VT,
15491 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(0)),
15492 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(1)),
15493 DAG.getConstant(1, DL, MVT::i32));
15494 else if (isVMOVNTruncMask(N->getMask(), VT, true))
15495 return DAG.getNode(
15496 ARMISD::VMOVN, DL, VT,
15497 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(1)),
15498 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(0)),
15499 DAG.getConstant(1, DL, MVT::i32));
15500 return SDValue();
15501}
15502
15503/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
15504/// ISD::VECTOR_SHUFFLE.
15505static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
15506 if (SDValue R = FlattenVectorShuffle(cast<ShuffleVectorSDNode>(N), DAG))
15507 return R;
15508 if (SDValue R = PerformShuffleVMOVNCombine(cast<ShuffleVectorSDNode>(N), DAG))
15509 return R;
15510
15511 // The LLVM shufflevector instruction does not require the shuffle mask
15512 // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does
15513 // have that requirement. When translating to ISD::VECTOR_SHUFFLE, if the
15514 // operands do not match the mask length, they are extended by concatenating
15515 // them with undef vectors. That is probably the right thing for other
15516 // targets, but for NEON it is better to concatenate two double-register
15517 // size vector operands into a single quad-register size vector. Do that
15518 // transformation here:
15519 // shuffle(concat(v1, undef), concat(v2, undef)) ->
15520 // shuffle(concat(v1, v2), undef)
15521 SDValue Op0 = N->getOperand(0);
15522 SDValue Op1 = N->getOperand(1);
15523 if (Op0.getOpcode() != ISD::CONCAT_VECTORS ||
15524 Op1.getOpcode() != ISD::CONCAT_VECTORS ||
15525 Op0.getNumOperands() != 2 ||
15526 Op1.getNumOperands() != 2)
15527 return SDValue();
15528 SDValue Concat0Op1 = Op0.getOperand(1);
15529 SDValue Concat1Op1 = Op1.getOperand(1);
15530 if (!Concat0Op1.isUndef() || !Concat1Op1.isUndef())
15531 return SDValue();
15532 // Skip the transformation if any of the types are illegal.
15533 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15534 EVT VT = N->getValueType(0);
15535 if (!TLI.isTypeLegal(VT) ||
15536 !TLI.isTypeLegal(Concat0Op1.getValueType()) ||
15537 !TLI.isTypeLegal(Concat1Op1.getValueType()))
15538 return SDValue();
15539
15540 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
15541 Op0.getOperand(0), Op1.getOperand(0));
15542 // Translate the shuffle mask.
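// Illustrative example: with HalfElts == 2 and NumElts == 4, a mask element of
// 4 (the first lane of the second concat) maps to 2 in the new mask; lanes that
// referred to the undef halves become -1.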
15543 SmallVector<int, 16> NewMask;
15544 unsigned NumElts = VT.getVectorNumElements();
15545 unsigned HalfElts = NumElts/2;
15546 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
15547 for (unsigned n = 0; n < NumElts; ++n) {
15548 int MaskElt = SVN->getMaskElt(n);
15549 int NewElt = -1;
15550 if (MaskElt < (int)HalfElts)
15551 NewElt = MaskElt;
15552 else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts))
15553 NewElt = HalfElts + MaskElt - NumElts;
15554 NewMask.push_back(NewElt);
15555 }
15556 return DAG.getVectorShuffle(VT, SDLoc(N), NewConcat,
15557 DAG.getUNDEF(VT), NewMask);
15558}
15559
15560/// Load/store instruction that can be merged with a base address
15561/// update
15562struct BaseUpdateTarget {
15563 SDNode *N;
15564 bool isIntrinsic;
15565 bool isStore;
15566 unsigned AddrOpIdx;
15567};
15568
15569struct BaseUpdateUser {
15570 /// Instruction that updates a pointer
15571 SDNode *N;
15572 /// Pointer increment operand
15573 SDValue Inc;
15574 /// Pointer increment value if it is a constant, or 0 otherwise
15575 unsigned ConstInc;
15576};
15577
15578static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target,
15579 struct BaseUpdateUser &User,
15580 bool SimpleConstIncOnly,
15581 TargetLowering::DAGCombinerInfo &DCI) {
15582 SelectionDAG &DAG = DCI.DAG;
15583 SDNode *N = Target.N;
15584 MemSDNode *MemN = cast<MemSDNode>(N);
15585 SDLoc dl(N);
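// Illustrative example (assuming the increment matches the access size):
// (vld1 addr) followed by (add addr, #AccessSize) can be rewritten as a single
// VLD1_UPD that also produces the post-incremented pointer, making the
// separate ADD redundant.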
15586
15587 // Find the new opcode for the updating load/store.
15588 bool isLoadOp = true;
15589 bool isLaneOp = false;
15590 // Workaround for vst1x and vld1x intrinsics which do not have alignment
15591 // as an operand.
15592 bool hasAlignment = true;
15593 unsigned NewOpc = 0;
15594 unsigned NumVecs = 0;
15595 if (Target.isIntrinsic) {
15596 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
15597 switch (IntNo) {
15598 default:
15599 llvm_unreachable("unexpected intrinsic for Neon base update");
15600 case Intrinsic::arm_neon_vld1:
15601 NewOpc = ARMISD::VLD1_UPD;
15602 NumVecs = 1;
15603 break;
15604 case Intrinsic::arm_neon_vld2:
15605 NewOpc = ARMISD::VLD2_UPD;
15606 NumVecs = 2;
15607 break;
15608 case Intrinsic::arm_neon_vld3:
15609 NewOpc = ARMISD::VLD3_UPD;
15610 NumVecs = 3;
15611 break;
15612 case Intrinsic::arm_neon_vld4:
15613 NewOpc = ARMISD::VLD4_UPD;
15614 NumVecs = 4;
15615 break;
15616 case Intrinsic::arm_neon_vld1x2:
15617 NewOpc = ARMISD::VLD1x2_UPD;
15618 NumVecs = 2;
15619 hasAlignment = false;
15620 break;
15621 case Intrinsic::arm_neon_vld1x3:
15622 NewOpc = ARMISD::VLD1x3_UPD;
15623 NumVecs = 3;
15624 hasAlignment = false;
15625 break;
15626 case Intrinsic::arm_neon_vld1x4:
15627 NewOpc = ARMISD::VLD1x4_UPD;
15628 NumVecs = 4;
15629 hasAlignment = false;
15630 break;
15631 case Intrinsic::arm_neon_vld2dup:
15632 NewOpc = ARMISD::VLD2DUP_UPD;
15633 NumVecs = 2;
15634 break;
15635 case Intrinsic::arm_neon_vld3dup:
15636 NewOpc = ARMISD::VLD3DUP_UPD;
15637 NumVecs = 3;
15638 break;
15639 case Intrinsic::arm_neon_vld4dup:
15640 NewOpc = ARMISD::VLD4DUP_UPD;
15641 NumVecs = 4;
15642 break;
15643 case Intrinsic::arm_neon_vld2lane:
15644 NewOpc = ARMISD::VLD2LN_UPD;
15645 NumVecs = 2;
15646 isLaneOp = true;
15647 break;
15648 case Intrinsic::arm_neon_vld3lane:
15649 NewOpc = ARMISD::VLD3LN_UPD;
15650 NumVecs = 3;
15651 isLaneOp = true;
15652 break;
15653 case Intrinsic::arm_neon_vld4lane:
15654 NewOpc = ARMISD::VLD4LN_UPD;
15655 NumVecs = 4;
15656 isLaneOp = true;
15657 break;
15658 case Intrinsic::arm_neon_vst1:
15659 NewOpc = ARMISD::VST1_UPD;
15660 NumVecs = 1;
15661 isLoadOp = false;
15662 break;
15663 case Intrinsic::arm_neon_vst2:
15664 NewOpc = ARMISD::VST2_UPD;
15665 NumVecs = 2;
15666 isLoadOp = false;
15667 break;
15668 case Intrinsic::arm_neon_vst3:
15669 NewOpc = ARMISD::VST3_UPD;
15670 NumVecs = 3;
15671 isLoadOp = false;
15672 break;
15673 case Intrinsic::arm_neon_vst4:
15674 NewOpc = ARMISD::VST4_UPD;
15675 NumVecs = 4;
15676 isLoadOp = false;
15677 break;
15678 case Intrinsic::arm_neon_vst2lane:
15679 NewOpc = ARMISD::VST2LN_UPD;
15680 NumVecs = 2;
15681 isLoadOp = false;
15682 isLaneOp = true;
15683 break;
15684 case Intrinsic::arm_neon_vst3lane:
15685 NewOpc = ARMISD::VST3LN_UPD;
15686 NumVecs = 3;
15687 isLoadOp = false;
15688 isLaneOp = true;
15689 break;
15690 case Intrinsic::arm_neon_vst4lane:
15691 NewOpc = ARMISD::VST4LN_UPD;
15692 NumVecs = 4;
15693 isLoadOp = false;
15694 isLaneOp = true;
15695 break;
15696 case Intrinsic::arm_neon_vst1x2:
15697 NewOpc = ARMISD::VST1x2_UPD;
15698 NumVecs = 2;
15699 isLoadOp = false;
15700 hasAlignment = false;
15701 break;
15702 case Intrinsic::arm_neon_vst1x3:
15703 NewOpc = ARMISD::VST1x3_UPD;
15704 NumVecs = 3;
15705 isLoadOp = false;
15706 hasAlignment = false;
15707 break;
15708 case Intrinsic::arm_neon_vst1x4:
15709 NewOpc = ARMISD::VST1x4_UPD;
15710 NumVecs = 4;
15711 isLoadOp = false;
15712 hasAlignment = false;
15713 break;
15714 }
15715 } else {
15716 isLaneOp = true;
15717 switch (N->getOpcode()) {
15718 default:
15719 llvm_unreachable("unexpected opcode for Neon base update");
15720 case ARMISD::VLD1DUP:
15721 NewOpc = ARMISD::VLD1DUP_UPD;
15722 NumVecs = 1;
15723 break;
15724 case ARMISD::VLD2DUP:
15725 NewOpc = ARMISD::VLD2DUP_UPD;
15726 NumVecs = 2;
15727 break;
15728 case ARMISD::VLD3DUP:
15729 NewOpc = ARMISD::VLD3DUP_UPD;
15730 NumVecs = 3;
15731 break;
15732 case ARMISD::VLD4DUP:
15733 NewOpc = ARMISD::VLD4DUP_UPD;
15734 NumVecs = 4;
15735 break;
15736 case ISD::LOAD:
15737 NewOpc = ARMISD::VLD1_UPD;
15738 NumVecs = 1;
15739 isLaneOp = false;
15740 break;
15741 case ISD::STORE:
15742 NewOpc = ARMISD::VST1_UPD;
15743 NumVecs = 1;
15744 isLaneOp = false;
15745 isLoadOp = false;
15746 break;
15747 }
15748 }
15749
15750 // Find the size of memory referenced by the load/store.
15751 EVT VecTy;
15752 if (isLoadOp) {
15753 VecTy = N->getValueType(0);
15754 } else if (Target.isIntrinsic) {
15755 VecTy = N->getOperand(Target.AddrOpIdx + 1).getValueType();
15756 } else {
15757 assert(Target.isStore &&
15758 "Node has to be a load, a store, or an intrinsic!");
15759 VecTy = N->getOperand(1).getValueType();
15760 }
15761
15762 bool isVLDDUPOp =
15763 NewOpc == ARMISD::VLD1DUP_UPD || NewOpc == ARMISD::VLD2DUP_UPD ||
15764 NewOpc == ARMISD::VLD3DUP_UPD || NewOpc == ARMISD::VLD4DUP_UPD;
15765
15766 unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
15767 if (isLaneOp || isVLDDUPOp)
15768 NumBytes /= VecTy.getVectorNumElements();
15769
15770 if (NumBytes >= 3 * 16 && User.ConstInc != NumBytes) {
15771 // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
15772 // separate instructions that make it harder to use a non-constant update.
15773 return false;
15774 }
15775
15776 if (SimpleConstIncOnly && User.ConstInc != NumBytes)
15777 return false;
15778
15779 // OK, we found an ADD we can fold into the base update.
15780 // Now, create a _UPD node, taking care of not breaking alignment.
15781
15782 EVT AlignedVecTy = VecTy;
15783 Align Alignment = MemN->getAlign();
15784
15785 // If this is a less-than-standard-aligned load/store, change the type to
15786 // match the standard alignment.
15787 // The alignment is overlooked when selecting _UPD variants; and it's
15788 // easier to introduce bitcasts here than fix that.
15789 // There are 3 ways to get to this base-update combine:
15790 // - intrinsics: they are assumed to be properly aligned (to the standard
15791 // alignment of the memory type), so we don't need to do anything.
15792 // - ARMISD::VLDx nodes: they are only generated from the aforementioned
15793 // intrinsics, so, likewise, there's nothing to do.
15794 // - generic load/store instructions: the alignment is specified as an
15795 // explicit operand, rather than implicitly as the standard alignment
15796 // of the memory type (like the intrinsics). We need to change the
15797 // memory type to match the explicit alignment. That way, we don't
15798 // generate non-standard-aligned ARMISD::VLDx nodes.
15799 if (isa<LSBaseSDNode>(N)) {
15800 if (Alignment.value() < VecTy.getScalarSizeInBits() / 8) {
15801 MVT EltTy = MVT::getIntegerVT(Alignment.value() * 8);
15802 assert(NumVecs == 1 && "Unexpected multi-element generic load/store.");
15803 assert(!isLaneOp && "Unexpected generic load/store lane.");
15804 unsigned NumElts = NumBytes / (EltTy.getSizeInBits() / 8);
15805 AlignedVecTy = MVT::getVectorVT(EltTy, NumElts);
15806 }
15807 // Don't set an explicit alignment on regular load/stores that we want
15808 // to transform to VLD/VST 1_UPD nodes.
15809 // This matches the behavior of regular load/stores, which only get an
15810 // explicit alignment if the MMO alignment is larger than the standard
15811 // alignment of the memory type.
15812 // Intrinsics, however, always get an explicit alignment, set to the
15813 // alignment of the MMO.
15814 Alignment = Align(1);
15815 }
15816
15817 // Create the new updating load/store node.
15818 // First, create an SDVTList for the new updating node's results.
15819 EVT Tys[6];
15820 unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
15821 unsigned n;
15822 for (n = 0; n < NumResultVecs; ++n)
15823 Tys[n] = AlignedVecTy;
15824 Tys[n++] = MVT::i32;
15825 Tys[n] = MVT::Other;
15826 SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs + 2));
15827
15828 // Then, gather the new node's operands.
15829 SmallVector<SDValue, 8> Ops;
15830 Ops.push_back(N->getOperand(0)); // incoming chain
15831 Ops.push_back(N->getOperand(Target.AddrOpIdx));
15832 Ops.push_back(User.Inc);
15833
15834 if (StoreSDNode *StN = dyn_cast<StoreSDNode>(N)) {
15835 // Try to match the intrinsic's signature
15836 Ops.push_back(StN->getValue());
15837 } else {
15838 // Loads (and of course intrinsics) match the intrinsics' signature,
15839 // so just add all but the alignment operand.
15840 unsigned LastOperand =
15841 hasAlignment ? N->getNumOperands() - 1 : N->getNumOperands();
15842 for (unsigned i = Target.AddrOpIdx + 1; i < LastOperand; ++i)
15843 Ops.push_back(N->getOperand(i));
15844 }
15845
15846 // For all node types, the alignment operand is always the last one.
15847 Ops.push_back(DAG.getConstant(Alignment.value(), dl, MVT::i32));
15848
15849 // If this is a non-standard-aligned STORE, the penultimate operand is the
15850 // stored value. Bitcast it to the aligned type.
15851 if (AlignedVecTy != VecTy && N->getOpcode() == ISD::STORE) {
15852 SDValue &StVal = Ops[Ops.size() - 2];
15853 StVal = DAG.getNode(ISD::BITCAST, dl, AlignedVecTy, StVal);
15854 }
15855
15856 EVT LoadVT = isLaneOp ? VecTy.getVectorElementType() : AlignedVecTy;
15857 SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, Ops, LoadVT,
15858 MemN->getMemOperand());
15859
15860 // Update the uses.
15861 SmallVector<SDValue, 5> NewResults;
15862 for (unsigned i = 0; i < NumResultVecs; ++i)
15863 NewResults.push_back(SDValue(UpdN.getNode(), i));
15864
15865 // If this is a non-standard-aligned LOAD, the first result is the loaded
15866 // value. Bitcast it to the expected result type.
15867 if (AlignedVecTy != VecTy && N->getOpcode() == ISD::LOAD) {
15868 SDValue &LdVal = NewResults[0];
15869 LdVal = DAG.getNode(ISD::BITCAST, dl, VecTy, LdVal);
15870 }
15871
15872 NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1)); // chain
15873 DCI.CombineTo(N, NewResults);
15874 DCI.CombineTo(User.N, SDValue(UpdN.getNode(), NumResultVecs));
15875
15876 return true;
15877}
15878
15879 // If (opcode ptr inc) is an ADD-like instruction, return the
15880// increment value. Otherwise return 0.
15881static unsigned getPointerConstIncrement(unsigned Opcode, SDValue Ptr,
15882 SDValue Inc, const SelectionDAG &DAG) {
15883 ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode());
15884 if (!CInc)
15885 return 0;
15886
15887 switch (Opcode) {
15888 case ARMISD::VLD1_UPD:
15889 case ISD::ADD:
15890 return CInc->getZExtValue();
15891 case ISD::OR: {
15892 if (DAG.haveNoCommonBitsSet(Ptr, Inc)) {
15893 // (OR ptr inc) is the same as (ADD ptr inc)
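// Example: if Ptr is known to be 8-byte aligned and Inc is 4, the two values
// share no set bits, so the OR computes the same result as an ADD.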
15894 return CInc->getZExtValue();
15895 }
15896 return 0;
15897 }
15898 default:
15899 return 0;
15900 }
15901}
15902
15903static bool findPointerConstIncrement(SDNode *N, SDValue *Ptr, SDValue *CInc) {
15904 switch (N->getOpcode()) {
15905 case ISD::ADD:
15906 case ISD::OR: {
15907 if (isa<ConstantSDNode>(N->getOperand(1))) {
15908 *Ptr = N->getOperand(0);
15909 *CInc = N->getOperand(1);
15910 return true;
15911 }
15912 return false;
15913 }
15914 case ARMISD::VLD1_UPD: {
15915 if (isa<ConstantSDNode>(N->getOperand(2))) {
15916 *Ptr = N->getOperand(1);
15917 *CInc = N->getOperand(2);
15918 return true;
15919 }
15920 return false;
15921 }
15922 default:
15923 return false;
15924 }
15925}
15926
15927static bool isValidBaseUpdate(SDNode *N, SDNode *User) {
15928 // Check that the add is independent of the load/store.
15929 // Otherwise, folding it would create a cycle. Search through Addr
15930 // as well, since the User may not be a direct user of Addr and
15931 // may only share a base pointer.
15932 SmallPtrSet<const SDNode *, 32> Visited;
15933 SmallVector<const SDNode *, 16> Worklist;
15934 Worklist.push_back(N);
15935 Worklist.push_back(User);
15936 if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
15937 SDNode::hasPredecessorHelper(User, Visited, Worklist))
15938 return false;
15939 return true;
15940}
15941
15942/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,
15943/// NEON load/store intrinsics, and generic vector load/stores, to merge
15944/// base address updates.
15945/// For generic load/stores, the memory type is assumed to be a vector.
15946/// The caller is assumed to have checked legality.
15947static SDValue CombineBaseUpdate(SDNode *N,
15948 TargetLowering::DAGCombinerInfo &DCI) {
15949 const bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
15950 N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
15951 const bool isStore = N->getOpcode() == ISD::STORE;
15952 const unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1);
15953 BaseUpdateTarget Target = {N, isIntrinsic, isStore, AddrOpIdx};
15954
15955 SDValue Addr = N->getOperand(AddrOpIdx);
15956
15957 SmallVector<BaseUpdateUser, 8> BaseUpdates;
15958
15959 // Search for a use of the address operand that is an increment.
15960 for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
15961 UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
15962 SDNode *User = *UI;
15963 if (UI.getUse().getResNo() != Addr.getResNo() ||
15964 User->getNumOperands() != 2)
15965 continue;
15966
15967 SDValue Inc = User->getOperand(UI.getOperandNo() == 1 ? 0 : 1);
15968 unsigned ConstInc =
15969 getPointerConstIncrement(User->getOpcode(), Addr, Inc, DCI.DAG);
15970
15971 if (ConstInc || User->getOpcode() == ISD::ADD)
15972 BaseUpdates.push_back({User, Inc, ConstInc});
15973 }
15974
15975 // If the address is a constant pointer increment itself, find
15976 // another constant increment that has the same base operand
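// For example, if Addr is (add Base, #16) and another user computes
// (add Base, #32), that user is effectively an increment of 16 from Addr and
// is recorded below with a rewritten constant of UserOffset - Offset.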
15977 SDValue Base;
15978 SDValue CInc;
15979 if (findPointerConstIncrement(Addr.getNode(), &Base, &CInc)) {
15980 unsigned Offset =
15981 getPointerConstIncrement(Addr->getOpcode(), Base, CInc, DCI.DAG);
15982 for (SDNode::use_iterator UI = Base->use_begin(), UE = Base->use_end();
15983 UI != UE; ++UI) {
15984
15985 SDNode *User = *UI;
15986 if (UI.getUse().getResNo() != Base.getResNo() || User == Addr.getNode() ||
15987 User->getNumOperands() != 2)
15988 continue;
15989
15990 SDValue UserInc = User->getOperand(UI.getOperandNo() == 0 ? 1 : 0);
15991 unsigned UserOffset =
15992 getPointerConstIncrement(User->getOpcode(), Base, UserInc, DCI.DAG);
15993
15994 if (!UserOffset || UserOffset <= Offset)
15995 continue;
15996
15997 unsigned NewConstInc = UserOffset - Offset;
15998 SDValue NewInc = DCI.DAG.getConstant(NewConstInc, SDLoc(N), MVT::i32);
15999 BaseUpdates.push_back({User, NewInc, NewConstInc});
16000 }
16001 }
16002
16003 // Try to fold the load/store with an update that matches memory
16004 // access size. This should work well for sequential loads.
16005 //
16006 // Filter out invalid updates as well.
16007 unsigned NumValidUpd = BaseUpdates.size();
16008 for (unsigned I = 0; I < NumValidUpd;) {
16009 BaseUpdateUser &User = BaseUpdates[I];
16010 if (!isValidBaseUpdate(N, User.N)) {
16011 --NumValidUpd;
16012 std::swap(BaseUpdates[I], BaseUpdates[NumValidUpd]);
16013 continue;
16014 }
16015
16016 if (TryCombineBaseUpdate(Target, User, /*SimpleConstIncOnly=*/true, DCI))
16017 return SDValue();
16018 ++I;
16019 }
16020 BaseUpdates.resize(NumValidUpd);
16021
16022 // Try to fold with other users. Non-constant updates are considered
16023 // first, and constant updates are sorted to not break a sequence of
16024 // strided accesses (if there is any).
16025 std::stable_sort(BaseUpdates.begin(), BaseUpdates.end(),
16026 [](const BaseUpdateUser &LHS, const BaseUpdateUser &RHS) {
16027 return LHS.ConstInc < RHS.ConstInc;
16028 });
16029 for (BaseUpdateUser &User : BaseUpdates) {
16030 if (TryCombineBaseUpdate(Target, User, /*SimpleConstIncOnly=*/false, DCI))
16031 return SDValue();
16032 }
16033 return SDValue();
16034}
16035
16036static SDValue PerformVLDCombine(SDNode *N,
16037 TargetLowering::DAGCombinerInfo &DCI) {
16038 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
16039 return SDValue();
16040
16041 return CombineBaseUpdate(N, DCI);
16042}
16043
16044static SDValue PerformMVEVLDCombine(SDNode *N,
16045 TargetLowering::DAGCombinerInfo &DCI) {
16046 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
16047 return SDValue();
16048
16049 SelectionDAG &DAG = DCI.DAG;
16050 SDValue Addr = N->getOperand(2);
16051 MemSDNode *MemN = cast<MemSDNode>(N);
16052 SDLoc dl(N);
16053
16054 // For the stores, where there are multiple intrinsics, we only actually want
16055 // to post-inc the last of them.
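// Only the intrinsic writing the final stage is considered (checked via
// operand 5 == 1 for vst2q and operand 7 == 3 for vst4q below).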
16056 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
16057 if (IntNo == Intrinsic::arm_mve_vst2q &&
16058 cast<ConstantSDNode>(N->getOperand(5))->getZExtValue() != 1)
16059 return SDValue();
16060 if (IntNo == Intrinsic::arm_mve_vst4q &&
16061 cast<ConstantSDNode>(N->getOperand(7))->getZExtValue() != 3)
16062 return SDValue();
16063
16064 // Search for a use of the address operand that is an increment.
16065 for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
16066 UE = Addr.getNode()->use_end();
16067 UI != UE; ++UI) {
16068 SDNode *User = *UI;
16069 if (User->getOpcode() != ISD::ADD ||
16070 UI.getUse().getResNo() != Addr.getResNo())
16071 continue;
16072
16073 // Check that the add is independent of the load/store. Otherwise, folding
16074 // it would create a cycle. We can avoid searching through Addr as it's a
16075 // predecessor to both.
16076 SmallPtrSet<const SDNode *, 32> Visited;
16077 SmallVector<const SDNode *, 16> Worklist;
16078 Visited.insert(Addr.getNode());
16079 Worklist.push_back(N);
16080 Worklist.push_back(User);
16081 if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
16082 SDNode::hasPredecessorHelper(User, Visited, Worklist))
16083 continue;
16084
16085 // Find the new opcode for the updating load/store.
16086 bool isLoadOp = true;
16087 unsigned NewOpc = 0;
16088 unsigned NumVecs = 0;
16089 switch (IntNo) {
16090 default:
16091 llvm_unreachable("unexpected intrinsic for MVE VLDn combine");
16092 case Intrinsic::arm_mve_vld2q:
16093 NewOpc = ARMISD::VLD2_UPD;
16094 NumVecs = 2;
16095 break;
16096 case Intrinsic::arm_mve_vld4q:
16097 NewOpc = ARMISD::VLD4_UPD;
16098 NumVecs = 4;
16099 break;
16100 case Intrinsic::arm_mve_vst2q:
16101 NewOpc = ARMISD::VST2_UPD;
16102 NumVecs = 2;
16103 isLoadOp = false;
16104 break;
16105 case Intrinsic::arm_mve_vst4q:
16106 NewOpc = ARMISD::VST4_UPD;
16107 NumVecs = 4;
16108 isLoadOp = false;
16109 break;
16110 }
16111
16112 // Find the size of memory referenced by the load/store.
16113 EVT VecTy;
16114 if (isLoadOp) {
16115 VecTy = N->getValueType(0);
16116 } else {
16117 VecTy = N->getOperand(3).getValueType();
16118 }
16119
16120 unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
16121
16122 // If the increment is a constant, it must match the memory ref size.
16123 SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
16124 ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode());
16125 if (!CInc || CInc->getZExtValue() != NumBytes)
16126 continue;
16127
16128 // Create the new updating load/store node.
16129 // First, create an SDVTList for the new updating node's results.
16130 EVT Tys[6];
16131 unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
16132 unsigned n;
16133 for (n = 0; n < NumResultVecs; ++n)
16134 Tys[n] = VecTy;
16135 Tys[n++] = MVT::i32;
16136 Tys[n] = MVT::Other;
16137 SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs + 2));
16138
16139 // Then, gather the new node's operands.
16140 SmallVector<SDValue, 8> Ops;
16141 Ops.push_back(N->getOperand(0)); // incoming chain
16142 Ops.push_back(N->getOperand(2)); // ptr
16143 Ops.push_back(Inc);
16144
16145 for (unsigned i = 3; i < N->getNumOperands(); ++i)
16146 Ops.push_back(N->getOperand(i));
16147
16148 SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, Ops, VecTy,
16149 MemN->getMemOperand());
16150
16151 // Update the uses.
16152 SmallVector<SDValue, 5> NewResults;
16153 for (unsigned i = 0; i < NumResultVecs; ++i)
16154 NewResults.push_back(SDValue(UpdN.getNode(), i));
16155
16156 NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1)); // chain
16157 DCI.CombineTo(N, NewResults);
16158 DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
16159
16160 break;
16161 }
16162
16163 return SDValue();
16164}
16165
16166/// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
16167/// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
16168/// are also VDUPLANEs. If so, combine them to a vldN-dup operation and
16169/// return true.
16170static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
16171 SelectionDAG &DAG = DCI.DAG;
16172 EVT VT = N->getValueType(0);
16173 // vldN-dup instructions only support 64-bit vectors for N > 1.
16174 if (!VT.is64BitVector())
16175 return false;
16176
16177 // Check if the VDUPLANE operand is a vldN-dup intrinsic.
16178 SDNode *VLD = N->getOperand(0).getNode();
16179 if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
16180 return false;
16181 unsigned NumVecs = 0;
16182 unsigned NewOpc = 0;
16183 unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue();
16184 if (IntNo == Intrinsic::arm_neon_vld2lane) {
16185 NumVecs = 2;
16186 NewOpc = ARMISD::VLD2DUP;
16187 } else if (IntNo == Intrinsic::arm_neon_vld3lane) {
16188 NumVecs = 3;
16189 NewOpc = ARMISD::VLD3DUP;
16190 } else if (IntNo == Intrinsic::arm_neon_vld4lane) {
16191 NumVecs = 4;
16192 NewOpc = ARMISD::VLD4DUP;
16193 } else {
16194 return false;
16195 }
16196
16197 // First check that all the vldN-lane uses are VDUPLANEs and that the lane
16198 // numbers match the load.
16199 unsigned VLDLaneNo =
16200 cast<ConstantSDNode>(VLD->getOperand(NumVecs+3))->getZExtValue();
16201 for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
16202 UI != UE; ++UI) {
16203 // Ignore uses of the chain result.
16204 if (UI.getUse().getResNo() == NumVecs)
16205 continue;
16206 SDNode *User = *UI;
16207 if (User->getOpcode() != ARMISD::VDUPLANE ||
16208 VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue())
16209 return false;
16210 }
16211
16212 // Create the vldN-dup node.
16213 EVT Tys[5];
16214 unsigned n;
16215 for (n = 0; n < NumVecs; ++n)
16216 Tys[n] = VT;
16217 Tys[n] = MVT::Other;
16218 SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumVecs+1));
16219 SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
16220 MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
16221 SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys,
16222 Ops, VLDMemInt->getMemoryVT(),
16223 VLDMemInt->getMemOperand());
16224
16225 // Update the uses.
16226 for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
16227 UI != UE; ++UI) {
16228 unsigned ResNo = UI.getUse().getResNo();
16229 // Ignore uses of the chain result.
16230 if (ResNo == NumVecs)
16231 continue;
16232 SDNode *User = *UI;
16233 DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo));
16234 }
16235
16236 // Now the vldN-lane intrinsic is dead except for its chain result.
16237 // Update uses of the chain.
16238 std::vector<SDValue> VLDDupResults;
16239 for (unsigned n = 0; n < NumVecs; ++n)
16240 VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));
16241 VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));
16242 DCI.CombineTo(VLD, VLDDupResults);
16243
16244 return true;
16245}
16246
16247/// PerformVDUPLANECombine - Target-specific dag combine xforms for
16248/// ARMISD::VDUPLANE.
16249static SDValue PerformVDUPLANECombine(SDNode *N,
16250 TargetLowering::DAGCombinerInfo &DCI,
16251 const ARMSubtarget *Subtarget) {
16252 SDValue Op = N->getOperand(0);
16253 EVT VT = N->getValueType(0);
16254
16255 // On MVE, we just convert the VDUPLANE to a VDUP with an extract.
16256 if (Subtarget->hasMVEIntegerOps()) {
16257 EVT ExtractVT = VT.getVectorElementType();
16258 // We need to ensure we are creating a legal type.
16259 if (!DCI.DAG.getTargetLoweringInfo().isTypeLegal(ExtractVT))
16260 ExtractVT = MVT::i32;
16261 SDValue Extract = DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), ExtractVT,
16262 N->getOperand(0), N->getOperand(1));
16263 return DCI.DAG.getNode(ARMISD::VDUP, SDLoc(N), VT, Extract);
16264 }
16265
16266 // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses
16267 // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation.
16268 if (CombineVLDDUP(N, DCI))
16269 return SDValue(N, 0);
16270
16271 // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
16272 // redundant. Ignore bit_converts for now; element sizes are checked below.
16273 while (Op.getOpcode() == ISD::BITCAST)
16274 Op = Op.getOperand(0);
16275 if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
16276 return SDValue();
16277
16278 // Make sure the VMOV element size is not bigger than the VDUPLANE elements.
16279 unsigned EltSize = Op.getScalarValueSizeInBits();
16280 // The canonical VMOV for a zero vector uses a 32-bit element size.
16281 unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
16282 unsigned EltBits;
16283 if (ARM_AM::decodeVMOVModImm(Imm, EltBits) == 0)
16284 EltSize = 8;
16285 if (EltSize > VT.getScalarSizeInBits())
16286 return SDValue();
16287
16288 return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
16289}
16290
16291/// PerformVDUPCombine - Target-specific dag combine xforms for ARMISD::VDUP.
16292static SDValue PerformVDUPCombine(SDNode *N, SelectionDAG &DAG,
16293 const ARMSubtarget *Subtarget) {
16294 SDValue Op = N->getOperand(0);
16295 SDLoc dl(N);
16296
16297 if (Subtarget->hasMVEIntegerOps()) {
16298 // Convert VDUP f32 -> VDUP BITCAST i32 under MVE, as we know the value will
16299 // need to come from a GPR.
16300 if (Op.getValueType() == MVT::f32)
16301 return DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0),
16302 DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op));
16303 else if (Op.getValueType() == MVT::f16)
16304 return DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0),
16305 DAG.getNode(ARMISD::VMOVrh, dl, MVT::i32, Op));
16306 }
16307
16308 if (!Subtarget->hasNEON())
16309 return SDValue();
16310
16311 // Match VDUP(LOAD) -> VLD1DUP.
16312 // We match this pattern here rather than waiting for isel because the
16313 // transform is only legal for unindexed loads.
16314 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode());
16315 if (LD && Op.hasOneUse() && LD->isUnindexed() &&
16316 LD->getMemoryVT() == N->getValueType(0).getVectorElementType()) {
16317 SDValue Ops[] = {LD->getOperand(0), LD->getOperand(1),
16318 DAG.getConstant(LD->getAlign().value(), SDLoc(N), MVT::i32)};
16319 SDVTList SDTys = DAG.getVTList(N->getValueType(0), MVT::Other);
16320 SDValue VLDDup =
16321 DAG.getMemIntrinsicNode(ARMISD::VLD1DUP, SDLoc(N), SDTys, Ops,
16322 LD->getMemoryVT(), LD->getMemOperand());
16323 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), VLDDup.getValue(1));
16324 return VLDDup;
16325 }
16326
16327 return SDValue();
16328}
16329
16330static SDValue PerformLOADCombine(SDNode *N,
16331 TargetLowering::DAGCombinerInfo &DCI,
16332 const ARMSubtarget *Subtarget) {
16333 EVT VT = N->getValueType(0);
16334
16335 // If this is a legal vector load, try to combine it into a VLD1_UPD.
16336 if (Subtarget->hasNEON() && ISD::isNormalLoad(N) && VT.isVector() &&
16337 DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
16338 return CombineBaseUpdate(N, DCI);
16339
16340 return SDValue();
16341}
16342
16343// Optimize trunc store (of multiple scalars) to shuffle and store. First,
16344// pack all of the elements in one place. Next, store to memory in fewer
16345// chunks.
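// Illustrative example: storing a v4i32 value truncated to v4i16 bitcasts the
// source to v8i16, shuffles the four narrow elements to the front, and then
// emits one or more integer stores of the widest legal type covering the
// 64 bits of truncated data.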
16346static SDValue PerformTruncatingStoreCombine(StoreSDNode *St,
16347 SelectionDAG &DAG) {
16348 SDValue StVal = St->getValue();
16349 EVT VT = StVal.getValueType();
16350 if (!St->isTruncatingStore() || !VT.isVector())
16351 return SDValue();
16352 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16353 EVT StVT = St->getMemoryVT();
16354 unsigned NumElems = VT.getVectorNumElements();
16355 assert(StVT != VT && "Cannot truncate to the same type");
16356 unsigned FromEltSz = VT.getScalarSizeInBits();
16357 unsigned ToEltSz = StVT.getScalarSizeInBits();
16358
16359 // From, To sizes and ElemCount must be pow of two
16360 if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz))
16361 return SDValue();
16362
16363 // We are going to use the original vector elt for storing.
16364 // Accumulated smaller vector elements must be a multiple of the store size.
16365 if (0 != (NumElems * FromEltSz) % ToEltSz)
16366 return SDValue();
16367
16368 unsigned SizeRatio = FromEltSz / ToEltSz;
16369 assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits());
16370
16371 // Create a type on which we perform the shuffle.
16372 EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(),
16373 NumElems * SizeRatio);
16374 assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
16375
16376 SDLoc DL(St);
16377 SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
16378 SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
16379 for (unsigned i = 0; i < NumElems; ++i)
16380 ShuffleVec[i] = DAG.getDataLayout().isBigEndian() ? (i + 1) * SizeRatio - 1
16381 : i * SizeRatio;
16382
16383 // Can't shuffle using an illegal type.
16384 if (!TLI.isTypeLegal(WideVecVT))
16385 return SDValue();
16386
16387 SDValue Shuff = DAG.getVectorShuffle(
16388 WideVecVT, DL, WideVec, DAG.getUNDEF(WideVec.getValueType()), ShuffleVec);
16389 // At this point all of the data is stored at the bottom of the
16390 // register. We now need to save it to mem.
16391
16392 // Find the largest store unit
16393 MVT StoreType = MVT::i8;
16394 for (MVT Tp : MVT::integer_valuetypes()) {
16395 if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz)
16396 StoreType = Tp;
16397 }
16398 // Didn't find a legal store type.
16399 if (!TLI.isTypeLegal(StoreType))
16400 return SDValue();
16401
16402 // Bitcast the original vector into a vector of store-size units
16403 EVT StoreVecVT =
16404 EVT::getVectorVT(*DAG.getContext(), StoreType,
16405 VT.getSizeInBits() / EVT(StoreType).getSizeInBits());
16406 assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
16407 SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff);
16408 SmallVector<SDValue, 8> Chains;
16409 SDValue Increment = DAG.getConstant(StoreType.getSizeInBits() / 8, DL,
16410 TLI.getPointerTy(DAG.getDataLayout()));
16411 SDValue BasePtr = St->getBasePtr();
16412
16413 // Perform one or more big stores into memory.
16414 unsigned E = (ToEltSz * NumElems) / StoreType.getSizeInBits();
16415 for (unsigned I = 0; I < E; I++) {
16416 SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, StoreType,
16417 ShuffWide, DAG.getIntPtrConstant(I, DL));
16418 SDValue Ch =
16419 DAG.getStore(St->getChain(), DL, SubVec, BasePtr, St->getPointerInfo(),
16420 St->getAlign(), St->getMemOperand()->getFlags());
16421 BasePtr =
16422 DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, Increment);
16423 Chains.push_back(Ch);
16424 }
16425 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
16426}
16427
16428// Try taking a single vector store from an fpround (which would otherwise turn
16429// into an expensive buildvector) and splitting it into a series of narrowing
16430// stores.
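// Illustrative example: a store of (fpround v8f32 to v8f16) can become two
// VCVTN conversions of v4f32 halves, each stored with a v4i16 truncating
// store at the appropriate offset.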
16431static SDValue PerformSplittingToNarrowingStores(StoreSDNode *St,
16432 SelectionDAG &DAG) {
16433 if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
16434 return SDValue();
16435 SDValue Trunc = St->getValue();
16436 if (Trunc->getOpcode() != ISD::FP_ROUND)
16437 return SDValue();
16438 EVT FromVT = Trunc->getOperand(0).getValueType();
16439 EVT ToVT = Trunc.getValueType();
16440 if (!ToVT.isVector())
16441 return SDValue();
16442 assert(FromVT.getVectorNumElements() == ToVT.getVectorNumElements());
16443 EVT ToEltVT = ToVT.getVectorElementType();
16444 EVT FromEltVT = FromVT.getVectorElementType();
16445
16446 if (FromEltVT != MVT::f32 || ToEltVT != MVT::f16)
16447 return SDValue();
16448
16449 unsigned NumElements = 4;
16450 if (FromVT.getVectorNumElements() % NumElements != 0)
16451 return SDValue();
16452
16453 // Test if the Trunc will be convertible to a VMOVN with a shuffle, and if so
16454 // use the VMOVN over splitting the store. We are looking for patterns of:
16455 // !rev: 0 N 1 N+1 2 N+2 ...
16456 // rev: N 0 N+1 1 N+2 2 ...
16457 // The shuffle may either be a single source (in which case N = NumElts/2) or
16458 // two inputs extended with concat to the same size (in which case N =
16459 // NumElts).
16460 auto isVMOVNShuffle = [&](ShuffleVectorSDNode *SVN, bool Rev) {
16461 ArrayRef<int> M = SVN->getMask();
16462 unsigned NumElts = ToVT.getVectorNumElements();
16463 if (SVN->getOperand(1).isUndef())
16464 NumElts /= 2;
16465
16466 unsigned Off0 = Rev ? NumElts : 0;
16467 unsigned Off1 = Rev ? 0 : NumElts;
16468
16469 for (unsigned I = 0; I < NumElts; I += 2) {
16470 if (M[I] >= 0 && M[I] != (int)(Off0 + I / 2))
16471 return false;
16472 if (M[I + 1] >= 0 && M[I + 1] != (int)(Off1 + I / 2))
16473 return false;
16474 }
16475
16476 return true;
16477 };
16478
16479 if (auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(Trunc.getOperand(0)))
16480 if (isVMOVNShuffle(Shuffle, false) || isVMOVNShuffle(Shuffle, true))
16481 return SDValue();
16482
16483 LLVMContext &C = *DAG.getContext();
16484 SDLoc DL(St);
16485 // Details about the old store
16486 SDValue Ch = St->getChain();
16487 SDValue BasePtr = St->getBasePtr();
16488 Align Alignment = St->getOriginalAlign();
16489 MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();
16490 AAMDNodes AAInfo = St->getAAInfo();
16491
16492 // We split the store into slices of NumElements. fp16 trunc stores are vcvt
16493 // and then stored as truncating integer stores.
16494 EVT NewFromVT = EVT::getVectorVT(C, FromEltVT, NumElements);
16495 EVT NewToVT = EVT::getVectorVT(
16496 C, EVT::getIntegerVT(C, ToEltVT.getSizeInBits()), NumElements);
16497
16498 SmallVector<SDValue, 4> Stores;
16499 for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
16500 unsigned NewOffset = i * NumElements * ToEltVT.getSizeInBits() / 8;
16501 SDValue NewPtr =
16502 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::Fixed(NewOffset));
16503
16504 SDValue Extract =
16505 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewFromVT, Trunc.getOperand(0),
16506 DAG.getConstant(i * NumElements, DL, MVT::i32));
16507
16508 SDValue FPTrunc =
16509 DAG.getNode(ARMISD::VCVTN, DL, MVT::v8f16, DAG.getUNDEF(MVT::v8f16),
16510 Extract, DAG.getConstant(0, DL, MVT::i32));
16511 Extract = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, MVT::v4i32, FPTrunc);
16512
16513 SDValue Store = DAG.getTruncStore(
16514 Ch, DL, Extract, NewPtr, St->getPointerInfo().getWithOffset(NewOffset),
16515 NewToVT, Alignment.value(), MMOFlags, AAInfo);
16516 Stores.push_back(Store);
16517 }
16518 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
16519}
16520
16521// Try taking a single vector store from an MVETRUNC (which would otherwise turn
16522// into an expensive buildvector) and splitting it into a series of narrowing
16523// stores.
16524static SDValue PerformSplittingMVETruncToNarrowingStores(StoreSDNode *St,
16525 SelectionDAG &DAG) {
16526 if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
16527 return SDValue();
16528 SDValue Trunc = St->getValue();
16529 if (Trunc->getOpcode() != ARMISD::MVETRUNC)
16530 return SDValue();
16531 EVT FromVT = Trunc->getOperand(0).getValueType();
16532 EVT ToVT = Trunc.getValueType();
16533
16534 LLVMContext &C = *DAG.getContext();
16535 SDLoc DL(St);
16536 // Details about the old store
16537 SDValue Ch = St->getChain();
16538 SDValue BasePtr = St->getBasePtr();
16539 Align Alignment = St->getOriginalAlign();
16540 MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();
16541 AAMDNodes AAInfo = St->getAAInfo();
16542
16543 EVT NewToVT = EVT::getVectorVT(C, ToVT.getVectorElementType(),
16544 FromVT.getVectorNumElements());
16545
16546 SmallVector<SDValue, 4> Stores;
16547 for (unsigned i = 0; i < Trunc.getNumOperands(); i++) {
16548 unsigned NewOffset =
16549 i * FromVT.getVectorNumElements() * ToVT.getScalarSizeInBits() / 8;
16550 SDValue NewPtr =
16551 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::Fixed(NewOffset));
16552
16553 SDValue Extract = Trunc.getOperand(i);
16554 SDValue Store = DAG.getTruncStore(
16555 Ch, DL, Extract, NewPtr, St->getPointerInfo().getWithOffset(NewOffset),
16556 NewToVT, Alignment.value(), MMOFlags, AAInfo);
16557 Stores.push_back(Store);
16558 }
16559 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
16560}
16561
16562// Given a floating point store from an extracted vector, with an integer
16563 // VGETLANE that already exists, store the existing VGETLANEu directly. This can
16564 // help reduce fp register pressure, avoids the fp extract, and allows the use
16565 // of integer post-inc stores that are not available with vstr.
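// Illustrative example: for (store f16 (extract_elt v8f16 %x, %lane)) where an
// i32 VGETLANEu of (%x, %lane) already exists in the DAG, the combine stores
// that lane value with an i16 truncating integer store instead.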
16566static SDValue PerformExtractFpToIntStores(StoreSDNode *St, SelectionDAG &DAG) {
16567 if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
16568 return SDValue();
16569 SDValue Extract = St->getValue();
16570 EVT VT = Extract.getValueType();
16571 // For now only uses f16. This may be useful for f32 too, but that will
16572 // be bitcast(extract), not the VGETLANEu we currently check here.
16573 if (VT != MVT::f16 || Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
16574 return SDValue();
16575
16576 SDNode *GetLane =
16577 DAG.getNodeIfExists(ARMISD::VGETLANEu, DAG.getVTList(MVT::i32),
16578 {Extract.getOperand(0), Extract.getOperand(1)});
16579 if (!GetLane)
16580 return SDValue();
16581
16582 LLVMContext &C = *DAG.getContext();
16583 SDLoc DL(St);
16584 // Create a new integer store to replace the existing floating point version.
16585 SDValue Ch = St->getChain();
16586 SDValue BasePtr = St->getBasePtr();
16587 Align Alignment = St->getOriginalAlign();
16588 MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();
16589 AAMDNodes AAInfo = St->getAAInfo();
16590 EVT NewToVT = EVT::getIntegerVT(C, VT.getSizeInBits());
16591 SDValue Store = DAG.getTruncStore(Ch, DL, SDValue(GetLane, 0), BasePtr,
16592 St->getPointerInfo(), NewToVT,
16593 Alignment.value(), MMOFlags, AAInfo);
16594
16595 return Store;
16596}
16597
16598/// PerformSTORECombine - Target-specific dag combine xforms for
16599/// ISD::STORE.
16600static SDValue PerformSTORECombine(SDNode *N,
16601 TargetLowering::DAGCombinerInfo &DCI,
16602 const ARMSubtarget *Subtarget) {
16603 StoreSDNode *St = cast<StoreSDNode>(N);
16604 if (St->isVolatile())
16605 return SDValue();
16606 SDValue StVal = St->getValue();
16607 EVT VT = StVal.getValueType();
16608
16609 if (Subtarget->hasNEON())
16610 if (SDValue Store = PerformTruncatingStoreCombine(St, DCI.DAG))
16611 return Store;
16612
16613 if (Subtarget->hasMVEIntegerOps()) {
16614 if (SDValue NewToken = PerformSplittingToNarrowingStores(St, DCI.DAG))
16615 return NewToken;
16616 if (SDValue NewChain = PerformExtractFpToIntStores(St, DCI.DAG))
16617 return NewChain;
16618 if (SDValue NewToken =
16619 PerformSplittingMVETruncToNarrowingStores(St, DCI.DAG))
16620 return NewToken;
16621 }
16622
16623 if (!ISD::isNormalStore(St))
16624 return SDValue();
16625
16626 // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and
16627 // ARM stores of arguments in the same cache line.
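// The two GPR halves of the VMOVDRR are stored separately at offsets 0 and 4,
// with the operand order chosen according to endianness.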
16628 if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
16629 StVal.getNode()->hasOneUse()) {
16630 SelectionDAG &DAG = DCI.DAG;
16631 bool isBigEndian = DAG.getDataLayout().isBigEndian();
16632 SDLoc DL(St);
16633 SDValue BasePtr = St->getBasePtr();
16634 SDValue NewST1 = DAG.getStore(
16635 St->getChain(), DL, StVal.getNode()->getOperand(isBigEndian ? 1 : 0),
16636 BasePtr, St->getPointerInfo(), St->getOriginalAlign(),
16637 St->getMemOperand()->getFlags());
16638
16639 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
16640 DAG.getConstant(4, DL, MVT::i32));
16641 return DAG.getStore(NewST1.getValue(0), DL,
16642 StVal.getNode()->getOperand(isBigEndian ? 0 : 1),
16643 OffsetPtr, St->getPointerInfo().getWithOffset(4),
16644 St->getOriginalAlign(),
16645 St->getMemOperand()->getFlags());
16646 }
16647
16648 if (StVal.getValueType() == MVT::i64 &&
16649 StVal.getNode()->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
16650
16651 // Bitcast an i64 store extracted from a vector to f64.
16652 // Otherwise, the i64 value will be legalized to a pair of i32 values.
16653 SelectionDAG &DAG = DCI.DAG;
16654 SDLoc dl(StVal);
16655 SDValue IntVec = StVal.getOperand(0);
16656 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
16657 IntVec.getValueType().getVectorNumElements());
16658 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);
16659 SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
16660 Vec, StVal.getOperand(1));
16661 dl = SDLoc(N);
16662 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);
16663 // Make the DAGCombiner fold the bitcasts.
16664 DCI.AddToWorklist(Vec.getNode());
16665 DCI.AddToWorklist(ExtElt.getNode());
16666 DCI.AddToWorklist(V.getNode());
16667 return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
16668 St->getPointerInfo(), St->getAlign(),
16669 St->getMemOperand()->getFlags(), St->getAAInfo());
16670 }
16671
16672 // If this is a legal vector store, try to combine it into a VST1_UPD.
16673 if (Subtarget->hasNEON() && ISD::isNormalStore(N) && VT.isVector() &&
16674 DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
16675 return CombineBaseUpdate(N, DCI);
16676
16677 return SDValue();
16678}
16679
16680/// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD)
16681/// can replace combinations of VMUL and VCVT (floating-point to integer)
16682/// when the VMUL has a constant operand that is a power of 2.
16683///
16684/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
16685/// vmul.f32 d16, d17, d16
16686/// vcvt.s32.f32 d16, d16
16687/// becomes:
16688/// vcvt.s32.f32 d16, d16, #3
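// Concretely, the code below matches fp_to_sint/fp_to_uint of an FMUL whose
// second operand is a build_vector splat of 2^C with 1 <= C <= 32, on v2f32
// or v4f32, and emits the arm_neon_vcvtfp2fxs/vcvtfp2fxu intrinsic with C as
// the fixed-point immediate, truncating afterwards if the integer elements
// are narrower than 32 bits.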
16689static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG,
16690 const ARMSubtarget *Subtarget) {
16691 if (!Subtarget->hasNEON())
16692 return SDValue();
16693
16694 SDValue Op = N->getOperand(0);
16695 if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
16696 Op.getOpcode() != ISD::FMUL)
16697 return SDValue();
16698
16699 SDValue ConstVec = Op->getOperand(1);
16700 if (!isa<BuildVectorSDNode>(ConstVec))
16701 return SDValue();
16702
16703 MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
16704 uint32_t FloatBits = FloatTy.getSizeInBits();
16705 MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
16706 uint32_t IntBits = IntTy.getSizeInBits();
16707 unsigned NumLanes = Op.getValueType().getVectorNumElements();
16708 if (FloatBits != 32 || IntBits > 32 || (NumLanes != 4 && NumLanes != 2)) {
16709 // These instructions only exist converting from f32 to i32. We can handle
16710 // smaller integers by generating an extra truncate, but larger ones would
16711 // be lossy. We also can't handle anything other than 2 or 4 lanes, since
16712 // these instructions only support v2i32/v4i32 types.
16713 return SDValue();
16714 }
16715
16716 BitVector UndefElements;
16717 BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
16718 int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
16719 if (C == -1 || C == 0 || C > 32)
16720 return SDValue();
16721
16722 SDLoc dl(N);
16723 bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;
16724 unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
16725 Intrinsic::arm_neon_vcvtfp2fxu;
16726 SDValue FixConv = DAG.getNode(
16727 ISD::INTRINSIC_WO_CHAIN, dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
16728 DAG.getConstant(IntrinsicOpcode, dl, MVT::i32), Op->getOperand(0),
16729 DAG.getConstant(C, dl, MVT::i32));
16730
16731 if (IntBits < FloatBits)
16732 FixConv = DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), FixConv);
16733
16734 return FixConv;
16735}
16736
16737static SDValue PerformFAddVSelectCombine(SDNode *N, SelectionDAG &DAG,
16738 const ARMSubtarget *Subtarget) {
16739 if (!Subtarget->hasMVEFloatOps())
16740 return SDValue();
16741
16742 // Turn (fadd x, (vselect c, y, -0.0)) into (vselect c, (fadd x, y), x)
16743 // The second form can be more easily turned into a predicated vadd, and
16744 // possibly combined into a fma to become a predicated vfma.
16745 SDValue Op0 = N->getOperand(0);
16746 SDValue Op1 = N->getOperand(1);
16747 EVT VT = N->getValueType(0);
16748 SDLoc DL(N);
16749
16750 // The identity element for a fadd is -0.0 or +0.0 when the nsz flag is set,
16751 // which these VMOV's represent.
16752 auto isIdentitySplat = [&](SDValue Op, bool NSZ) {
16753 if (Op.getOpcode() != ISD::BITCAST ||
16754 Op.getOperand(0).getOpcode() != ARMISD::VMOVIMM)
16755 return false;
16756 uint64_t ImmVal = Op.getOperand(0).getConstantOperandVal(0);
16757 if (VT == MVT::v4f32 && (ImmVal == 1664 || (ImmVal == 0 && NSZ)))
16758 return true;
16759 if (VT == MVT::v8f16 && (ImmVal == 2688 || (ImmVal == 0 && NSZ)))
16760 return true;
16761 return false;
16762 };
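// Canonicalize so the VSELECT is Op1, then require its false operand to be
// one of the identity splats checked above; the result selects between the
// new unpredicated FADD and the untouched x, keeping the original fast-math
// flags.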
16763
16764 if (Op0.getOpcode() == ISD::VSELECT && Op1.getOpcode() != ISD::VSELECT)
16765 std::swap(Op0, Op1);
16766
16767 if (Op1.getOpcode() != ISD::VSELECT)
16768 return SDValue();
16769
16770 SDNodeFlags FaddFlags = N->getFlags();
16771 bool NSZ = FaddFlags.hasNoSignedZeros();
16772 if (!isIdentitySplat(Op1.getOperand(2), NSZ))
16773 return SDValue();
16774
16775 SDValue FAdd =
16776 DAG.getNode(ISD::FADD, DL, VT, Op0, Op1.getOperand(1), FaddFlags);
16777 return DAG.getNode(ISD::VSELECT, DL, VT, Op1.getOperand(0), FAdd, Op0, FaddFlags);
16778}
16779
16780/// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD)
16781/// can replace combinations of VCVT (integer to floating-point) and VDIV
16782/// when the VDIV has a constant operand that is a power of 2.
16783///
16784/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
16785/// vcvt.f32.s32 d16, d16
16786/// vdiv.f32 d16, d17, d16
16787/// becomes:
16788/// vcvt.f32.s32 d16, d16, #3
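// Concretely, the code below matches an FDIV whose first operand is a
// sint_to_fp/uint_to_fp and whose second operand is a build_vector splat of
// 2^C with 1 <= C <= 32, on 2- or 4-lane f32 vectors, extends the integer
// input to 32-bit lanes if needed, and emits arm_neon_vcvtfxs2fp/vcvtfxu2fp
// with C as the fixed-point immediate.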
16789static SDValue PerformVDIVCombine(SDNode *N, SelectionDAG &DAG,
16790 const ARMSubtarget *Subtarget) {
16791 if (!Subtarget->hasNEON())
16792 return SDValue();
16793
16794 SDValue Op = N->getOperand(0);
16795 unsigned OpOpcode = Op.getNode()->getOpcode();
16796 if (!N->getValueType(0).isVector() || !N->getValueType(0).isSimple() ||
16797 (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP))
16798 return SDValue();
16799
16800 SDValue ConstVec = N->getOperand(1);
16801 if (!isa<BuildVectorSDNode>(ConstVec))
16802 return SDValue();
16803
16804 MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
16805 uint32_t FloatBits = FloatTy.getSizeInBits();
16806 MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
16807 uint32_t IntBits = IntTy.getSizeInBits();
16808 unsigned NumLanes = Op.getValueType().getVectorNumElements();
16809 if (FloatBits != 32 || IntBits > 32 || (NumLanes != 4 && NumLanes != 2)) {
16810 // These instructions only exist converting from i32 to f32. We can handle
16811 // smaller integers by generating an extra extend, but larger ones would
16812 // be lossy. We also can't handle anything other than 2 or 4 lanes, since
16813 // these instructions only support v2i32/v4i32 types.
16814 return SDValue();
16815 }
16816
16817 BitVector UndefElements;
16818 BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
16819 int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
16820 if (C == -1 || C == 0 || C > 32)
16821 return SDValue();
16822
16823 SDLoc dl(N);
16824 bool isSigned = OpOpcode == ISD::SINT_TO_FP;
16825 SDValue ConvInput = Op.getOperand(0);
16826 if (IntBits < FloatBits)
16827 ConvInput = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
16828 dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
16829 ConvInput);
16830
16831 unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp :
16832 Intrinsic::arm_neon_vcvtfxu2fp;
16833 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl,
16834 Op.getValueType(),
16835 DAG.getConstant(IntrinsicOpcode, dl, MVT::i32),
16836 ConvInput, DAG.getConstant(C, dl, MVT::i32));
16837}
16838
16839static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG,
16840 const ARMSubtarget *ST) {
16841 if (!ST->hasMVEIntegerOps())
16842 return SDValue();
16843
16844 assert(N->getOpcode() == ISD::VECREDUCE_ADD);
16845 EVT ResVT = N->getValueType(0);
16846 SDValue N0 = N->getOperand(0);
16847 SDLoc dl(N);
16848
16849 // Try to turn vecreduce_add(add(x, y)) into vecreduce(x) + vecreduce(y)
16850 if (ResVT == MVT::i32 && N0.getOpcode() == ISD::ADD &&
16851 (N0.getValueType() == MVT::v4i32 || N0.getValueType() == MVT::v8i16 ||
16852 N0.getValueType() == MVT::v16i8)) {
16853 SDValue Red0 = DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, N0.getOperand(0));
16854 SDValue Red1 = DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, N0.getOperand(1));
16855 return DAG.getNode(ISD::ADD, dl, ResVT, Red0, Red1);
16856 }
16857
16858 // We are looking for something that will have illegal types if left alone,
16859 // but that we can convert to a single instruction under MVE. For example
16860 // vecreduce_add(sext(A, v8i32)) => VADDV.s16 A
16861 // or
16862 // vecreduce_add(mul(zext(A, v16i32), zext(B, v16i32))) => VMLADAV.u8 A, B
16863
16864 // The legal cases are:
16865 // VADDV u/s 8/16/32
16866 // VMLAV u/s 8/16/32
16867 // VADDLV u/s 32
16868 // VMLALV u/s 16/32
16869
16870 // If the input vector is smaller than legal (v4i8/v4i16 for example) we can
16871 // extend it and use v4i32 instead.
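// The helpers below implement that matching: ExtTypeMatches checks that the
// input has the expected lane count and elements no wider than one of the
// listed types, and ExtendIfNeeded widens a smaller-than-128-bit input so the
// VADDV/VMLAV nodes see a full-width vector.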
16872 auto ExtTypeMatches = [](SDValue A, ArrayRef<MVT> ExtTypes) {
16873 EVT AVT = A.getValueType();
16874 return any_of(ExtTypes, [&](MVT Ty) {
16875 return AVT.getVectorNumElements() == Ty.getVectorNumElements() &&
16876 AVT.bitsLE(Ty);
16877 });
16878 };
16879 auto ExtendIfNeeded = [&](SDValue A, unsigned ExtendCode) {
16880 EVT AVT = A.getValueType();
16881 if (!AVT.is128BitVector())
16882 A = DAG.getNode(ExtendCode, dl,
16883 AVT.changeVectorElementType(MVT::getIntegerVT(
16884 128 / AVT.getVectorMinNumElements())),
16885 A);
16886 return A;
16887 };
16888 auto IsVADDV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes) {
16889 if (ResVT != RetTy || N0->getOpcode() != ExtendCode)
16890 return SDValue();
16891 SDValue A = N0->getOperand(0);
16892 if (ExtTypeMatches(A, ExtTypes))
16893 return ExtendIfNeeded(A, ExtendCode);
16894 return SDValue();
16895 };
16896 auto IsPredVADDV = [&](MVT RetTy, unsigned ExtendCode,
16897 ArrayRef<MVT> ExtTypes, SDValue &Mask) {
16898 if (ResVT != RetTy || N0->getOpcode() != ISD::VSELECT ||
16899 !ISD::isBuildVectorAllZeros(N0->getOperand(2).getNode()))
16900 return SDValue();
16901 Mask = N0->getOperand(0);
16902 SDValue Ext = N0->getOperand(1);
16903 if (Ext->getOpcode() != ExtendCode)
16904 return SDValue();
16905 SDValue A = Ext->getOperand(0);
16906 if (ExtTypeMatches(A, ExtTypes))
16907 return ExtendIfNeeded(A, ExtendCode);
16908 return SDValue();
16909 };
16910 auto IsVMLAV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes,
16911 SDValue &A, SDValue &B) {
16912 // For a vmla we are trying to match a larger pattern:
16913 // ExtA = sext/zext A
16914 // ExtB = sext/zext B
16915 // Mul = mul ExtA, ExtB
16916 // vecreduce.add Mul
16917 // There might also be an extra extend between the mul and the addreduce, so
16918 // long as the bitwidth is high enough to make them equivalent (for example
16919 // original v8i16 might be mul at v8i32 and the reduce happens at v8i64).
16920 if (ResVT != RetTy)
16921 return false;
16922 SDValue Mul = N0;
16923 if (Mul->getOpcode() == ExtendCode &&
16924 Mul->getOperand(0).getScalarValueSizeInBits() * 2 >=
16925 ResVT.getScalarSizeInBits())
16926 Mul = Mul->getOperand(0);
16927 if (Mul->getOpcode() != ISD::MUL)
16928 return false;
16929 SDValue ExtA = Mul->getOperand(0);
16930 SDValue ExtB = Mul->getOperand(1);
16931 if (ExtA->getOpcode() != ExtendCode || ExtB->getOpcode() != ExtendCode)
16932 return false;
16933 A = ExtA->getOperand(0);
16934 B = ExtB->getOperand(0);
16935 if (ExtTypeMatches(A, ExtTypes) && ExtTypeMatches(B, ExtTypes)) {
16936 A = ExtendIfNeeded(A, ExtendCode);
16937 B = ExtendIfNeeded(B, ExtendCode);
16938 return true;
16939 }
16940 return false;
16941 };
16942 auto IsPredVMLAV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes,
16943 SDValue &A, SDValue &B, SDValue &Mask) {
16944 // Same as the pattern above with a select for the zero predicated lanes
16945 // ExtA = sext/zext A
16946 // ExtB = sext/zext B
16947 // Mul = mul ExtA, ExtB
16948 // N0 = select Mask, Mul, 0
16949 // vecreduce.add N0
16950 if (ResVT != RetTy || N0->getOpcode() != ISD::VSELECT ||
16951 !ISD::isBuildVectorAllZeros(N0->getOperand(2).getNode()))
16952 return false;
16953 Mask = N0->getOperand(0);
16954 SDValue Mul = N0->getOperand(1);
16955 if (Mul->getOpcode() == ExtendCode &&
16956 Mul->getOperand(0).getScalarValueSizeInBits() * 2 >=
16957 ResVT.getScalarSizeInBits())
16958 Mul = Mul->getOperand(0);
16959 if (Mul->getOpcode() != ISD::MUL)
16960 return false;
16961 SDValue ExtA = Mul->getOperand(0);
16962 SDValue ExtB = Mul->getOperand(1);
16963 if (ExtA->getOpcode() != ExtendCode || ExtB->getOpcode() != ExtendCode)
16964 return false;
16965 A = ExtA->getOperand(0);
16966 B = ExtB->getOperand(0);
16967 if (ExtTypeMatches(A, ExtTypes) && ExtTypeMatches(B, ExtTypes)) {
16968 A = ExtendIfNeeded(A, ExtendCode);
16969 B = ExtendIfNeeded(B, ExtendCode);
16970 return true;
16971 }
16972 return false;
16973 };
16974 auto Create64bitNode = [&](unsigned Opcode, ArrayRef<SDValue> Ops) {
16975 // Split illegal MVT::v16i8->i64 vector reductions into two legal v8i16->i64
16976 // reductions. The operands are extended with MVEEXT, but as they are
16977 // reductions the lane orders do not matter. MVEEXT may be combined with
16978 // loads to produce two extending loads, or else they will be expanded to
16979 // VREV/VMOVL.
16980 EVT VT = Ops[0].getValueType();
16981 if (VT == MVT::v16i8) {
16982 assert((Opcode == ARMISD::VMLALVs || Opcode == ARMISD::VMLALVu) &&
16983 "Unexpected illegal long reduction opcode");
16984 bool IsUnsigned = Opcode == ARMISD::VMLALVu;
16985
16986 SDValue Ext0 =
16987 DAG.getNode(IsUnsigned ? ARMISD::MVEZEXT : ARMISD::MVESEXT, dl,
16988 DAG.getVTList(MVT::v8i16, MVT::v8i16), Ops[0]);
16989 SDValue Ext1 =
16990 DAG.getNode(IsUnsigned ? ARMISD::MVEZEXT : ARMISD::MVESEXT, dl,
16991 DAG.getVTList(MVT::v8i16, MVT::v8i16), Ops[1]);
16992
16993 SDValue MLA0 = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
16994 Ext0, Ext1);
16995 SDValue MLA1 =
16996 DAG.getNode(IsUnsigned ? ARMISD::VMLALVAu : ARMISD::VMLALVAs, dl,
16997 DAG.getVTList(MVT::i32, MVT::i32), MLA0, MLA0.getValue(1),
16998 Ext0.getValue(1), Ext1.getValue(1));
16999 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, MLA1, MLA1.getValue(1));
17000 }
17001 SDValue Node = DAG.getNode(Opcode, dl, {MVT::i32, MVT::i32}, Ops);
17002 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Node,
17003 SDValue(Node.getNode(), 1));
17004 };
17005
17006 SDValue A, B;
17007 SDValue Mask;
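// Try the multiply-accumulate reductions first (VMLAV/VMLALV, unpredicated
// then predicated), then the plain add reductions (VADDV/VADDLV); 64-bit
// results go through Create64bitNode so the two i32 halves are repacked with
// a BUILD_PAIR.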
17008 if (IsVMLAV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B))
17009 return DAG.getNode(ARMISD::VMLAVs, dl, ResVT, A, B);
17010 if (IsVMLAV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B))
17011 return DAG.getNode(ARMISD::VMLAVu, dl, ResVT, A, B);
17012 if (IsVMLAV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v16i8, MVT::v8i16, MVT::v4i32},
17013 A, B))
17014 return Create64bitNode(ARMISD::VMLALVs, {A, B});
17015 if (IsVMLAV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v16i8, MVT::v8i16, MVT::v4i32},
17016 A, B))
17017 return Create64bitNode(ARMISD::VMLALVu, {A, B});
17018 if (IsVMLAV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, A, B))
17019 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17020 DAG.getNode(ARMISD::VMLAVs, dl, MVT::i32, A, B));
17021 if (IsVMLAV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, A, B))
17022 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17023 DAG.getNode(ARMISD::VMLAVu, dl, MVT::i32, A, B));
17024
17025 if (IsPredVMLAV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B,
17026 Mask))
17027 return DAG.getNode(ARMISD::VMLAVps, dl, ResVT, A, B, Mask);
17028 if (IsPredVMLAV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B,
17029 Mask))
17030 return DAG.getNode(ARMISD::VMLAVpu, dl, ResVT, A, B, Mask);
17031 if (IsPredVMLAV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v4i32}, A, B,
17032 Mask))
17033 return Create64bitNode(ARMISD::VMLALVps, {A, B, Mask});
17034 if (IsPredVMLAV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v4i32}, A, B,
17035 Mask))
17036 return Create64bitNode(ARMISD::VMLALVpu, {A, B, Mask});
17037 if (IsPredVMLAV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, A, B, Mask))
17038 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17039 DAG.getNode(ARMISD::VMLAVps, dl, MVT::i32, A, B, Mask));
17040 if (IsPredVMLAV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, A, B, Mask))
17041 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17042 DAG.getNode(ARMISD::VMLAVpu, dl, MVT::i32, A, B, Mask));
17043
17044 if (SDValue A = IsVADDV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}))
17045 return DAG.getNode(ARMISD::VADDVs, dl, ResVT, A);
17046 if (SDValue A = IsVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}))
17047 return DAG.getNode(ARMISD::VADDVu, dl, ResVT, A);
17048 if (SDValue A = IsVADDV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v4i32}))
17049 return Create64bitNode(ARMISD::VADDLVs, {A});
17050 if (SDValue A = IsVADDV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v4i32}))
17051 return Create64bitNode(ARMISD::VADDLVu, {A});
17052 if (SDValue A = IsVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}))
17053 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17054 DAG.getNode(ARMISD::VADDVs, dl, MVT::i32, A));
17055 if (SDValue A = IsVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}))
17056 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17057 DAG.getNode(ARMISD::VADDVu, dl, MVT::i32, A));
17058
17059 if (SDValue A = IsPredVADDV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask))
17060 return DAG.getNode(ARMISD::VADDVps, dl, ResVT, A, Mask);
17061 if (SDValue A = IsPredVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask))
17062 return DAG.getNode(ARMISD::VADDVpu, dl, ResVT, A, Mask);
17063 if (SDValue A = IsPredVADDV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v4i32}, Mask))
17064 return Create64bitNode(ARMISD::VADDLVps, {A, Mask});
17065 if (SDValue A = IsPredVADDV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v4i32}, Mask))
17066 return Create64bitNode(ARMISD::VADDLVpu, {A, Mask});
17067 if (SDValue A = IsPredVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, Mask))
17068 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17069 DAG.getNode(ARMISD::VADDVps, dl, MVT::i32, A, Mask));
17070 if (SDValue A = IsPredVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, Mask))
17071 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17072 DAG.getNode(ARMISD::VADDVpu, dl, MVT::i32, A, Mask));
17073
17074 // Some complications. We can get a case where the two inputs of the mul are
17075 // the same, in which case the output sext will have been helpfully converted
17076 // to a zext. Turn it back.
17077 SDValue Op = N0;
17078 if (Op->getOpcode() == ISD::VSELECT)
17079 Op = Op->getOperand(1);
17080 if (Op->getOpcode() == ISD::ZERO_EXTEND &&
17081 Op->getOperand(0)->getOpcode() == ISD::MUL) {
17082 SDValue Mul = Op->getOperand(0);
17083 if (Mul->getOperand(0) == Mul->getOperand(1) &&
17084 Mul->getOperand(0)->getOpcode() == ISD::SIGN_EXTEND) {
17085 SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, N0->getValueType(0), Mul);
17086 if (Op != N0)
17087 Ext = DAG.getNode(ISD::VSELECT, dl, N0->getValueType(0),
17088 N0->getOperand(0), Ext, N0->getOperand(2));
17089 return DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, Ext);
17090 }
17091 }
17092
17093 return SDValue();
17094}
17095
17096static SDValue PerformVMOVNCombine(SDNode *N,
17097 TargetLowering::DAGCombinerInfo &DCI) {
17098 SDValue Op0 = N->getOperand(0);
17099 SDValue Op1 = N->getOperand(1);
17100 unsigned IsTop = N->getConstantOperandVal(2);
17101
17102 // VMOVNT a undef -> a
17103 // VMOVNB a undef -> a
17104 // VMOVNB undef a -> a
17105 if (Op1->isUndef())
17106 return Op0;
17107 if (Op0->isUndef() && !IsTop)
17108 return Op1;
17109
17110 // VMOVNt(c, VQMOVNb(a, b)) => VQMOVNt(c, b)
17111 // VMOVNb(c, VQMOVNb(a, b)) => VQMOVNb(c, b)
17112 if ((Op1->getOpcode() == ARMISD::VQMOVNs ||
17113 Op1->getOpcode() == ARMISD::VQMOVNu) &&
17114 Op1->getConstantOperandVal(2) == 0)
17115 return DCI.DAG.getNode(Op1->getOpcode(), SDLoc(Op1), N->getValueType(0),
17116 Op0, Op1->getOperand(1), N->getOperand(2));
17117
17118 // Only the bottom lanes from Qm (Op1) and either the top or bottom lanes from
17119 // Qd (Op0) are demanded from a VMOVN, depending on whether we are inserting
17120 // into the top or bottom lanes.
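// The 2-bit splat patterns below encode that: 0b01 marks the even (bottom)
// lanes and 0b10 the odd (top) lanes of the corresponding operand.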
17121 unsigned NumElts = N->getValueType(0).getVectorNumElements();
17122 APInt Op1DemandedElts = APInt::getSplat(NumElts, APInt::getLowBitsSet(2, 1));
17123 APInt Op0DemandedElts =
17124 IsTop ? Op1DemandedElts
17125 : APInt::getSplat(NumElts, APInt::getHighBitsSet(2, 1));
17126
17127 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
17128 if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, DCI))
17129 return SDValue(N, 0);
17130 if (TLI.SimplifyDemandedVectorElts(Op1, Op1DemandedElts, DCI))
17131 return SDValue(N, 0);
17132
17133 return SDValue();
17134}
17135
17136static SDValue PerformVQMOVNCombine(SDNode *N,
17137 TargetLowering::DAGCombinerInfo &DCI) {
17138 SDValue Op0 = N->getOperand(0);
17139 unsigned IsTop = N->getConstantOperandVal(2);
17140
17141 unsigned NumElts = N->getValueType(0).getVectorNumElements();
17142 APInt Op0DemandedElts =
17143 APInt::getSplat(NumElts, IsTop ? APInt::getLowBitsSet(2, 1)
17144 : APInt::getHighBitsSet(2, 1));
17145
17146 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
17147 if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, DCI))
17148 return SDValue(N, 0);
17149 return SDValue();
17150}
17151
17152static SDValue PerformLongShiftCombine(SDNode *N, SelectionDAG &DAG) {
17153 SDLoc DL(N);
17154 SDValue Op0 = N->getOperand(0);
17155 SDValue Op1 = N->getOperand(1);
17156
17157 // Turn X << -C -> X >> C and vice versa. The negative shifts can come up from
17158 // uses of the intrinsics.
17159 if (auto C = dyn_cast<ConstantSDNode>(N->getOperand(2))) {
17160 int ShiftAmt = C->getSExtValue();
17161 if (ShiftAmt == 0) {
17162 SDValue Merge = DAG.getMergeValues({Op0, Op1}, DL);
17163 DAG.ReplaceAllUsesWith(N, Merge.getNode());
17164 return SDValue();
17165 }
17166
17167 if (ShiftAmt >= -32 && ShiftAmt < 0) {
17168 unsigned NewOpcode =
17169 N->getOpcode() == ARMISD::LSLL ? ARMISD::LSRL : ARMISD::LSLL;
17170 SDValue NewShift = DAG.getNode(NewOpcode, DL, N->getVTList(), Op0, Op1,
17171 DAG.getConstant(-ShiftAmt, DL, MVT::i32));
17172 DAG.ReplaceAllUsesWith(N, NewShift.getNode());
17173 return NewShift;
17174 }
17175 }
17176
17177 return SDValue();
17178}
17179
17180/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
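// The cases below fall into three groups: NEON vector shift and shift-insert
// intrinsics with immediate shift amounts (lowered to the ARMISD::V*IMM
// nodes), MVE intrinsics whose i32 scalar operand only uses its low
// lane-sized bits (simplified via demanded bits), and arm_mve_addv/addlv
// (turned into ARMISD::VADDV/VADDLV nodes, the latter followed by a
// BUILD_PAIR).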
17181SDValue ARMTargetLowering::PerformIntrinsicCombine(SDNode *N,
17182 DAGCombinerInfo &DCI) const {
17183 SelectionDAG &DAG = DCI.DAG;
17184 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
17185 switch (IntNo) {
17186 default:
17187 // Don't do anything for most intrinsics.
17188 break;
17189
17190 // Vector shifts: check for immediate versions and lower them.
17191 // Note: This is done during DAG combining instead of DAG legalizing because
17192 // the build_vectors for 64-bit vector element shift counts are generally
17193 // not legal, and it is hard to see their values after they get legalized to
17194 // loads from a constant pool.
17195 case Intrinsic::arm_neon_vshifts:
17196 case Intrinsic::arm_neon_vshiftu:
17197 case Intrinsic::arm_neon_vrshifts:
17198 case Intrinsic::arm_neon_vrshiftu:
17199 case Intrinsic::arm_neon_vrshiftn:
17200 case Intrinsic::arm_neon_vqshifts:
17201 case Intrinsic::arm_neon_vqshiftu:
17202 case Intrinsic::arm_neon_vqshiftsu:
17203 case Intrinsic::arm_neon_vqshiftns:
17204 case Intrinsic::arm_neon_vqshiftnu:
17205 case Intrinsic::arm_neon_vqshiftnsu:
17206 case Intrinsic::arm_neon_vqrshiftns:
17207 case Intrinsic::arm_neon_vqrshiftnu:
17208 case Intrinsic::arm_neon_vqrshiftnsu: {
17209 EVT VT = N->getOperand(1).getValueType();
17210 int64_t Cnt;
17211 unsigned VShiftOpc = 0;
17212
17213 switch (IntNo) {
17214 case Intrinsic::arm_neon_vshifts:
17215 case Intrinsic::arm_neon_vshiftu:
17216 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
17217 VShiftOpc = ARMISD::VSHLIMM;
17218 break;
17219 }
17220 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
17221 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ? ARMISD::VSHRsIMM
17222 : ARMISD::VSHRuIMM);
17223 break;
17224 }
17225 return SDValue();
17226
17227 case Intrinsic::arm_neon_vrshifts:
17228 case Intrinsic::arm_neon_vrshiftu:
17229 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))
17230 break;
17231 return SDValue();
17232
17233 case Intrinsic::arm_neon_vqshifts:
17234 case Intrinsic::arm_neon_vqshiftu:
17235 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
17236 break;
17237 return SDValue();
17238
17239 case Intrinsic::arm_neon_vqshiftsu:
17240 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
17241 break;
17242 llvm_unreachable("invalid shift count for vqshlu intrinsic");
17243
17244 case Intrinsic::arm_neon_vrshiftn:
17245 case Intrinsic::arm_neon_vqshiftns:
17246 case Intrinsic::arm_neon_vqshiftnu:
17247 case Intrinsic::arm_neon_vqshiftnsu:
17248 case Intrinsic::arm_neon_vqrshiftns:
17249 case Intrinsic::arm_neon_vqrshiftnu:
17250 case Intrinsic::arm_neon_vqrshiftnsu:
17251 // Narrowing shifts require an immediate right shift.
17252 if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
17253 break;
17254 llvm_unreachable("invalid shift count for narrowing vector shift "
17255 "intrinsic");
17256
17257 default:
17258 llvm_unreachable("unhandled vector shift");
17259 }
17260
17261 switch (IntNo) {
17262 case Intrinsic::arm_neon_vshifts:
17263 case Intrinsic::arm_neon_vshiftu:
17264 // Opcode already set above.
17265 break;
17266 case Intrinsic::arm_neon_vrshifts:
17267 VShiftOpc = ARMISD::VRSHRsIMM;
17268 break;
17269 case Intrinsic::arm_neon_vrshiftu:
17270 VShiftOpc = ARMISD::VRSHRuIMM;
17271 break;
17272 case Intrinsic::arm_neon_vrshiftn:
17273 VShiftOpc = ARMISD::VRSHRNIMM;
17274 break;
17275 case Intrinsic::arm_neon_vqshifts:
17276 VShiftOpc = ARMISD::VQSHLsIMM;
17277 break;
17278 case Intrinsic::arm_neon_vqshiftu:
17279 VShiftOpc = ARMISD::VQSHLuIMM;
17280 break;
17281 case Intrinsic::arm_neon_vqshiftsu:
17282 VShiftOpc = ARMISD::VQSHLsuIMM;
17283 break;
17284 case Intrinsic::arm_neon_vqshiftns:
17285 VShiftOpc = ARMISD::VQSHRNsIMM;
17286 break;
17287 case Intrinsic::arm_neon_vqshiftnu:
17288 VShiftOpc = ARMISD::VQSHRNuIMM;
17289 break;
17290 case Intrinsic::arm_neon_vqshiftnsu:
17291 VShiftOpc = ARMISD::VQSHRNsuIMM;
17292 break;
17293 case Intrinsic::arm_neon_vqrshiftns:
17294 VShiftOpc = ARMISD::VQRSHRNsIMM;
17295 break;
17296 case Intrinsic::arm_neon_vqrshiftnu:
17297 VShiftOpc = ARMISD::VQRSHRNuIMM;
17298 break;
17299 case Intrinsic::arm_neon_vqrshiftnsu:
17300 VShiftOpc = ARMISD::VQRSHRNsuIMM;
17301 break;
17302 }
17303
17304 SDLoc dl(N);
17305 return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
17306 N->getOperand(1), DAG.getConstant(Cnt, dl, MVT::i32));
17307 }
17308
17309 case Intrinsic::arm_neon_vshiftins: {
17310 EVT VT = N->getOperand(1).getValueType();
17311 int64_t Cnt;
17312 unsigned VShiftOpc = 0;
17313
17314 if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
17315 VShiftOpc = ARMISD::VSLIIMM;
17316 else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
17317 VShiftOpc = ARMISD::VSRIIMM;
17318 else {
17319 llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
17320 }
17321
17322 SDLoc dl(N);
17323 return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
17324 N->getOperand(1), N->getOperand(2),
17325 DAG.getConstant(Cnt, dl, MVT::i32));
17326 }
17327
17328 case Intrinsic::arm_neon_vqrshifts:
17329 case Intrinsic::arm_neon_vqrshiftu:
17330 // No immediate versions of these to check for.
17331 break;
17332
17333 case Intrinsic::arm_mve_vqdmlah:
17334 case Intrinsic::arm_mve_vqdmlash:
17335 case Intrinsic::arm_mve_vqrdmlah:
17336 case Intrinsic::arm_mve_vqrdmlash:
17337 case Intrinsic::arm_mve_vmla_n_predicated:
17338 case Intrinsic::arm_mve_vmlas_n_predicated:
17339 case Intrinsic::arm_mve_vqdmlah_predicated:
17340 case Intrinsic::arm_mve_vqdmlash_predicated:
17341 case Intrinsic::arm_mve_vqrdmlah_predicated:
17342 case Intrinsic::arm_mve_vqrdmlash_predicated: {
17343 // These intrinsics all take an i32 scalar operand which is narrowed to the
17344 // size of a single lane of the vector type they return. So we don't need
17345 // any bits of that operand above that point, which allows us to eliminate
17346 // uxth/sxth.
17347 unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
17348 APInt DemandedMask = APInt::getLowBitsSet(32, BitWidth);
17349 if (SimplifyDemandedBits(N->getOperand(3), DemandedMask, DCI))
17350 return SDValue();
17351 break;
17352 }
17353
17354 case Intrinsic::arm_mve_minv:
17355 case Intrinsic::arm_mve_maxv:
17356 case Intrinsic::arm_mve_minav:
17357 case Intrinsic::arm_mve_maxav:
17358 case Intrinsic::arm_mve_minv_predicated:
17359 case Intrinsic::arm_mve_maxv_predicated:
17360 case Intrinsic::arm_mve_minav_predicated:
17361 case Intrinsic::arm_mve_maxav_predicated: {
17362 // These intrinsics all take an i32 scalar operand which is narrowed to the
17363 // size of a single lane of the vector type they take as the other input.
17364 unsigned BitWidth = N->getOperand(2)->getValueType(0).getScalarSizeInBits();
17365 APInt DemandedMask = APInt::getLowBitsSet(32, BitWidth);
17366 if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
17367 return SDValue();
17368 break;
17369 }
17370
17371 case Intrinsic::arm_mve_addv: {
17372 // Turn this intrinsic straight into the appropriate ARMISD::VADDV node,
17373 // which allows PerformADDVecReduce to turn it into VADDLV when possible.
17374 bool Unsigned = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
17375 unsigned Opc = Unsigned ? ARMISD::VADDVu : ARMISD::VADDVs;
17376 return DAG.getNode(Opc, SDLoc(N), N->getVTList(), N->getOperand(1));
17377 }
17378
17379 case Intrinsic::arm_mve_addlv:
17380 case Intrinsic::arm_mve_addlv_predicated: {
17381 // Same for these, but ARMISD::VADDLV has to be followed by a BUILD_PAIR
17382 // which recombines the two outputs into an i64
17383 bool Unsigned = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
17384 unsigned Opc = IntNo == Intrinsic::arm_mve_addlv ?
17385 (Unsigned ? ARMISD::VADDLVu : ARMISD::VADDLVs) :
17386 (Unsigned ? ARMISD::VADDLVpu : ARMISD::VADDLVps);
17387
17388 SmallVector<SDValue, 4> Ops;
17389 for (unsigned i = 1, e = N->getNumOperands(); i < e; i++)
17390 if (i != 2) // skip the unsigned flag
17391 Ops.push_back(N->getOperand(i));
17392
17393 SDLoc dl(N);
17394 SDValue val = DAG.getNode(Opc, dl, {MVT::i32, MVT::i32}, Ops);
17395 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, val.getValue(0),
17396 val.getValue(1));
17397 }
17398 }
17399
17400 return SDValue();
17401}
17402
17403/// PerformShiftCombine - Checks for immediate versions of vector shifts and
17404/// lowers them. As with the vector shift intrinsics, this is done during DAG
17405/// combining instead of DAG legalizing because the build_vectors for 64-bit
17406/// vector element shift counts are generally not legal, and it is hard to see
17407/// their values after they get legalized to loads from a constant pool.
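// In addition to the vector shifts described above, for Thumb1 this also
// rewrites (shl (and x, Mask), ShiftAmt) with a contiguous Mask into a
// shl/srl pair so that the separate AND is no longer needed.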
17408static SDValue PerformShiftCombine(SDNode *N,
17409 TargetLowering::DAGCombinerInfo &DCI,
17410 const ARMSubtarget *ST) {
17411 SelectionDAG &DAG = DCI.DAG;
17412 EVT VT = N->getValueType(0);
17413
17414 if (ST->isThumb1Only() && N->getOpcode() == ISD::SHL && VT == MVT::i32 &&
17415 N->getOperand(0)->getOpcode() == ISD::AND &&
17416 N->getOperand(0)->hasOneUse()) {
17417 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
17418 return SDValue();
17419 // Look for the pattern (shl (and x, AndMask), ShiftAmt). This doesn't
17420 // usually show up because instcombine prefers to canonicalize it to
17421 // (and (shl x, ShiftAmt) (shl AndMask, ShiftAmt)), but the shift can come
17422 // out of GEP lowering in some cases.
17423 SDValue N0 = N->getOperand(0);
17424 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
17425 if (!ShiftAmtNode)
17426 return SDValue();
17427 uint32_t ShiftAmt = static_cast<uint32_t>(ShiftAmtNode->getZExtValue());
17428 ConstantSDNode *AndMaskNode = dyn_cast<ConstantSDNode>(N0->getOperand(1));
17429 if (!AndMaskNode)
17430 return SDValue();
17431 uint32_t AndMask = static_cast<uint32_t>(AndMaskNode->getZExtValue());
17432 // Don't transform uxtb/uxth.
17433 if (AndMask == 255 || AndMask == 65535)
17434 return SDValue();
17435 if (isMask_32(AndMask)) {
17436 uint32_t MaskedBits = countLeadingZeros(AndMask);
17437 if (MaskedBits > ShiftAmt) {
17438 SDLoc DL(N);
17439 SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
17440 DAG.getConstant(MaskedBits, DL, MVT::i32));
17441 return DAG.getNode(
17442 ISD::SRL, DL, MVT::i32, SHL,
17443 DAG.getConstant(MaskedBits - ShiftAmt, DL, MVT::i32));
17444 }
17445 }
17446 }
17447
17448 // Nothing to be done for scalar shifts.
17449 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17450 if (!VT.isVector() || !TLI.isTypeLegal(VT))
17451 return SDValue();
17452 if (ST->hasMVEIntegerOps())
17453 return SDValue();
17454
17455 int64_t Cnt;
17456
17457 switch (N->getOpcode()) {
17458 default: llvm_unreachable("unexpected shift opcode");
17459
17460 case ISD::SHL:
17461 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) {
17462 SDLoc dl(N);
17463 return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),
17464 DAG.getConstant(Cnt, dl, MVT::i32));
17465 }
17466 break;
17467
17468 case ISD::SRA:
17469 case ISD::SRL:
17470 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
17471 unsigned VShiftOpc =
17472 (N->getOpcode() == ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);
17473 SDLoc dl(N);
17474 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
17475 DAG.getConstant(Cnt, dl, MVT::i32));
17476 }
17477 }
17478 return SDValue();
17479}
17480
17481// Look for a sign, zero or fp extend of a larger-than-legal load. This can be
17482// split into multiple extending loads, which are simpler to deal with than an
17483// arbitrary extend. For fp extends we use an integer extending load and a VCVTL
17484// to convert the type to an f32.
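// Concretely: a simple, unindexed, non-extending load feeding this extend is
// split below into 4-lane extending loads at increasing offsets whose results
// are concatenated back to the original width; for f16 sources each piece is
// additionally widened to v4f32 with a VCVTL.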
17485static SDValue PerformSplittingToWideningLoad(SDNode *N, SelectionDAG &DAG) {
17486 SDValue N0 = N->getOperand(0);
17487 if (N0.getOpcode() != ISD::LOAD)
17488 return SDValue();
17489 LoadSDNode *LD = cast<LoadSDNode>(N0.getNode());
17490 if (!LD->isSimple() || !N0.hasOneUse() || LD->isIndexed() ||
17491 LD->getExtensionType() != ISD::NON_EXTLOAD)
17492 return SDValue();
17493 EVT FromVT = LD->getValueType(0);
17494 EVT ToVT = N->getValueType(0);
17495 if (!ToVT.isVector())
17496 return SDValue();
17497 assert(FromVT.getVectorNumElements() == ToVT.getVectorNumElements());
17498 EVT ToEltVT = ToVT.getVectorElementType();
17499 EVT FromEltVT = FromVT.getVectorElementType();
17500
17501 unsigned NumElements = 0;
17502 if (ToEltVT == MVT::i32 && FromEltVT == MVT::i8)
17503 NumElements = 4;
17504 if (ToEltVT == MVT::f32 && FromEltVT == MVT::f16)
17505 NumElements = 4;
17506 if (NumElements == 0 ||
17507 (FromEltVT != MVT::f16 && FromVT.getVectorNumElements() == NumElements) ||
17508 FromVT.getVectorNumElements() % NumElements != 0 ||
17509 !isPowerOf2_32(NumElements))
17510 return SDValue();
17511
17512 LLVMContext &C = *DAG.getContext();
17513 SDLoc DL(LD);
17514 // Details about the old load
17515 SDValue Ch = LD->getChain();
17516 SDValue BasePtr = LD->getBasePtr();
17517 Align Alignment = LD->getOriginalAlign();
17518 MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
17519 AAMDNodes AAInfo = LD->getAAInfo();
17520
17521 ISD::LoadExtType NewExtType =
17522 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
17523 SDValue Offset = DAG.getUNDEF(BasePtr.getValueType());
17524 EVT NewFromVT = EVT::getVectorVT(
17525 C, EVT::getIntegerVT(C, FromEltVT.getScalarSizeInBits()), NumElements);
17526 EVT NewToVT = EVT::getVectorVT(
17527 C, EVT::getIntegerVT(C, ToEltVT.getScalarSizeInBits()), NumElements);
17528
17529 SmallVector<SDValue, 4> Loads;
17530 SmallVector<SDValue, 4> Chains;
17531 for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
17532 unsigned NewOffset = (i * NewFromVT.getSizeInBits()) / 8;
17533 SDValue NewPtr =
17534 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::Fixed(NewOffset));
17535
17536 SDValue NewLoad =
17537 DAG.getLoad(ISD::UNINDEXED, NewExtType, NewToVT, DL, Ch, NewPtr, Offset,
17538 LD->getPointerInfo().getWithOffset(NewOffset), NewFromVT,
17539 Alignment, MMOFlags, AAInfo);
17540 Loads.push_back(NewLoad);
17541 Chains.push_back(SDValue(NewLoad.getNode(), 1));
17542 }
17543
17544 // Float truncs need to be extended with VCVTB's into their floating point types.
17545 if (FromEltVT == MVT::f16) {
17546 SmallVector<SDValue, 4> Extends;
17547
17548 for (unsigned i = 0; i < Loads.size(); i++) {
17549 SDValue LoadBC =
17550 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, MVT::v8f16, Loads[i]);
17551 SDValue FPExt = DAG.getNode(ARMISD::VCVTL, DL, MVT::v4f32, LoadBC,
17552 DAG.getConstant(0, DL, MVT::i32));
17553 Extends.push_back(FPExt);
17554 }
17555
17556 Loads = Extends;
17557 }
17558
17559 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
17560 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewChain);
17561 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ToVT, Loads);
17562}
17563
17564/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
17565/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
17566static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
17567 const ARMSubtarget *ST) {
17568 SDValue N0 = N->getOperand(0);
17569
17570 // Check for sign- and zero-extensions of vector extract operations of 8- and
17571 // 16-bit vector elements. NEON and MVE support these directly. They are
17572 // handled during DAG combining because type legalization will promote them
17573 // to 32-bit types and it is messy to recognize the operations after that.
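// The code below therefore matches a sign/zero/any extend of
// (extract_vector_elt Vec, Lane) with i8/i16 elements, a legal vector type
// and a constant lane, and emits ARMISD::VGETLANEs (for sign extend) or
// ARMISD::VGETLANEu (for zero/any extend) directly.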
17574 if ((ST->hasNEON() || ST->hasMVEIntegerOps()) &&
17575 N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
17576 SDValue Vec = N0.getOperand(0);
17577 SDValue Lane = N0.getOperand(1);
17578 EVT VT = N->getValueType(0);
17579 EVT EltVT = N0.getValueType();
17580 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17581
17582 if (VT == MVT::i32 &&
17583 (EltVT == MVT::i8 || EltVT == MVT::i16) &&
17584 TLI.isTypeLegal(Vec.getValueType()) &&
17585 isa<ConstantSDNode>(Lane)) {
17586
17587 unsigned Opc = 0;
17588 switch (N->getOpcode()) {
17589 default: llvm_unreachable("unexpected opcode");
17590 case ISD::SIGN_EXTEND:
17591 Opc = ARMISD::VGETLANEs;
17592 break;
17593 case ISD::ZERO_EXTEND:
17594 case ISD::ANY_EXTEND:
17595 Opc = ARMISD::VGETLANEu;
17596 break;
17597 }
17598 return DAG.getNode(Opc, SDLoc(N), VT, Vec, Lane);
17599 }
17600 }
17601
17602 if (ST->hasMVEIntegerOps())
17603 if (SDValue NewLoad = PerformSplittingToWideningLoad(N, DAG))
17604 return NewLoad;
17605
17606 return SDValue();
17607}
17608
17609static SDValue PerformFPExtendCombine(SDNode *N, SelectionDAG &DAG,
17610 const ARMSubtarget *ST) {
17611 if (ST->hasMVEFloatOps())
17612 if (SDValue NewLoad = PerformSplittingToWideningLoad(N, DAG))
17613 return NewLoad;
17614
17615 return SDValue();
17616}
17617
17618// Lower smin(smax(x, C1), C2) to ssat or usat, if they have saturating
17619// constant bounds.
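// The code below matches smin(smax(x, MaxC), MinC) in either order, requiring
// MinC + 1 to be a power of two: MinC == ~MaxC gives a symmetric signed range
// and becomes SSAT, while MaxC == 0 gives a [0, MinC] range and becomes USAT.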
17620static SDValue PerformMinMaxToSatCombine(SDValue Op, SelectionDAG &DAG,
17621 const ARMSubtarget *Subtarget) {
17622 if ((Subtarget->isThumb() || !Subtarget->hasV6Ops()) &&
17623 !Subtarget->isThumb2())
17624 return SDValue();
17625
17626 EVT VT = Op.getValueType();
17627 SDValue Op0 = Op.getOperand(0);
17628
17629 if (VT != MVT::i32 ||
17630 (Op0.getOpcode() != ISD::SMIN && Op0.getOpcode() != ISD::SMAX) ||
17631 !isa<ConstantSDNode>(Op.getOperand(1)) ||
17632 !isa<ConstantSDNode>(Op0.getOperand(1)))
17633 return SDValue();
17634
17635 SDValue Min = Op;
17636 SDValue Max = Op0;
17637 SDValue Input = Op0.getOperand(0);
17638 if (Min.getOpcode() == ISD::SMAX)
17639 std::swap(Min, Max);
17640
17641 APInt MinC = Min.getConstantOperandAPInt(1);
17642 APInt MaxC = Max.getConstantOperandAPInt(1);
17643
17644 if (Min.getOpcode() != ISD::SMIN || Max.getOpcode() != ISD::SMAX ||
17645 !(MinC + 1).isPowerOf2())
17646 return SDValue();
17647
17648 SDLoc DL(Op);
17649 if (MinC == ~MaxC)
17650 return DAG.getNode(ARMISD::SSAT, DL, VT, Input,
17651 DAG.getConstant(MinC.countTrailingOnes(), DL, VT));
17652 if (MaxC == 0)
17653 return DAG.getNode(ARMISD::USAT, DL, VT, Input,
17654 DAG.getConstant(MinC.countTrailingOnes(), DL, VT));
17655
17656 return SDValue();
17657}
17658
17659/// PerformMinMaxCombine - Target-specific DAG combining for creating truncating
17660/// saturates.
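// For MVE v4i32/v8i16, a smin/smax pair clamping to the signed range of the
// half-width element type (or a umin clamping to its unsigned maximum) is
// turned into a VQMOVN into the bottom lanes followed by a sign extend (or an
// AND) to restore the full-width value.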
17661static SDValue PerformMinMaxCombine(SDNode *N, SelectionDAG &DAG,
17662 const ARMSubtarget *ST) {
17663 EVT VT = N->getValueType(0);
17664 SDValue N0 = N->getOperand(0);
17665
17666 if (VT == MVT::i32)
17667 return PerformMinMaxToSatCombine(SDValue(N, 0), DAG, ST);
17668
17669 if (!ST->hasMVEIntegerOps())
17670 return SDValue();
17671
17672 if (SDValue V = PerformVQDMULHCombine(N, DAG))
17673 return V;
17674
17675 if (VT != MVT::v4i32 && VT != MVT::v8i16)
17676 return SDValue();
17677
17678 auto IsSignedSaturate = [&](SDNode *Min, SDNode *Max) {
17679 // Check one is a smin and the other is a smax
17680 if (Min->getOpcode() != ISD::SMIN)
17681 std::swap(Min, Max);
17682 if (Min->getOpcode() != ISD::SMIN || Max->getOpcode() != ISD::SMAX)
17683 return false;
17684
17685 APInt SaturateC;
17686 if (VT == MVT::v4i32)
17687 SaturateC = APInt(32, (1 << 15) - 1, true);
17688 else //if (VT == MVT::v8i16)
17689 SaturateC = APInt(16, (1 << 7) - 1, true);
17690
17691 APInt MinC, MaxC;
17692 if (!ISD::isConstantSplatVector(Min->getOperand(1).getNode(), MinC) ||
17693 MinC != SaturateC)
17694 return false;
17695 if (!ISD::isConstantSplatVector(Max->getOperand(1).getNode(), MaxC) ||
17696 MaxC != ~SaturateC)
17697 return false;
17698 return true;
17699 };
17700
17701 if (IsSignedSaturate(N, N0.getNode())) {
17702 SDLoc DL(N);
17703 MVT ExtVT, HalfVT;
17704 if (VT == MVT::v4i32) {
17705 HalfVT = MVT::v8i16;
17706 ExtVT = MVT::v4i16;
17707 } else { // if (VT == MVT::v8i16)
17708 HalfVT = MVT::v16i8;
17709 ExtVT = MVT::v8i8;
17710 }
17711
17712 // Create a VQMOVNB with undef top lanes, then sign extend into the top
17713 // half. That extend will hopefully be removed if only the bottom bits are
17714 // demanded (through a truncating store, for example).
17715 SDValue VQMOVN =
17716 DAG.getNode(ARMISD::VQMOVNs, DL, HalfVT, DAG.getUNDEF(HalfVT),
17717 N0->getOperand(0), DAG.getConstant(0, DL, MVT::i32));
17718 SDValue Bitcast = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, VQMOVN);
17719 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Bitcast,
17720 DAG.getValueType(ExtVT));
17721 }
17722
17723 auto IsUnsignedSaturate = [&](SDNode *Min) {
17724 // For unsigned, we just need to check for <= 0xffff
17725 if (Min->getOpcode() != ISD::UMIN)
17726 return false;
17727
17728 APInt SaturateC;
17729 if (VT == MVT::v4i32)
17730 SaturateC = APInt(32, (1 << 16) - 1, true);
17731 else //if (VT == MVT::v8i16)
17732 SaturateC = APInt(16, (1 << 8) - 1, true);
17733
17734 APInt MinC;
17735 if (!ISD::isConstantSplatVector(Min->getOperand(1).getNode(), MinC) ||
17736 MinC != SaturateC)
17737 return false;
17738 return true;
17739 };
17740
17741 if (IsUnsignedSaturate(N)) {
17742 SDLoc DL(N);
17743 MVT HalfVT;
17744 unsigned ExtConst;
17745 if (VT == MVT::v4i32) {
17746 HalfVT = MVT::v8i16;
17747 ExtConst = 0x0000FFFF;
17748 } else { //if (VT == MVT::v8i16)
17749 HalfVT = MVT::v16i8;
17750 ExtConst = 0x00FF;
17751 }
17752
17753 // Create a VQMOVNB with undef top lanes, then ZExt into the top half with
17754 // an AND. That extend will hopefully be removed if only the bottom bits are
17755 // demanded (through a truncating store, for example).
17756 SDValue VQMOVN =
17757 DAG.getNode(ARMISD::VQMOVNu, DL, HalfVT, DAG.getUNDEF(HalfVT), N0,
17758 DAG.getConstant(0, DL, MVT::i32));
17759 SDValue Bitcast = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, VQMOVN);
17760 return DAG.getNode(ISD::AND, DL, VT, Bitcast,
17761 DAG.getConstant(ExtConst, DL, VT));
17762 }
17763
17764 return SDValue();
17765}
17766
17767static const APInt *isPowerOf2Constant(SDValue V) {
17768 ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
17769 if (!C)
17770 return nullptr;
17771 const APInt *CV = &C->getAPIntValue();
17772 return CV->isPowerOf2() ? CV : nullptr;
17773}
17774
17775SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const {
17776 // If we have a CMOV, OR and AND combination such as:
17777 // if (x & CN)
17778 // y |= CM;
17779 //
17780 // And:
17781 // * CN is a single bit;
17782 // * All bits covered by CM are known zero in y
17783 //
17784 // Then we can convert this into a sequence of BFI instructions. This will
17785 // always be a win if CM is a single bit, will always be no worse than the
17786 // TST&OR sequence if CM is two bits, and for thumb will be no worse if CM is
17787 // three bits (due to the extra IT instruction).
17788
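// The matching below expects a CMPZ of (and x, CN) against zero feeding the
// CMOV, with the operands swapped if necessary so the OR-by-CM arm is the one
// selected when the tested bit is nonzero; x is then shifted right so the
// tested bit lands at bit 0, and one BFI is emitted per set bit of CM.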
17789 SDValue Op0 = CMOV->getOperand(0);
17790 SDValue Op1 = CMOV->getOperand(1);
17791 auto CCNode = cast<ConstantSDNode>(CMOV->getOperand(2));
17792 auto CC = CCNode->getAPIntValue().getLimitedValue();
17793 SDValue CmpZ = CMOV->getOperand(4);
17794
17795 // The compare must be against zero.
17796 if (!isNullConstant(CmpZ->getOperand(1)))
17797 return SDValue();
17798
17799 assert(CmpZ->getOpcode() == ARMISD::CMPZ);
17800 SDValue And = CmpZ->getOperand(0);
17801 if (And->getOpcode() != ISD::AND)
17802 return SDValue();
17803 const APInt *AndC = isPowerOf2Constant(And->getOperand(1));
17804 if (!AndC)
17805 return SDValue();
17806 SDValue X = And->getOperand(0);
17807
17808 if (CC == ARMCC::EQ) {
17809 // We're performing an "equal to zero" compare. Swap the operands so we
17810 // canonicalize on a "not equal to zero" compare.
17811 std::swap(Op0, Op1);
17812 } else {
17813 assert(CC == ARMCC::NE && "How can a CMPZ node not be EQ or NE?");
17814 }
17815
17816 if (Op1->getOpcode() != ISD::OR)
17817 return SDValue();
17818
17819 ConstantSDNode *OrC = dyn_cast<ConstantSDNode>(Op1->getOperand(1));
17820 if (!OrC)
17821 return SDValue();
17822 SDValue Y = Op1->getOperand(0);
17823
17824 if (Op0 != Y)
17825 return SDValue();
17826
17827 // Now, is it profitable to continue?
17828 APInt OrCI = OrC->getAPIntValue();
17829 unsigned Heuristic = Subtarget->isThumb() ? 3 : 2;
17830 if (OrCI.countPopulation() > Heuristic)
17831 return SDValue();
17832
17833 // Lastly, can we determine that the bits defined by OrCI
17834 // are zero in Y?
17835 KnownBits Known = DAG.computeKnownBits(Y);
17836 if ((OrCI & Known.Zero) != OrCI)
17837 return SDValue();
17838
17839 // OK, we can do the combine.
17840 SDValue V = Y;
17841 SDLoc dl(X);
17842 EVT VT = X.getValueType();
17843 unsigned BitInX = AndC->logBase2();
17844
17845 if (BitInX != 0) {
17846 // We must shift X first.
17847 X = DAG.getNode(ISD::SRL, dl, VT, X,
17848 DAG.getConstant(BitInX, dl, VT));
17849 }
17850
17851 for (unsigned BitInY = 0, NumActiveBits = OrCI.getActiveBits();
17852 BitInY < NumActiveBits; ++BitInY) {
17853 if (OrCI[BitInY] == 0)
17854 continue;
17855 APInt Mask(VT.getSizeInBits(), 0);
17856 Mask.setBit(BitInY);
17857 V = DAG.getNode(ARMISD::BFI, dl, VT, V, X,
17858 // Confusingly, the operand is an *inverted* mask.
17859 DAG.getConstant(~Mask, dl, VT));
17860 }
17861
17862 return V;
17863}
17864
17865// Given N, the value controlling the conditional branch, search for the loop
17866// intrinsic, returning it, along with how the value is used. We need to handle
17867// patterns such as the following:
17868// (brcond (xor (setcc (loop.decrement), 0, ne), 1), exit)
17869// (brcond (setcc (loop.decrement), 0, eq), exit)
17870// (brcond (setcc (loop.decrement), 0, ne), header)
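// The search below peels (xor cond, 1) while flipping Negate, records the
// constant and condition code of a setcc against 0 or 1, and stops when it
// reaches the test.start.loop.iterations or loop.decrement.reg intrinsic.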
17871static SDValue SearchLoopIntrinsic(SDValue N, ISD::CondCode &CC, int &Imm,
17872 bool &Negate) {
17873 switch (N->getOpcode()) {
17874 default:
17875 break;
17876 case ISD::XOR: {
17877 if (!isa<ConstantSDNode>(N.getOperand(1)))
17878 return SDValue();
17879 if (!cast<ConstantSDNode>(N.getOperand(1))->isOne())
17880 return SDValue();
17881 Negate = !Negate;
17882 return SearchLoopIntrinsic(N.getOperand(0), CC, Imm, Negate);
17883 }
17884 case ISD::SETCC: {
17885 auto *Const = dyn_cast<ConstantSDNode>(N.getOperand(1));
17886 if (!Const)
17887 return SDValue();
17888 if (Const->isZero())
17889 Imm = 0;
17890 else if (Const->isOne())
17891 Imm = 1;
17892 else
17893 return SDValue();
17894 CC = cast<CondCodeSDNode>(N.getOperand(2))->get();
17895 return SearchLoopIntrinsic(N->getOperand(0), CC, Imm, Negate);
17896 }
17897 case ISD::INTRINSIC_W_CHAIN: {
17898 unsigned IntOp = cast<ConstantSDNode>(N.getOperand(1))->getZExtValue();
17899 if (IntOp != Intrinsic::test_start_loop_iterations &&
17900 IntOp != Intrinsic::loop_decrement_reg)
17901 return SDValue();
17902 return N;
17903 }
17904 }
17905 return SDValue();
17906}
17907
17908static SDValue PerformHWLoopCombine(SDNode *N,
17909 TargetLowering::DAGCombinerInfo &DCI,
17910 const ARMSubtarget *ST) {
17911
17912 // The hwloop intrinsics that we're interested in are used for control flow,
17913 // either for entering or exiting the loop:
17914 // - test.start.loop.iterations will test whether its operand is zero. If it
17915 // is zero, the proceeding branch should not enter the loop.
17916 // - loop.decrement.reg also tests whether its operand is zero. If it is
17917 // zero, the proceeding branch should not branch back to the beginning of
17918 // the loop.
17919 // So here, we need to check how the brcond is using the result of each
17920 // of the intrinsics to ensure that we're branching to the right place at the
17921 // right time.
17922
17923 ISD::CondCode CC;
17924 SDValue Cond;
17925 int Imm = 1;
17926 bool Negate = false;
17927 SDValue Chain = N->getOperand(0);
17928 SDValue Dest;
17929
17930 if (N->getOpcode() == ISD::BRCOND) {
17931 CC = ISD::SETEQ;
17932 Cond = N->getOperand(1);
17933 Dest = N->getOperand(2);
17934 } else {
17935 assert(N->getOpcode() == ISD::BR_CC && "Expected BRCOND or BR_CC!");
17936 CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
17937 Cond = N->getOperand(2);
17938 Dest = N->getOperand(4);
17939 if (auto *Const = dyn_cast<ConstantSDNode>(N->getOperand(3))) {
17940 if (!Const->isOne() && !Const->isZero())
17941 return SDValue();
17942 Imm = Const->getZExtValue();
17943 } else
17944 return SDValue();
17945 }
17946
17947 SDValue Int = SearchLoopIntrinsic(Cond, CC, Imm, Negate);
17948 if (!Int)
17949 return SDValue();
17950
17951 if (Negate)
17952 CC = ISD::getSetCCInverse(CC, /* Integer inverse */ MVT::i32);
17953
17954 auto IsTrueIfZero = [](ISD::CondCode CC, int Imm) {
17955 return (CC == ISD::SETEQ && Imm == 0) ||
17956 (CC == ISD::SETNE && Imm == 1) ||
17957 (CC == ISD::SETLT && Imm == 1) ||
17958 (CC == ISD::SETULT && Imm == 1);
17959 };
17960
17961 auto IsFalseIfZero = [](ISD::CondCode CC, int Imm) {
17962 return (CC == ISD::SETEQ && Imm == 1) ||
17963 (CC == ISD::SETNE && Imm == 0) ||
17964 (CC == ISD::SETGT && Imm == 0) ||
17965 (CC == ISD::SETUGT && Imm == 0) ||
17966 (CC == ISD::SETGE && Imm == 1) ||
17967 (CC == ISD::SETUGE && Imm == 1);
17968 };
17969
17970 assert((IsTrueIfZero(CC, Imm) || IsFalseIfZero(CC, Imm)) &&
17971 "unsupported condition");
17972
17973 SDLoc dl(Int);
17974 SelectionDAG &DAG = DCI.DAG;
17975 SDValue Elements = Int.getOperand(2);
17976 unsigned IntOp = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue();
17977 assert((N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BR)
17978 && "expected single br user");
17979 SDNode *Br = *N->use_begin();
17980 SDValue OtherTarget = Br->getOperand(1);
17981
17982 // Update the unconditional branch to branch to the given Dest.
17983 auto UpdateUncondBr = [](SDNode *Br, SDValue Dest, SelectionDAG &DAG) {
17984 SDValue NewBrOps[] = { Br->getOperand(0), Dest };
17985 SDValue NewBr = DAG.getNode(ISD::BR, SDLoc(Br), MVT::Other, NewBrOps);
17986 DAG.ReplaceAllUsesOfValueWith(SDValue(Br, 0), NewBr);
17987 };
17988
17989 if (IntOp == Intrinsic::test_start_loop_iterations) {
17990 SDValue Res;
17991 SDValue Setup = DAG.getNode(ARMISD::WLSSETUP, dl, MVT::i32, Elements);
17992 // We expect this 'instruction' to branch when the counter is zero.
17993 if (IsTrueIfZero(CC, Imm)) {
17994 SDValue Ops[] = {Chain, Setup, Dest};
17995 Res = DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops);
17996 } else {
17997 // The logic is the reverse of what we need for WLS, so find the other
17998 // basic block target: the target of the proceeding br.
17999 UpdateUncondBr(Br, Dest, DAG);
18000
18001 SDValue Ops[] = {Chain, Setup, OtherTarget};
18002 Res = DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops);
18003 }
18004 // Update LR count to the new value
18005 DAG.ReplaceAllUsesOfValueWith(Int.getValue(0), Setup);
18006 // Update chain
18007 DAG.ReplaceAllUsesOfValueWith(Int.getValue(2), Int.getOperand(0));
18008 return Res;
18009 } else {
18010 SDValue Size = DAG.getTargetConstant(
18011 cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl, MVT::i32);
18012 SDValue Args[] = { Int.getOperand(0), Elements, Size, };
18013 SDValue LoopDec = DAG.getNode(ARMISD::LOOP_DEC, dl,
18014 DAG.getVTList(MVT::i32, MVT::Other), Args);
18015 DAG.ReplaceAllUsesWith(Int.getNode(), LoopDec.getNode());
18016
18017 // We expect this instruction to branch when the count is not zero.
18018 SDValue Target = IsFalseIfZero(CC, Imm) ? Dest : OtherTarget;
18019
18020 // Update the unconditional branch to target the loop preheader if we've
18021 // found the condition has been reversed.
18022 if (Target == OtherTarget)
18023 UpdateUncondBr(Br, Dest, DAG);
18024
18025 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
18026 SDValue(LoopDec.getNode(), 1), Chain);
18027
18028 SDValue EndArgs[] = { Chain, SDValue(LoopDec.getNode(), 0), Target };
18029 return DAG.getNode(ARMISD::LE, dl, MVT::Other, EndArgs);
18030 }
18031 return SDValue();
18032}
18033
18034/// PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
18035SDValue
18036ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const {
18037 SDValue Cmp = N->getOperand(4);
18038 if (Cmp.getOpcode() != ARMISD::CMPZ)
18039 // Only looking at NE cases.
18040 return SDValue();
18041
18042 EVT VT = N->getValueType(0);
18043 SDLoc dl(N);
18044 SDValue LHS = Cmp.getOperand(0);
18045 SDValue RHS = Cmp.getOperand(1);
18046 SDValue Chain = N->getOperand(0);
18047 SDValue BB = N->getOperand(1);
18048 SDValue ARMcc = N->getOperand(2);
18049 ARMCC::CondCodes CC =
18050 (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
18051
18052 // (brcond Chain BB ne CPSR (cmpz (and (cmov 0 1 CC CPSR Cmp) 1) 0))
18053 // -> (brcond Chain BB CC CPSR Cmp)
18054 if (CC == ARMCC::NE && LHS.getOpcode() == ISD::AND && LHS->hasOneUse() &&
18055 LHS->getOperand(0)->getOpcode() == ARMISD::CMOV &&
18056 LHS->getOperand(0)->hasOneUse()) {
18057 auto *LHS00C = dyn_cast<ConstantSDNode>(LHS->getOperand(0)->getOperand(0));
18058 auto *LHS01C = dyn_cast<ConstantSDNode>(LHS->getOperand(0)->getOperand(1));
18059 auto *LHS1C = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
18060 auto *RHSC = dyn_cast<ConstantSDNode>(RHS);
18061 if ((LHS00C && LHS00C->getZExtValue() == 0) &&
18062 (LHS01C && LHS01C->getZExtValue() == 1) &&
18063 (LHS1C && LHS1C->getZExtValue() == 1) &&
18064 (RHSC && RHSC->getZExtValue() == 0)) {
18065 return DAG.getNode(
18066 ARMISD::BRCOND, dl, VT, Chain, BB, LHS->getOperand(0)->getOperand(2),
18067 LHS->getOperand(0)->getOperand(3), LHS->getOperand(0)->getOperand(4));
18068 }
18069 }
18070
18071 return SDValue();
18072}
18073
18074/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
18075SDValue
18076ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
18077 SDValue Cmp = N->getOperand(4);
18078 if (Cmp.getOpcode() != ARMISD::CMPZ)
18079 // Only looking at EQ and NE cases.
18080 return SDValue();
18081
18082 EVT VT = N->getValueType(0);
18083 SDLoc dl(N);
18084 SDValue LHS = Cmp.getOperand(0);
18085 SDValue RHS = Cmp.getOperand(1);
18086 SDValue FalseVal = N->getOperand(0);
18087 SDValue TrueVal = N->getOperand(1);
18088 SDValue ARMcc = N->getOperand(2);
18089 ARMCC::CondCodes CC =
18090 (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
18091
18092 // BFI is only available on V6T2+.
18093 if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) {
18094 SDValue R = PerformCMOVToBFICombine(N, DAG);
18095 if (R)
18096 return R;
18097 }
18098
18099 // Simplify
18100 // mov r1, r0
18101 // cmp r1, x
18102 // mov r0, y
18103 // moveq r0, x
18104 // to
18105 // cmp r0, x
18106 // movne r0, y
18107 //
18108 // mov r1, r0
18109 // cmp r1, x
18110 // mov r0, x
18111 // movne r0, y
18112 // to
18113 // cmp r0, x
18114 // movne r0, y
18115 /// FIXME: Turn this into a target neutral optimization?
18116 SDValue Res;
18117 if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) {
18118 Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc,
18119 N->getOperand(3), Cmp);
18120 } else if (CC == ARMCC::EQ && TrueVal == RHS) {
18121 SDValue ARMcc;
18122 SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl);
18123 Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc,
18124 N->getOperand(3), NewCmp);
18125 }
18126
18127 // (cmov F T ne CPSR (cmpz (cmov 0 1 CC CPSR Cmp) 0))
18128 // -> (cmov F T CC CPSR Cmp)
18129 if (CC == ARMCC::NE && LHS.getOpcode() == ARMISD::CMOV && LHS->hasOneUse()) {
18130 auto *LHS0C = dyn_cast<ConstantSDNode>(LHS->getOperand(0));
18131 auto *LHS1C = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
18132 auto *RHSC = dyn_cast<ConstantSDNode>(RHS);
18133 if ((LHS0C && LHS0C->getZExtValue() == 0) &&
18134 (LHS1C && LHS1C->getZExtValue() == 1) &&
18135 (RHSC && RHSC->getZExtValue() == 0)) {
18136 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
18137 LHS->getOperand(2), LHS->getOperand(3),
18138 LHS->getOperand(4));
18139 }
18140 }
18141
18142 if (!VT.isInteger())
18143 return SDValue();
18144
18145 // Fold away an unnecessary CMPZ/CMOV
18146 // CMOV A, B, C1, $cpsr, (CMPZ (CMOV 1, 0, C2, D), 0) ->
18147 // if C1==EQ -> CMOV A, B, C2, $cpsr, D
18148 // if C1==NE -> CMOV A, B, NOT(C2), $cpsr, D
18149 if (N->getConstantOperandVal(2) == ARMCC::EQ ||
18150 N->getConstantOperandVal(2) == ARMCC::NE) {
18151 ARMCC::CondCodes Cond;
18152 if (SDValue C = IsCMPZCSINC(N->getOperand(4).getNode(), Cond)) {
18153 if (N->getConstantOperandVal(2) == ARMCC::NE)
18154 Cond = ARMCC::getOppositeCondition(Cond);
18155 return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
18156 N->getOperand(1),
18157 DAG.getTargetConstant(Cond, SDLoc(N), MVT::i32),
18158 N->getOperand(3), C);
18159 }
18160 }
18161
18162 // Materialize a boolean comparison for integers so we can avoid branching.
18163 if (isNullConstant(FalseVal)) {
18164 if (CC == ARMCC::EQ && isOneConstant(TrueVal)) {
18165 if (!Subtarget->isThumb1Only() && Subtarget->hasV5TOps()) {
18166 // If x == y then x - y == 0 and ARM's CLZ will return 32, shifting it
18167 // right 5 bits will make that 32 be 1, otherwise it will be 0.
18168 // CMOV 0, 1, ==, (CMPZ x, y) -> SRL (CTLZ (SUB x, y)), 5
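// Worked example of the combine above (illustrative values): if x == y then
// Sub == 0, CTLZ(0) == 32 == 0b100000, and 32 >> 5 == 1; if x != y then
// CTLZ(Sub) <= 31, so the shift by 5 produces 0.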
18169 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
18170 Res = DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::CTLZ, dl, VT, Sub),
18171 DAG.getConstant(5, dl, MVT::i32));
18172 } else {
18173 // CMOV 0, 1, ==, (CMPZ x, y) ->
18174 // (ADDCARRY (SUB x, y), t:0, t:1)
18175 // where t = (SUBCARRY 0, (SUB x, y), 0)
18176 //
18177 // The SUBCARRY computes 0 - (x - y) and this will give a borrow when
18178 // x != y. In other words, a carry C == 1 when x == y, C == 0
18179 // otherwise.
18180 // The final ADDCARRY computes
18181 // x - y + (0 - (x - y)) + C == C
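// Worked example of the arithmetic above (32-bit, modular, illustrative
// values): for x == 7, y == 7, Sub == 0, the USUBO does not borrow, so
// C == 1 and the ADDCARRY yields 0 + 0 + 1 == 1; for x == 9, y == 7,
// Sub == 2, the USUBO borrows, so C == 0 and the ADDCARRY yields
// 2 + 0xFFFFFFFE + 0 == 0 (mod 2^32).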
18182 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
18183 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
18184 SDValue Neg = DAG.getNode(ISD::USUBO, dl, VTs, FalseVal, Sub);
18185 // ISD::SUBCARRY returns a borrow, but what we actually want here is
18186 // the carry.
18187 SDValue Carry =
18188 DAG.getNode(ISD::SUB, dl, MVT::i32,
18189 DAG.getConstant(1, dl, MVT::i32), Neg.getValue(1));
18190 Res = DAG.getNode(ISD::ADDCARRY, dl, VTs, Sub, Neg, Carry);
18191 }
18192 } else if (CC == ARMCC::NE && !isNullConstant(RHS) &&
18193 (!Subtarget->isThumb1Only() || isPowerOf2Constant(TrueVal))) {
18194 // This seems pointless but will allow us to combine it further below.
18195 // CMOV 0, z, !=, (CMPZ x, y) -> CMOV (SUBS x, y), z, !=, (SUBS x, y):1
18196 SDValue Sub =
18197 DAG.getNode(ARMISD::SUBS, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
18198 SDValue CPSRGlue = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
18199 Sub.getValue(1), SDValue());
18200 Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, TrueVal, ARMcc,
18201 N->getOperand(3), CPSRGlue.getValue(1));
18202 FalseVal = Sub;
18203 }
18204 } else if (isNullConstant(TrueVal)) {
18205 if (CC == ARMCC::EQ && !isNullConstant(RHS) &&
18206 (!Subtarget->isThumb1Only() || isPowerOf2Constant(FalseVal))) {
18207 // This seems pointless but will allow us to combine it further below
18208 // Note that we change == for != as this is the dual for the case above.
18209 // CMOV z, 0, ==, (CMPZ x, y) -> CMOV (SUBS x, y), z, !=, (SUBS x, y):1
18210 SDValue Sub =
18211 DAG.getNode(ARMISD::SUBS, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
18212 SDValue CPSRGlue = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
18213 Sub.getValue(1), SDValue());
18214 Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, FalseVal,
18215 DAG.getConstant(ARMCC::NE, dl, MVT::i32),
18216 N->getOperand(3), CPSRGlue.getValue(1));
18217 FalseVal = Sub;
18218 }
18219 }
18220
18221 // On Thumb1, the DAG above may be further combined if z is a power of 2
18222 // (z == 2 ^ K).
18223 // CMOV (SUBS x, y), z, !=, (SUBS x, y):1 ->
18224 // t1 = (USUBO (SUB x, y), 1)
18225 // t2 = (SUBCARRY (SUB x, y), t1:0, t1:1)
18226 // Result = if K != 0 then (SHL t2:0, K) else t2:0
18227 //
18228 // This also handles the special case of comparing against zero; it's
18229 // essentially, the same pattern, except there's no SUBS:
18230 // CMOV x, z, !=, (CMPZ x, 0) ->
18231 // t1 = (USUBO x, 1)
18232 // t2 = (SUBCARRY x, t1:0, t1:1)
18233 // Result = if K != 0 then (SHL t2:0, K) else t2:0
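// Worked example of the sequence above (illustrative values): with z == 4
// (K == 2) and d == x - y, t1 == (d - 1, borrow), where the borrow is set
// only when d == 0. If d != 0, t2:0 == d - (d - 1) - 0 == 1 and the SHL by
// 2 gives 4; if d == 0, t2:0 == 0 - 0xFFFFFFFF - 1 == 0 (mod 2^32), as
// required.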
18234 const APInt *TrueConst;
18235 if (Subtarget->isThumb1Only() && CC == ARMCC::NE &&
18236 ((FalseVal.getOpcode() == ARMISD::SUBS &&
18237 FalseVal.getOperand(0) == LHS && FalseVal.getOperand(1) == RHS) ||
18238 (FalseVal == LHS && isNullConstant(RHS))) &&
18239 (TrueConst = isPowerOf2Constant(TrueVal))) {
18240 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
18241 unsigned ShiftAmount = TrueConst->logBase2();
18242 if (ShiftAmount)
18243 TrueVal = DAG.getConstant(1, dl, VT);
18244 SDValue Subc = DAG.getNode(ISD::USUBO, dl, VTs, FalseVal, TrueVal);
18245 Res = DAG.getNode(ISD::SUBCARRY, dl, VTs, FalseVal, Subc, Subc.getValue(1));
18246
18247 if (ShiftAmount)
18248 Res = DAG.getNode(ISD::SHL, dl, VT, Res,
18249 DAG.getConstant(ShiftAmount, dl, MVT::i32));
18250 }
18251
18252 if (Res.getNode()) {
18253 KnownBits Known = DAG.computeKnownBits(SDValue(N,0));
18254 // Capture demanded bits information that would be otherwise lost.
18255 if (Known.Zero == 0xfffffffe)
18256 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
18257 DAG.getValueType(MVT::i1));
18258 else if (Known.Zero == 0xffffff00)
18259 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
18260 DAG.getValueType(MVT::i8));
18261 else if (Known.Zero == 0xffff0000)
18262 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
18263 DAG.getValueType(MVT::i16));
18264 }
18265
18266 return Res;
18267}
18268
18269static SDValue PerformBITCASTCombine(SDNode *N,
18270 TargetLowering::DAGCombinerInfo &DCI,
18271 const ARMSubtarget *ST) {
18272 SelectionDAG &DAG = DCI.DAG;
18273 SDValue Src = N->getOperand(0);
18274 EVT DstVT = N->getValueType(0);
18275
18276 // Convert v4f32 bitcast (v4i32 vdup (i32)) -> v4f32 vdup (i32) under MVE.
18277 if (ST->hasMVEIntegerOps() && Src.getOpcode() == ARMISD::VDUP) {
18278 EVT SrcVT = Src.getValueType();
18279 if (SrcVT.getScalarSizeInBits() == DstVT.getScalarSizeInBits())
18280 return DAG.getNode(ARMISD::VDUP, SDLoc(N), DstVT, Src.getOperand(0));
18281 }
18282
18283 // We may have a bitcast of something that has already had this bitcast
18284 // combine performed on it, so skip past any VECTOR_REG_CASTs.
18285 while (Src.getOpcode() == ARMISD::VECTOR_REG_CAST)
18286 Src = Src.getOperand(0);
18287
18288 // Bitcast from element-wise VMOV or VMVN doesn't need VREV if the VREV that
18289 // would be generated is at least the width of the element type.
18290 EVT SrcVT = Src.getValueType();
18291 if ((Src.getOpcode() == ARMISD::VMOVIMM ||
18292 Src.getOpcode() == ARMISD::VMVNIMM ||
18293 Src.getOpcode() == ARMISD::VMOVFPIMM) &&
18294 SrcVT.getScalarSizeInBits() <= DstVT.getScalarSizeInBits() &&
18295 DAG.getDataLayout().isBigEndian())
18296 return DAG.getNode(ARMISD::VECTOR_REG_CAST, SDLoc(N), DstVT, Src);
18297
18298 // bitcast(extract(x, n)); bitcast(extract(x, n+1)) -> VMOVRRD x
18299 if (SDValue R = PerformExtractEltToVMOVRRD(N, DCI))
18300 return R;
18301
18302 return SDValue();
18303}
18304
18305// Some combines for the MVETrunc truncations legalizer helper. Also lowers the
18306// node into stack operations after legalizeOps.
18307SDValue ARMTargetLowering::PerformMVETruncCombine(
18308 SDNode *N, TargetLowering::DAGCombinerInfo &DCI) const {
18309 SelectionDAG &DAG = DCI.DAG;
18310 EVT VT = N->getValueType(0);
18311 SDLoc DL(N);
18312
18313 // MVETrunc(Undef, Undef) -> Undef
18314 if (all_of(N->ops(), [](SDValue Op) { return Op.isUndef(); }))
18315 return DAG.getUNDEF(VT);
18316
18317 // MVETrunc(MVETrunc a b, MVETrunc c, d) -> MVETrunc
18318 if (N->getNumOperands() == 2 &&
18319 N->getOperand(0).getOpcode() == ARMISD::MVETRUNC &&
18320 N->getOperand(1).getOpcode() == ARMISD::MVETRUNC)
18321 return DAG.getNode(ARMISD::MVETRUNC, DL, VT, N->getOperand(0).getOperand(0),
18322 N->getOperand(0).getOperand(1),
18323 N->getOperand(1).getOperand(0),
18324 N->getOperand(1).getOperand(1));
18325
18326 // MVETrunc(shuffle, shuffle) -> VMOVN
18327 if (N->getNumOperands() == 2 &&
18328 N->getOperand(0).getOpcode() == ISD::VECTOR_SHUFFLE &&
18329 N->getOperand(1).getOpcode() == ISD::VECTOR_SHUFFLE) {
18330 auto *S0 = cast<ShuffleVectorSDNode>(N->getOperand(0).getNode());
18331 auto *S1 = cast<ShuffleVectorSDNode>(N->getOperand(1).getNode());
18332
18333 if (S0->getOperand(0) == S1->getOperand(0) &&
18334 S0->getOperand(1) == S1->getOperand(1)) {
18335 // Construct complete shuffle mask
18336 SmallVector<int, 8> Mask(S0->getMask());
18337 Mask.append(S1->getMask().begin(), S1->getMask().end());
18338
18339 if (isVMOVNTruncMask(Mask, VT, false))
18340 return DAG.getNode(
18341 ARMISD::VMOVN, DL, VT,
18342 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(0)),
18343 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(1)),
18344 DAG.getConstant(1, DL, MVT::i32));
18345 if (isVMOVNTruncMask(Mask, VT, true))
18346 return DAG.getNode(
18347 ARMISD::VMOVN, DL, VT,
18348 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(1)),
18349 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(0)),
18350 DAG.getConstant(1, DL, MVT::i32));
18351 }
18352 }
18353
18354 // For MVETrunc of a buildvector or shuffle, it can be beneficial to lower the
18355 // truncate to a buildvector to allow the generic optimisations to kick in.
18356 if (all_of(N->ops(), [](SDValue Op) {
18357 return Op.getOpcode() == ISD::BUILD_VECTOR ||
18358 Op.getOpcode() == ISD::VECTOR_SHUFFLE ||
18359 (Op.getOpcode() == ISD::BITCAST &&
18360 Op.getOperand(0).getOpcode() == ISD::BUILD_VECTOR);
18361 })) {
18362 SmallVector<SDValue, 8> Extracts;
18363 for (unsigned Op = 0; Op < N->getNumOperands(); Op++) {
18364 SDValue O = N->getOperand(Op);
18365 for (unsigned i = 0; i < O.getValueType().getVectorNumElements(); i++) {
18366 SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, O,
18367 DAG.getConstant(i, DL, MVT::i32));
18368 Extracts.push_back(Ext);
18369 }
18370 }
18371 return DAG.getBuildVector(VT, DL, Extracts);
18372 }
18373
18374 // If we are late in the legalization process and nothing has optimised
18375 // the trunc to anything better, lower it to a stack store and reload,
18376 // performing the truncation whilst keeping the lanes in the correct order:
18377 // VSTRH.32 a, stack; VSTRH.32 b, stack+8; VLDRW.32 stack;
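// For example, an MVETRUNC(v4i32 a, v4i32 b) producing v8i16 truncstores
// each v4i32 input as four i16 lanes (8 bytes) at offsets 0 and 8, then a
// single 16-byte vector load reads the result back with the lanes already
// in the right order.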
18378 if (!DCI.isAfterLegalizeDAG())
18379 return SDValue();
18380
18381 SDValue StackPtr = DAG.CreateStackTemporary(TypeSize::Fixed(16), Align(4));
18382 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
18383 int NumIns = N->getNumOperands();
18384 assert((NumIns == 2 || NumIns == 4) &&
18385 "Expected 2 or 4 inputs to an MVETrunc");
18386 EVT StoreVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
18387 if (N->getNumOperands() == 4)
18388 StoreVT = StoreVT.getHalfNumVectorElementsVT(*DAG.getContext());
18389
18390 SmallVector<SDValue> Chains;
18391 for (int I = 0; I < NumIns; I++) {
18392 SDValue Ptr = DAG.getNode(
18393 ISD::ADD, DL, StackPtr.getValueType(), StackPtr,
18394 DAG.getConstant(I * 16 / NumIns, DL, StackPtr.getValueType()));
18395 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(
18396 DAG.getMachineFunction(), SPFI, I * 16 / NumIns);
18397 SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), DL, N->getOperand(I),
18398 Ptr, MPI, StoreVT, Align(4));
18399 Chains.push_back(Ch);
18400 }
18401
18402 SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
18403 MachinePointerInfo MPI =
18404 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI, 0);
18405 return DAG.getLoad(VT, DL, Chain, StackPtr, MPI, Align(4));
18406}
18407
18408 // Take an MVEEXT(load x) and split that into (extload x, extload x+8)
18409static SDValue PerformSplittingMVEEXTToWideningLoad(SDNode *N,
18410 SelectionDAG &DAG) {
18411 SDValue N0 = N->getOperand(0);
18412 LoadSDNode *LD = dyn_cast<LoadSDNode>(N0.getNode());
18413 if (!LD || !LD->isSimple() || !N0.hasOneUse() || LD->isIndexed())
18414 return SDValue();
18415
18416 EVT FromVT = LD->getMemoryVT();
18417 EVT ToVT = N->getValueType(0);
18418 if (!ToVT.isVector())
18419 return SDValue();
18420 assert(FromVT.getVectorNumElements() == ToVT.getVectorNumElements() * 2);
18421 EVT ToEltVT = ToVT.getVectorElementType();
18422 EVT FromEltVT = FromVT.getVectorElementType();
18423
18424 unsigned NumElements = 0;
18425 if (ToEltVT == MVT::i32 && (FromEltVT == MVT::i16 || FromEltVT == MVT::i8))
18426 NumElements = 4;
18427 if (ToEltVT == MVT::i16 && FromEltVT == MVT::i8)
18428 NumElements = 8;
18429 assert(NumElements != 0);
18430
18431 ISD::LoadExtType NewExtType =
18432 N->getOpcode() == ARMISD::MVESEXT ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
18433 if (LD->getExtensionType() != ISD::NON_EXTLOAD &&
18434 LD->getExtensionType() != ISD::EXTLOAD &&
18435 LD->getExtensionType() != NewExtType)
18436 return SDValue();
18437
18438 LLVMContext &C = *DAG.getContext();
18439 SDLoc DL(LD);
18440 // Details about the old load
18441 SDValue Ch = LD->getChain();
18442 SDValue BasePtr = LD->getBasePtr();
18443 Align Alignment = LD->getOriginalAlign();
18444 MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
18445 AAMDNodes AAInfo = LD->getAAInfo();
18446
18447 SDValue Offset = DAG.getUNDEF(BasePtr.getValueType());
18448 EVT NewFromVT = EVT::getVectorVT(
18449 C, EVT::getIntegerVT(C, FromEltVT.getScalarSizeInBits()), NumElements);
18450 EVT NewToVT = EVT::getVectorVT(
18451 C, EVT::getIntegerVT(C, ToEltVT.getScalarSizeInBits()), NumElements);
18452
18453 SmallVector<SDValue, 4> Loads;
18454 SmallVector<SDValue, 4> Chains;
18455 for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
18456 unsigned NewOffset = (i * NewFromVT.getSizeInBits()) / 8;
18457 SDValue NewPtr =
18458 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::Fixed(NewOffset));
18459
18460 SDValue NewLoad =
18461 DAG.getLoad(ISD::UNINDEXED, NewExtType, NewToVT, DL, Ch, NewPtr, Offset,
18462 LD->getPointerInfo().getWithOffset(NewOffset), NewFromVT,
18463 Alignment, MMOFlags, AAInfo);
18464 Loads.push_back(NewLoad);
18465 Chains.push_back(SDValue(NewLoad.getNode(), 1));
18466 }
18467
18468 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
18469 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewChain);
18470 return DAG.getMergeValues(Loads, DL);
18471}
18472
18473 // Perform combines for MVEEXT. If it has not been optimized to anything
18474 // better before lowering, it gets converted to a stack store and extloads
18475 // performing the extend whilst still keeping the same lane ordering.
18476SDValue ARMTargetLowering::PerformMVEExtCombine(
18477 SDNode *N, TargetLowering::DAGCombinerInfo &DCI) const {
18478 SelectionDAG &DAG = DCI.DAG;
18479 EVT VT = N->getValueType(0);
18480 SDLoc DL(N);
18481 assert(N->getNumValues() == 2 && "Expected MVEEXT with 2 elements");
18482 assert((VT == MVT::v4i32 || VT == MVT::v8i16) && "Unexpected MVEEXT type");
18483
18484 EVT ExtVT = N->getOperand(0).getValueType().getHalfNumVectorElementsVT(
18485 *DAG.getContext());
18486 auto Extend = [&](SDValue V) {
18487 SDValue VVT = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, V);
18488 return N->getOpcode() == ARMISD::MVESEXT
18489 ? DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, VVT,
18490 DAG.getValueType(ExtVT))
18491 : DAG.getZeroExtendInReg(VVT, DL, ExtVT);
18492 };
18493
18494 // MVEEXT(VDUP) -> SIGN_EXTEND_INREG(VDUP)
18495 if (N->getOperand(0).getOpcode() == ARMISD::VDUP) {
18496 SDValue Ext = Extend(N->getOperand(0));
18497 return DAG.getMergeValues({Ext, Ext}, DL);
18498 }
18499
18500 // MVEEXT(shuffle) -> SIGN_EXTEND_INREG/ZERO_EXTEND_INREG
18501 if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0))) {
18502 ArrayRef<int> Mask = SVN->getMask();
18503 assert(Mask.size() == 2 * VT.getVectorNumElements());
18504 assert(Mask.size() == SVN->getValueType(0).getVectorNumElements());
18505 unsigned Rev = VT == MVT::v4i32 ? ARMISD::VREV32 : ARMISD::VREV16;
18506 SDValue Op0 = SVN->getOperand(0);
18507 SDValue Op1 = SVN->getOperand(1);
18508
18509 auto CheckInregMask = [&](int Start, int Offset) {
18510 for (int Idx = 0, E = VT.getVectorNumElements(); Idx < E; ++Idx)
18511 if (Mask[Start + Idx] >= 0 && Mask[Start + Idx] != Idx * 2 + Offset)
18512 return false;
18513 return true;
18514 };
18515 SDValue V0 = SDValue(N, 0);
18516 SDValue V1 = SDValue(N, 1);
18517 if (CheckInregMask(0, 0))
18518 V0 = Extend(Op0);
18519 else if (CheckInregMask(0, 1))
18520 V0 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op0));
18521 else if (CheckInregMask(0, Mask.size()))
18522 V0 = Extend(Op1);
18523 else if (CheckInregMask(0, Mask.size() + 1))
18524 V0 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op1));
18525
18526 if (CheckInregMask(VT.getVectorNumElements(), Mask.size()))
18527 V1 = Extend(Op1);
18528 else if (CheckInregMask(VT.getVectorNumElements(), Mask.size() + 1))
18529 V1 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op1));
18530 else if (CheckInregMask(VT.getVectorNumElements(), 0))
18531 V1 = Extend(Op0);
18532 else if (CheckInregMask(VT.getVectorNumElements(), 1))
18533 V1 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op0));
18534
18535 if (V0.getNode() != N || V1.getNode() != N)
18536 return DAG.getMergeValues({V0, V1}, DL);
18537 }
18538
18539 // MVEEXT(load) -> extload, extload
18540 if (N->getOperand(0)->getOpcode() == ISD::LOAD)
18541 if (SDValue L = PerformSplittingMVEEXTToWideningLoad(N, DAG))
18542 return L;
18543
18544 if (!DCI.isAfterLegalizeDAG())
18545 return SDValue();
18546
18547 // Lower to a stack store and reload:
18548 // VSTRW.32 a, stack; VLDRH.32 stack; VLDRH.32 stack+8;
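// For example, an MVESEXT of a v8i16 input to two v4i32 results stores the
// 16-byte input once, then performs two sign-extending loads of four i16
// lanes each (LoadVT == v4i16) from offsets 0 and 8.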
18549 SDValue StackPtr = DAG.CreateStackTemporary(TypeSize::Fixed(16), Align(4));
18550 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
18551 int NumOuts = N->getNumValues();
18552 assert((NumOuts == 2 || NumOuts == 4) &&
18553 "Expected 2 or 4 outputs to an MVEEXT");
18554 EVT LoadVT = N->getOperand(0).getValueType().getHalfNumVectorElementsVT(
18555 *DAG.getContext());
18556 if (N->getNumOperands() == 4)
18557 LoadVT = LoadVT.getHalfNumVectorElementsVT(*DAG.getContext());
18558
18559 MachinePointerInfo MPI =
18560 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI, 0);
18561 SDValue Chain = DAG.getStore(DAG.getEntryNode(), DL, N->getOperand(0),
18562 StackPtr, MPI, Align(4));
18563
18564 SmallVector<SDValue> Loads;
18565 for (int I = 0; I < NumOuts; I++) {
18566 SDValue Ptr = DAG.getNode(
18567 ISD::ADD, DL, StackPtr.getValueType(), StackPtr,
18568 DAG.getConstant(I * 16 / NumOuts, DL, StackPtr.getValueType()));
18569 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(
18570 DAG.getMachineFunction(), SPFI, I * 16 / NumOuts);
18571 SDValue Load = DAG.getExtLoad(
18572 N->getOpcode() == ARMISD::MVESEXT ? ISD::SEXTLOAD : ISD::ZEXTLOAD, DL,
18573 VT, Chain, Ptr, MPI, LoadVT, Align(4));
18574 Loads.push_back(Load);
18575 }
18576
18577 return DAG.getMergeValues(Loads, DL);
18578}
18579
18580SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
18581 DAGCombinerInfo &DCI) const {
18582 switch (N->getOpcode()) {
18583 default: break;
18584 case ISD::SELECT_CC:
18585 case ISD::SELECT: return PerformSELECTCombine(N, DCI, Subtarget);
18586 case ISD::VSELECT: return PerformVSELECTCombine(N, DCI, Subtarget);
18587 case ISD::SETCC: return PerformVSetCCToVCTPCombine(N, DCI, Subtarget);
18588 case ISD::ABS: return PerformABSCombine(N, DCI, Subtarget);
18589 case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget);
18590 case ARMISD::UMLAL: return PerformUMLALCombine(N, DCI.DAG, Subtarget);
18591 case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget);
18592 case ISD::SUB: return PerformSUBCombine(N, DCI, Subtarget);
18593 case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
18594 case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
18595 case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget);
18596 case ISD::AND: return PerformANDCombine(N, DCI, Subtarget);
18597 case ISD::BRCOND:
18598 case ISD::BR_CC: return PerformHWLoopCombine(N, DCI, Subtarget);
18599 case ARMISD::ADDC:
18600 case ARMISD::SUBC: return PerformAddcSubcCombine(N, DCI, Subtarget);
18601 case ARMISD::SUBE: return PerformAddeSubeCombine(N, DCI, Subtarget);
18602 case ARMISD::BFI: return PerformBFICombine(N, DCI.DAG);
18603 case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget);
18604 case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
18605 case ARMISD::VMOVhr: return PerformVMOVhrCombine(N, DCI);
18606 case ARMISD::VMOVrh: return PerformVMOVrhCombine(N, DCI.DAG);
18607 case ISD::STORE: return PerformSTORECombine(N, DCI, Subtarget);
18608 case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI, Subtarget);
18609 case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
18610 case ISD::EXTRACT_VECTOR_ELT:
18611 return PerformExtractEltCombine(N, DCI, Subtarget);
18612 case ISD::SIGN_EXTEND_INREG: return PerformSignExtendInregCombine(N, DCI.DAG);
18613 case ISD::INSERT_SUBVECTOR: return PerformInsertSubvectorCombine(N, DCI);
18614 case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
18615 case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI, Subtarget);
18616 case ARMISD::VDUP: return PerformVDUPCombine(N, DCI.DAG, Subtarget);
18617 case ISD::FP_TO_SINT:
18618 case ISD::FP_TO_UINT:
18619 return PerformVCVTCombine(N, DCI.DAG, Subtarget);
18620 case ISD::FADD:
18621 return PerformFAddVSelectCombine(N, DCI.DAG, Subtarget);
18622 case ISD::FDIV:
18623 return PerformVDIVCombine(N, DCI.DAG, Subtarget);
18624 case ISD::INTRINSIC_WO_CHAIN:
18625 return PerformIntrinsicCombine(N, DCI);
18626 case ISD::SHL:
18627 case ISD::SRA:
18628 case ISD::SRL:
18629 return PerformShiftCombine(N, DCI, Subtarget);
18630 case ISD::SIGN_EXTEND:
18631 case ISD::ZERO_EXTEND:
18632 case ISD::ANY_EXTEND:
18633 return PerformExtendCombine(N, DCI.DAG, Subtarget);
18634 case ISD::FP_EXTEND:
18635 return PerformFPExtendCombine(N, DCI.DAG, Subtarget);
18636 case ISD::SMIN:
18637 case ISD::UMIN:
18638 case ISD::SMAX:
18639 case ISD::UMAX:
18640 return PerformMinMaxCombine(N, DCI.DAG, Subtarget);
18641 case ARMISD::CMOV:
18642 return PerformCMOVCombine(N, DCI.DAG);
18643 case ARMISD::BRCOND:
18644 return PerformBRCONDCombine(N, DCI.DAG);
18645 case ARMISD::CMPZ:
18646 return PerformCMPZCombine(N, DCI.DAG);
18647 case ARMISD::CSINC:
18648 case ARMISD::CSINV:
18649 case ARMISD::CSNEG:
18650 return PerformCSETCombine(N, DCI.DAG);
18651 case ISD::LOAD:
18652 return PerformLOADCombine(N, DCI, Subtarget);
18653 case ARMISD::VLD1DUP:
18654 case ARMISD::VLD2DUP:
18655 case ARMISD::VLD3DUP:
18656 case ARMISD::VLD4DUP:
18657 return PerformVLDCombine(N, DCI);
18658 case ARMISD::BUILD_VECTOR:
18659 return PerformARMBUILD_VECTORCombine(N, DCI);
18660 case ISD::BITCAST:
18661 return PerformBITCASTCombine(N, DCI, Subtarget);
18662 case ARMISD::PREDICATE_CAST:
18663 return PerformPREDICATE_CASTCombine(N, DCI);
18664 case ARMISD::VECTOR_REG_CAST:
18665 return PerformVECTOR_REG_CASTCombine(N, DCI.DAG, Subtarget);
18666 case ARMISD::MVETRUNC:
18667 return PerformMVETruncCombine(N, DCI);
18668 case ARMISD::MVESEXT:
18669 case ARMISD::MVEZEXT:
18670 return PerformMVEExtCombine(N, DCI);
18671 case ARMISD::VCMP:
18672 return PerformVCMPCombine(N, DCI.DAG, Subtarget);
18673 case ISD::VECREDUCE_ADD:
18674 return PerformVECREDUCE_ADDCombine(N, DCI.DAG, Subtarget);
18675 case ARMISD::VMOVN:
18676 return PerformVMOVNCombine(N, DCI);
18677 case ARMISD::VQMOVNs:
18678 case ARMISD::VQMOVNu:
18679 return PerformVQMOVNCombine(N, DCI);
18680 case ARMISD::ASRL:
18681 case ARMISD::LSRL:
18682 case ARMISD::LSLL:
18683 return PerformLongShiftCombine(N, DCI.DAG);
18684 case ARMISD::SMULWB: {
18685 unsigned BitWidth = N->getValueType(0).getSizeInBits();
18686 APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
18687 if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
18688 return SDValue();
18689 break;
18690 }
18691 case ARMISD::SMULWT: {
18692 unsigned BitWidth = N->getValueType(0).getSizeInBits();
18693 APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
18694 if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
18695 return SDValue();
18696 break;
18697 }
18698 case ARMISD::SMLALBB:
18699 case ARMISD::QADD16b:
18700 case ARMISD::QSUB16b:
18701 case ARMISD::UQADD16b:
18702 case ARMISD::UQSUB16b: {
18703 unsigned BitWidth = N->getValueType(0).getSizeInBits();
18704 APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
18705 if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
18706 (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
18707 return SDValue();
18708 break;
18709 }
18710 case ARMISD::SMLALBT: {
18711 unsigned LowWidth = N->getOperand(0).getValueType().getSizeInBits();
18712 APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);
18713 unsigned HighWidth = N->getOperand(1).getValueType().getSizeInBits();
18714 APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);
18715 if ((SimplifyDemandedBits(N->getOperand(0), LowMask, DCI)) ||
18716 (SimplifyDemandedBits(N->getOperand(1), HighMask, DCI)))
18717 return SDValue();
18718 break;
18719 }
18720 case ARMISD::SMLALTB: {
18721 unsigned HighWidth = N->getOperand(0).getValueType().getSizeInBits();
18722 APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);
18723 unsigned LowWidth = N->getOperand(1).getValueType().getSizeInBits();
18724 APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);
18725 if ((SimplifyDemandedBits(N->getOperand(0), HighMask, DCI)) ||
18726 (SimplifyDemandedBits(N->getOperand(1), LowMask, DCI)))
18727 return SDValue();
18728 break;
18729 }
18730 case ARMISD::SMLALTT: {
18731 unsigned BitWidth = N->getValueType(0).getSizeInBits();
18732 APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
18733 if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
18734 (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
18735 return SDValue();
18736 break;
18737 }
18738 case ARMISD::QADD8b:
18739 case ARMISD::QSUB8b:
18740 case ARMISD::UQADD8b:
18741 case ARMISD::UQSUB8b: {
18742 unsigned BitWidth = N->getValueType(0).getSizeInBits();
18743 APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 8);
18744 if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
18745 (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
18746 return SDValue();
18747 break;
18748 }
18749 case ISD::INTRINSIC_VOID:
18750 case ISD::INTRINSIC_W_CHAIN:
18751 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
18752 case Intrinsic::arm_neon_vld1:
18753 case Intrinsic::arm_neon_vld1x2:
18754 case Intrinsic::arm_neon_vld1x3:
18755 case Intrinsic::arm_neon_vld1x4:
18756 case Intrinsic::arm_neon_vld2:
18757 case Intrinsic::arm_neon_vld3:
18758 case Intrinsic::arm_neon_vld4:
18759 case Intrinsic::arm_neon_vld2lane:
18760 case Intrinsic::arm_neon_vld3lane:
18761 case Intrinsic::arm_neon_vld4lane:
18762 case Intrinsic::arm_neon_vld2dup:
18763 case Intrinsic::arm_neon_vld3dup:
18764 case Intrinsic::arm_neon_vld4dup:
18765 case Intrinsic::arm_neon_vst1:
18766 case Intrinsic::arm_neon_vst1x2:
18767 case Intrinsic::arm_neon_vst1x3:
18768 case Intrinsic::arm_neon_vst1x4:
18769 case Intrinsic::arm_neon_vst2:
18770 case Intrinsic::arm_neon_vst3:
18771 case Intrinsic::arm_neon_vst4:
18772 case Intrinsic::arm_neon_vst2lane:
18773 case Intrinsic::arm_neon_vst3lane:
18774 case Intrinsic::arm_neon_vst4lane:
18775 return PerformVLDCombine(N, DCI);
18776 case Intrinsic::arm_mve_vld2q:
18777 case Intrinsic::arm_mve_vld4q:
18778 case Intrinsic::arm_mve_vst2q:
18779 case Intrinsic::arm_mve_vst4q:
18780 return PerformMVEVLDCombine(N, DCI);
18781 default: break;
18782 }
18783 break;
18784 }
18785 return SDValue();
18786}
18787
18788bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
18789 EVT VT) const {
18790 return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
18791}
18792
18793bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned,
18794 Align Alignment,
18795 MachineMemOperand::Flags,
18796 bool *Fast) const {
18797 // Depends what it gets converted into if the type is weird.
18798 if (!VT.isSimple())
18799 return false;
18800
18801 // The AllowsUnaligned flag models the SCTLR.A setting in ARM cpus
18802 bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
18803 auto Ty = VT.getSimpleVT().SimpleTy;
18804
18805 if (Ty == MVT::i8 || Ty == MVT::i16 || Ty == MVT::i32) {
18806 // Unaligned access can use (for example) LDRB, LDRH, LDR
18807 if (AllowsUnaligned) {
18808 if (Fast)
18809 *Fast = Subtarget->hasV7Ops();
18810 return true;
18811 }
18812 }
18813
18814 if (Ty == MVT::f64 || Ty == MVT::v2f64) {
18815 // For any little-endian targets with neon, we can support unaligned ld/st
18816 // of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8.
18817 // A big-endian target may also explicitly support unaligned accesses
18818 if (Subtarget->hasNEON() && (AllowsUnaligned || Subtarget->isLittle())) {
18819 if (Fast)
18820 *Fast = true;
18821 return true;
18822 }
18823 }
18824
18825 if (!Subtarget->hasMVEIntegerOps())
18826 return false;
18827
18828 // These are for predicates
18829 if ((Ty == MVT::v16i1 || Ty == MVT::v8i1 || Ty == MVT::v4i1 ||
18830 Ty == MVT::v2i1)) {
18831 if (Fast)
18832 *Fast = true;
18833 return true;
18834 }
18835
18836 // These are for truncated stores/narrowing loads. They are fine so long as
18837 // the alignment is at least the size of the item being loaded
18838 if ((Ty == MVT::v4i8 || Ty == MVT::v8i8 || Ty == MVT::v4i16) &&
18839 Alignment >= VT.getScalarSizeInBits() / 8) {
18840 if (Fast)
18841 *Fast = true;
18842 return true;
18843 }
18844
18845 // In little-endian MVE, the store instructions VSTRB.U8, VSTRH.U16 and
18846 // VSTRW.U32 all store the vector register in exactly the same format, and
18847 // differ only in the range of their immediate offset field and the required
18848 // alignment. So there is always a store that can be used, regardless of
18849 // actual type.
18850 //
18851 // For big endian, that is not the case. But we can still emit a (VSTRB.U8;
18852 // VREV64.8) pair and get the same effect. This will likely be better than
18853 // aligning the vector through the stack.
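// For example, VSTRB.U8, VSTRH.U16 and VSTRW.U32 of the same q register all
// write the same 16 bytes on little endian, so a v4i32 store that is only
// byte-aligned can simply be emitted as VSTRB.U8.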
18854 if (Ty == MVT::v16i8 || Ty == MVT::v8i16 || Ty == MVT::v8f16 ||
18855 Ty == MVT::v4i32 || Ty == MVT::v4f32 || Ty == MVT::v2i64 ||
18856 Ty == MVT::v2f64) {
18857 if (Fast)
18858 *Fast = true;
18859 return true;
18860 }
18861
18862 return false;
18863}
18864
18865
18866EVT ARMTargetLowering::getOptimalMemOpType(
18867 const MemOp &Op, const AttributeList &FuncAttributes) const {
18868 // See if we can use NEON instructions for this...
18869 if ((Op.isMemcpy() || Op.isZeroMemset()) && Subtarget->hasNEON() &&
18870 !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
18871 bool Fast;
18872 if (Op.size() >= 16 &&
18873 (Op.isAligned(Align(16)) ||
18874 (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, Align(1),
18875 MachineMemOperand::MONone, &Fast) &&
18876 Fast))) {
18877 return MVT::v2f64;
18878 } else if (Op.size() >= 8 &&
18879 (Op.isAligned(Align(8)) ||
18880 (allowsMisalignedMemoryAccesses(
18881 MVT::f64, 0, Align(1), MachineMemOperand::MONone, &Fast) &&
18882 Fast))) {
18883 return MVT::f64;
18884 }
18885 }
18886
18887 // Let the target-independent logic figure it out.
18888 return MVT::Other;
18889}
18890
18891// 64-bit integers are split into their high and low parts and held in two
18892// different registers, so the trunc is free since the low register can just
18893// be used.
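// For example, an i64 value held in a pair of GPRs can be truncated to i32
// by simply using the register that holds the low half; no instruction is
// needed.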
18894bool ARMTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
18895 if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
18896 return false;
18897 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
18898 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
18899 return (SrcBits == 64 && DestBits == 32);
18900}
18901
18902bool ARMTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
18903 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
18904 !DstVT.isInteger())
18905 return false;
18906 unsigned SrcBits = SrcVT.getSizeInBits();
18907 unsigned DestBits = DstVT.getSizeInBits();
18908 return (SrcBits == 64 && DestBits == 32);
18909}
18910
18911bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
18912 if (Val.getOpcode() != ISD::LOAD)
18913 return false;
18914
18915 EVT VT1 = Val.getValueType();
18916 if (!VT1.isSimple() || !VT1.isInteger() ||
18917 !VT2.isSimple() || !VT2.isInteger())
18918 return false;
18919
18920 switch (VT1.getSimpleVT().SimpleTy) {
18921 default: break;
18922 case MVT::i1:
18923 case MVT::i8:
18924 case MVT::i16:
18925 // 8-bit and 16-bit loads implicitly zero-extend to 32-bits.
18926 return true;
18927 }
18928
18929 return false;
18930}
18931
18932bool ARMTargetLowering::isFNegFree(EVT VT) const {
18933 if (!VT.isSimple())
18934 return false;
18935
18936 // There are quite a few FP16 instructions (e.g. VNMLA, VNMLS, etc.) that
18937 // negate values directly (fneg is free). So, we don't want to let the DAG
18938 // combiner rewrite fneg into xors and some other instructions. For f16 and
18939 // FullFP16 argument passing, some bitcast nodes may be introduced,
18940 // triggering this DAG combine rewrite, so we are avoiding that with this.
18941 switch (VT.getSimpleVT().SimpleTy) {
18942 default: break;
18943 case MVT::f16:
18944 return Subtarget->hasFullFP16();
18945 }
18946
18947 return false;
18948}
18949
18950/// Check if Ext1 and Ext2 are extends of the same type, doubling the bitwidth
18951/// of the vector elements.
18952static bool areExtractExts(Value *Ext1, Value *Ext2) {
18953 auto areExtDoubled = [](Instruction *Ext) {
18954 return Ext->getType()->getScalarSizeInBits() ==
18955 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
18956 };
18957
18958 if (!match(Ext1, m_ZExtOrSExt(m_Value())) ||
18959 !match(Ext2, m_ZExtOrSExt(m_Value())) ||
18960 !areExtDoubled(cast<Instruction>(Ext1)) ||
18961 !areExtDoubled(cast<Instruction>(Ext2)))
18962 return false;
18963
18964 return true;
18965}
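// For example, two operands defined by 'sext <8 x i8> %a to <8 x i16>' and
// 'sext <8 x i8> %b to <8 x i16>' each double the element width, so an
// add/sub fed by them passes this check.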
18966
18967/// Check if sinking \p I's operands to I's basic block is profitable, because
18968/// the operands can be folded into a target instruction, e.g.
18969/// sext/zext can be folded into vsubl.
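/// For MVE, the main case handled below is roughly a splat built as
/// 'shufflevector(insertelement(undef, %s, 0), undef, zeroinitializer)':
/// sinking it next to its users lets instruction selection keep the scalar
/// in a GPR rather than materialising the splat in a vector register.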
18970bool ARMTargetLowering::shouldSinkOperands(Instruction *I,
18971 SmallVectorImpl<Use *> &Ops) const {
18972 if (!I->getType()->isVectorTy())
18973 return false;
18974
18975 if (Subtarget->hasNEON()) {
18976 switch (I->getOpcode()) {
18977 case Instruction::Sub:
18978 case Instruction::Add: {
18979 if (!areExtractExts(I->getOperand(0), I->getOperand(1)))
18980 return false;
18981 Ops.push_back(&I->getOperandUse(0));
18982 Ops.push_back(&I->getOperandUse(1));
18983 return true;
18984 }
18985 default:
18986 return false;
18987 }
18988 }
18989
18990 if (!Subtarget->hasMVEIntegerOps())
18991 return false;
18992
18993 auto IsFMSMul = [&](Instruction *I) {
18994 if (!I->hasOneUse())
18995 return false;
18996 auto *Sub = cast<Instruction>(*I->users().begin());
18997 return Sub->getOpcode() == Instruction::FSub && Sub->getOperand(1) == I;
18998 };
18999 auto IsFMS = [&](Instruction *I) {
19000 if (match(I->getOperand(0), m_FNeg(m_Value())) ||
19001 match(I->getOperand(1), m_FNeg(m_Value())))
19002 return true;
19003 return false;
19004 };
19005
19006 auto IsSinker = [&](Instruction *I, int Operand) {
19007 switch (I->getOpcode()) {
19008 case Instruction::Add:
19009 case Instruction::Mul:
19010 case Instruction::FAdd:
19011 case Instruction::ICmp:
19012 case Instruction::FCmp:
19013 return true;
19014 case Instruction::FMul:
19015 return !IsFMSMul(I);
19016 case Instruction::Sub:
19017 case Instruction::FSub:
19018 case Instruction::Shl:
19019 case Instruction::LShr:
19020 case Instruction::AShr:
19021 return Operand == 1;
19022 case Instruction::Call:
19023 if (auto *II = dyn_cast<IntrinsicInst>(I)) {
19024 switch (II->getIntrinsicID()) {
19025 case Intrinsic::fma:
19026 return !IsFMS(I);
19027 case Intrinsic::sadd_sat:
19028 case Intrinsic::uadd_sat:
19029 case Intrinsic::arm_mve_add_predicated:
19030 case Intrinsic::arm_mve_mul_predicated:
19031 case Intrinsic::arm_mve_qadd_predicated:
19032 case Intrinsic::arm_mve_vhadd:
19033 case Intrinsic::arm_mve_hadd_predicated:
19034 case Intrinsic::arm_mve_vqdmull:
19035 case Intrinsic::arm_mve_vqdmull_predicated:
19036 case Intrinsic::arm_mve_vqdmulh:
19037 case Intrinsic::arm_mve_qdmulh_predicated:
19038 case Intrinsic::arm_mve_vqrdmulh:
19039 case Intrinsic::arm_mve_qrdmulh_predicated:
19040 case Intrinsic::arm_mve_fma_predicated:
19041 return true;
19042 case Intrinsic::ssub_sat:
19043 case Intrinsic::usub_sat:
19044 case Intrinsic::arm_mve_sub_predicated:
19045 case Intrinsic::arm_mve_qsub_predicated:
19046 case Intrinsic::arm_mve_hsub_predicated:
19047 case Intrinsic::arm_mve_vhsub:
19048 return Operand == 1;
19049 default:
19050 return false;
19051 }
19052 }
19053 return false;
19054 default:
19055 return false;
19056 }
19057 };
19058
19059 for (auto OpIdx : enumerate(I->operands())) {
19060 Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
19061 // Make sure we are not already sinking this operand
19062 if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
19063 continue;
19064
19065 Instruction *Shuffle = Op;
19066 if (Shuffle->getOpcode() == Instruction::BitCast)
19067 Shuffle = dyn_cast<Instruction>(Shuffle->getOperand(0));
19068 // We are looking for a splat that can be sunk.
19069 if (!Shuffle ||
19070 !match(Shuffle, m_Shuffle(
19071 m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
19072 m_Undef(), m_ZeroMask())))
19073 continue;
19074 if (!IsSinker(I, OpIdx.index()))
19075 continue;
19076
19077 // All uses of the shuffle should be sunk to avoid duplicating it across gpr
19078 // and vector registers
19079 for (Use &U : Op->uses()) {
19080 Instruction *Insn = cast<Instruction>(U.getUser());
19081 if (!IsSinker(Insn, U.getOperandNo()))
19082 return false;
19083 }
19084
19085 Ops.push_back(&Shuffle->getOperandUse(0));
19086 if (Shuffle != Op)
19087 Ops.push_back(&Op->getOperandUse(0));
19088 Ops.push_back(&OpIdx.value());
19089 }
19090 return true;
19091}
19092
19093Type *ARMTargetLowering::shouldConvertSplatType(ShuffleVectorInst *SVI) const {
19094 if (!Subtarget->hasMVEIntegerOps())
19095 return nullptr;
19096 Type *SVIType = SVI->getType();
19097 Type *ScalarType = SVIType->getScalarType();
19098
19099 if (ScalarType->isFloatTy())
19100 return Type::getInt32Ty(SVIType->getContext());
19101 if (ScalarType->isHalfTy())
19102 return Type::getInt16Ty(SVIType->getContext());
19103 return nullptr;
19104}
19105
19106bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
19107 EVT VT = ExtVal.getValueType();
19108
19109 if (!isTypeLegal(VT))
19110 return false;
19111
19112 if (auto *Ld = dyn_cast<MaskedLoadSDNode>(ExtVal.getOperand(0))) {
19113 if (Ld->isExpandingLoad())
19114 return false;
19115 }
19116
19117 if (Subtarget->hasMVEIntegerOps())
19118 return true;
19119
19120 // Don't create a loadext if we can fold the extension into a wide/long
19121 // instruction.
19122 // If there's more than one user instruction, the loadext is desirable no
19123 // matter what. There can be two uses by the same instruction.
19124 if (ExtVal->use_empty() ||
19125 !ExtVal->use_begin()->isOnlyUserOf(ExtVal.getNode()))
19126 return true;
19127
19128 SDNode *U = *ExtVal->use_begin();
19129 if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB ||
19130 U->getOpcode() == ISD::SHL || U->getOpcode() == ARMISD::VSHLIMM))
19131 return false;
19132
19133 return true;
19134}
19135
19136bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
19137 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
19138 return false;
19139
19140 if (!isTypeLegal(EVT::getEVT(Ty1)))
19141 return false;
19142
19143 assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop");
19144
19145 // Assuming the caller doesn't have a zeroext or signext return parameter,
19146 // truncation all the way down to i1 is valid.
19147 return true;
19148}
19149
19150/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
19151/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
19152/// expanded to FMAs when this method returns true, otherwise fmuladd is
19153/// expanded to fmul + fadd.
19154///
19155/// ARM supports both fused and unfused multiply-add operations; we already
19156/// lower a pair of fmul and fadd to the latter so it's not clear that there
19157/// would be a gain or that the gain would be worthwhile enough to risk
19158/// correctness bugs.
19159///
19160/// For MVE, we set this to true as it helps simplify the need for some
19161/// patterns (and we don't have the non-fused floating point instruction).
19162bool ARMTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
19163 EVT VT) const {
19164 if (!VT.isSimple())
19165 return false;
19166
19167 switch (VT.getSimpleVT().SimpleTy) {
19168 case MVT::v4f32:
19169 case MVT::v8f16:
19170 return Subtarget->hasMVEFloatOps();
19171 case MVT::f16:
19172 return Subtarget->useFPVFMx16();
19173 case MVT::f32:
19174 return Subtarget->useFPVFMx();
19175 case MVT::f64:
19176 return Subtarget->useFPVFMx64();
19177 default:
19178 break;
19179 }
19180
19181 return false;
19182}
19183
19184static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
19185 if (V < 0)
19186 return false;
19187
19188 unsigned Scale = 1;
19189 switch (VT.getSimpleVT().SimpleTy) {
19190 case MVT::i1:
19191 case MVT::i8:
19192 // Scale == 1;
19193 break;
19194 case MVT::i16:
19195 // Scale == 2;
19196 Scale = 2;
19197 break;
19198 default:
19199 // On thumb1 we load most things (i32, i64, floats, etc) with a LDR
19200 // Scale == 4;
19201 Scale = 4;
19202 break;
19203 }
19204
19205 if ((V & (Scale - 1)) != 0)
19206 return false;
19207 return isUInt<5>(V / Scale);
19208}
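// For example, under the rules above a Thumb1 i32 load accepts immediate
// offsets 0, 4, ..., 124 (a multiple of 4 whose scaled value fits in 5
// bits), while an i8 load accepts offsets 0..31.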
19209
19210static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
19211 const ARMSubtarget *Subtarget) {
19212 if (!VT.isInteger() && !VT.isFloatingPoint())
19213 return false;
19214 if (VT.isVector() && Subtarget->hasNEON())
19215 return false;
19216 if (VT.isVector() && VT.isFloatingPoint() && Subtarget->hasMVEIntegerOps() &&
19217 !Subtarget->hasMVEFloatOps())
19218 return false;
19219
19220 bool IsNeg = false;
19221 if (V < 0) {
19222 IsNeg = true;
19223 V = -V;
19224 }
19225
19226 unsigned NumBytes = std::max((unsigned)VT.getSizeInBits() / 8, 1U);
19227
19228 // MVE: size * imm7
19229 if (VT.isVector() && Subtarget->hasMVEIntegerOps()) {
19230 switch (VT.getSimpleVT().getVectorElementType().SimpleTy) {
19231 case MVT::i32:
19232 case MVT::f32:
19233 return isShiftedUInt<7,2>(V);
19234 case MVT::i16:
19235 case MVT::f16:
19236 return isShiftedUInt<7,1>(V);
19237 case MVT::i8:
19238 return isUInt<7>(V);
19239 default:
19240 return false;
19241 }
19242 }
19243
19244 // half VLDR: 2 * imm8
19245 if (VT.isFloatingPoint() && NumBytes == 2 && Subtarget->hasFPRegs16())
19246 return isShiftedUInt<8, 1>(V);
19247 // VLDR and LDRD: 4 * imm8
19248 if ((VT.isFloatingPoint() && Subtarget->hasVFP2Base()) || NumBytes == 8)
19249 return isShiftedUInt<8, 2>(V);
19250
19251 if (NumBytes == 1 || NumBytes == 2 || NumBytes == 4) {
19252 // + imm12 or - imm8
19253 if (IsNeg)
19254 return isUInt<8>(V);
19255 return isUInt<12>(V);
19256 }
19257
19258 return false;
19259}
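// For example, with MVE an i32/f32 vector access accepts offsets that are
// multiples of 4 up to 508 (a 7-bit immediate shifted by 2), a half VLDR
// accepts multiples of 2 up to 510, VLDR/LDRD accept multiples of 4 up to
// 1020, and a plain i32 access accepts +0..4095 or -(0..255).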
19260
19261/// isLegalAddressImmediate - Return true if the integer value can be used
19262/// as the offset of the target addressing mode for load / store of the
19263/// given type.
19264static bool isLegalAddressImmediate(int64_t V, EVT VT,
19265 const ARMSubtarget *Subtarget) {
19266 if (V == 0)
19267 return true;
19268
19269 if (!VT.isSimple())
19270 return false;
19271
19272 if (Subtarget->isThumb1Only())
19273 return isLegalT1AddressImmediate(V, VT);
19274 else if (Subtarget->isThumb2())
19275 return isLegalT2AddressImmediate(V, VT, Subtarget);
19276
19277 // ARM mode.
19278 if (V < 0)
19279 V = - V;
19280 switch (VT.getSimpleVT().SimpleTy) {
19281 default: return false;
19282 case MVT::i1:
19283 case MVT::i8:
19284 case MVT::i32:
19285 // +- imm12
19286 return isUInt<12>(V);
19287 case MVT::i16:
19288 // +- imm8
19289 return isUInt<8>(V);
19290 case MVT::f32:
19291 case MVT::f64:
19292 if (!Subtarget->hasVFP2Base()) // FIXME: NEON?
19293 return false;
19294 return isShiftedUInt<8, 2>(V);
19295 }
19296}
19297
19298bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
19299 EVT VT) const {
19300 int Scale = AM.Scale;
19301 if (Scale < 0)
19302 return false;
19303
19304 switch (VT.getSimpleVT().SimpleTy) {
19305 default: return false;
19306 case MVT::i1:
19307 case MVT::i8:
19308 case MVT::i16:
19309 case MVT::i32:
19310 if (Scale == 1)
19311 return true;
19312 // r + r << imm
19313 Scale = Scale & ~1;
19314 return Scale == 2 || Scale == 4 || Scale == 8;
19315 case MVT::i64:
19316 // FIXME: What are we trying to model here? ldrd doesn't have an r + r
19317 // version in Thumb mode.
19318 // r + r
19319 if (Scale == 1)
19320 return true;
19321 // r * 2 (this can be lowered to r + r).
19322 if (!AM.HasBaseReg && Scale == 2)
19323 return true;
19324 return false;
19325 case MVT::isVoid:
19326 // Note, we allow "void" uses (basically, uses that aren't loads or
19327 // stores), because arm allows folding a scale into many arithmetic
19328 // operations. This should be made more precise and revisited later.
19329
19330 // Allow r << imm, but the imm has to be a multiple of two.
19331 if (Scale & 1) return false;
19332 return isPowerOf2_32(Scale);
19333 }
19334}
19335
19336bool ARMTargetLowering::isLegalT1ScaledAddressingMode(const AddrMode &AM,
19337 EVT VT) const {
19338 const int Scale = AM.Scale;
19339
19340 // Negative scales are not supported in Thumb1.
19341 if (Scale < 0)
19342 return false;
19343
19344 // Thumb1 addressing modes do not support register scaling excepting the
19345 // following cases:
19346 // 1. Scale == 1 means no scaling.
19347 // 2. Scale == 2 this can be lowered to r + r if there is no base register.
19348 return (Scale == 1) || (!AM.HasBaseReg && Scale == 2);
19349}
19350
19351/// isLegalAddressingMode - Return true if the addressing mode represented
19352/// by AM is legal for this target, for a load/store of the specified type.
19353bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL,
19354 const AddrMode &AM, Type *Ty,
19355 unsigned AS, Instruction *I) const {
19356 EVT VT = getValueType(DL, Ty, true);
19357 if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
19358 return false;
19359
19360 // Can never fold addr of global into load/store.
19361 if (AM.BaseGV)
19362 return false;
19363
19364 switch (AM.Scale) {
19365 case 0: // no scale reg, must be "r+i" or "r", or "i".
19366 break;
19367 default:
19368 // ARM doesn't support any R+R*scale+imm addr modes.
19369 if (AM.BaseOffs)
19370 return false;
19371
19372 if (!VT.isSimple())
19373 return false;
19374
19375 if (Subtarget->isThumb1Only())
19376 return isLegalT1ScaledAddressingMode(AM, VT);
19377
19378 if (Subtarget->isThumb2())
19379 return isLegalT2ScaledAddressingMode(AM, VT);
19380
19381 int Scale = AM.Scale;
19382 switch (VT.getSimpleVT().SimpleTy) {
19383 default: return false;
19384 case MVT::i1:
19385 case MVT::i8:
19386 case MVT::i32:
19387 if (Scale < 0) Scale = -Scale;
19388 if (Scale == 1)
19389 return true;
19390 // r + r << imm
19391 return isPowerOf2_32(Scale & ~1);
19392 case MVT::i16:
19393 case MVT::i64:
19394 // r +/- r
19395 if (Scale == 1 || (AM.HasBaseReg && Scale == -1))
19396 return true;
19397 // r * 2 (this can be lowered to r + r).
19398 if (!AM.HasBaseReg && Scale == 2)
19399 return true;
19400 return false;
19401
19402 case MVT::isVoid:
19403 // Note, we allow "void" uses (basically, uses that aren't loads or
19404 // stores), because arm allows folding a scale into many arithmetic
19405 // operations. This should be made more precise and revisited later.
19406
19407 // Allow r << imm, but the imm has to be a multiple of two.
19408 if (Scale & 1) return false;
19409 return isPowerOf2_32(Scale);
19410 }
19411 }
19412 return true;
19413}
19414
19415/// isLegalICmpImmediate - Return true if the specified immediate is legal
19416/// icmp immediate, that is the target has icmp instructions which can compare
19417/// a register against the immediate without having to materialize the
19418/// immediate into a register.
19419bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
19420 // Thumb2 and ARM modes can use cmn for negative immediates.
19421 if (!Subtarget->isThumb())
19422 return ARM_AM::getSOImmVal((uint32_t)Imm) != -1 ||
19423 ARM_AM::getSOImmVal(-(uint32_t)Imm) != -1;
19424 if (Subtarget->isThumb2())
19425 return ARM_AM::getT2SOImmVal((uint32_t)Imm) != -1 ||
19426 ARM_AM::getT2SOImmVal(-(uint32_t)Imm) != -1;
19427 // Thumb1 doesn't have cmn, and only 8-bit immediates.
19428 return Imm >= 0 && Imm <= 255;
19429}
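// A sketch of what the predicate above accepts (example values, not from a
// test case): in ARM or Thumb2 mode a compare against -255 is legal because it
// can be emitted as cmn r0, #255; in Thumb1 only plain unsigned 8-bit
// immediates (0..255) qualify, so the same compare would need the constant
// materialized into a register first.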
19430
19431/// isLegalAddImmediate - Return true if the specified immediate is a legal add
19432/// *or sub* immediate, that is the target has add or sub instructions which can
19433/// add a register with the immediate without having to materialize the
19434/// immediate into a register.
19435bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const {
19436 // Same encoding for add/sub, just flip the sign.
19437 int64_t AbsImm = std::abs(Imm);
19438 if (!Subtarget->isThumb())
19439 return ARM_AM::getSOImmVal(AbsImm) != -1;
19440 if (Subtarget->isThumb2())
19441 return ARM_AM::getT2SOImmVal(AbsImm) != -1;
19442 // Thumb1 only has 8-bit unsigned immediate.
19443 return AbsImm >= 0 && AbsImm <= 255;
19444}
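// For example (values chosen for illustration): Imm = 1020 (0xFF << 2) is a
// legal add/sub immediate for ARM and Thumb2, being an 8-bit value under an
// even rotation, but not for Thumb1, which only accepts 0..255; the std::abs
// above means -1020 is treated the same way, flipping add to sub.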
19445
19446// Return false to prevent folding
19447// (mul (add r, c0), c1) -> (add (mul r, c1), c0*c1) in DAGCombine,
19448// if the folding leads to worse code.
19449bool ARMTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,
19450 SDValue ConstNode) const {
19451 // Let the DAGCombiner decide for vector types and large types.
19452 const EVT VT = AddNode.getValueType();
19453 if (VT.isVector() || VT.getScalarSizeInBits() > 32)
19454 return true;
19455
19456 // It is worse if c0 is legal add immediate, while c1*c0 is not
19457 // and has to be composed by at least two instructions.
19458 const ConstantSDNode *C0Node = cast<ConstantSDNode>(AddNode.getOperand(1));
19459 const ConstantSDNode *C1Node = cast<ConstantSDNode>(ConstNode);
19460 const int64_t C0 = C0Node->getSExtValue();
19461 APInt CA = C0Node->getAPIntValue() * C1Node->getAPIntValue();
19462 if (!isLegalAddImmediate(C0) || isLegalAddImmediate(CA.getSExtValue()))
19463 return true;
19464 if (ConstantMaterializationCost((unsigned)CA.getZExtValue(), Subtarget) > 1)
19465 return false;
19466
19467 // Default to true and let the DAGCombiner decide.
19468 return true;
19469}
19470
19471static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
19472 bool isSEXTLoad, SDValue &Base,
19473 SDValue &Offset, bool &isInc,
19474 SelectionDAG &DAG) {
19475 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
19476 return false;
19477
19478 if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
19479 // AddressingMode 3
19480 Base = Ptr->getOperand(0);
19481 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
19482 int RHSC = (int)RHS->getZExtValue();
19483 if (RHSC < 0 && RHSC > -256) {
19484       assert(Ptr->getOpcode() == ISD::ADD);
19485 isInc = false;
19486 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19487 return true;
19488 }
19489 }
19490 isInc = (Ptr->getOpcode() == ISD::ADD);
19491 Offset = Ptr->getOperand(1);
19492 return true;
19493 } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
19494 // AddressingMode 2
19495 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
19496 int RHSC = (int)RHS->getZExtValue();
19497 if (RHSC < 0 && RHSC > -0x1000) {
19498       assert(Ptr->getOpcode() == ISD::ADD);
19499 isInc = false;
19500 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19501 Base = Ptr->getOperand(0);
19502 return true;
19503 }
19504 }
19505
19506 if (Ptr->getOpcode() == ISD::ADD) {
19507 isInc = true;
19508 ARM_AM::ShiftOpc ShOpcVal=
19509 ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode());
19510 if (ShOpcVal != ARM_AM::no_shift) {
19511 Base = Ptr->getOperand(1);
19512 Offset = Ptr->getOperand(0);
19513 } else {
19514 Base = Ptr->getOperand(0);
19515 Offset = Ptr->getOperand(1);
19516 }
19517 return true;
19518 }
19519
19520 isInc = (Ptr->getOpcode() == ISD::ADD);
19521 Base = Ptr->getOperand(0);
19522 Offset = Ptr->getOperand(1);
19523 return true;
19524 }
19525
19526 // FIXME: Use VLDM / VSTM to emulate indexed FP load / store.
19527 return false;
19528}
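// The two paths above correspond to the classic ARM addressing modes: the
// i16/sext-i8 case uses AddrMode3 (ldrh/ldrsb), which takes an 8-bit immediate
// offset, hence the -256 bound, while the i32/i8 case uses AddrMode2
// (ldr/ldrb), which takes a 12-bit immediate, hence the -0x1000 bound. For
// instance, a pre-indexed ldrh r0, [r1, #-200]! fits the first range and
// ldr r0, [r1, #-2048]! fits the second; register offsets are accepted in
// both, optionally shifted in AddrMode2.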
19529
19530static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
19531 bool isSEXTLoad, SDValue &Base,
19532 SDValue &Offset, bool &isInc,
19533 SelectionDAG &DAG) {
19534 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
19535 return false;
19536
19537 Base = Ptr->getOperand(0);
19538 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
19539 int RHSC = (int)RHS->getZExtValue();
19540 if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
19541       assert(Ptr->getOpcode() == ISD::ADD);
19542 isInc = false;
19543 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19544 return true;
19545 } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero.
19546 isInc = Ptr->getOpcode() == ISD::ADD;
19547 Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0));
19548 return true;
19549 }
19550 }
19551
19552 return false;
19553}
19554
19555static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, Align Alignment,
19556 bool isSEXTLoad, bool IsMasked, bool isLE,
19557 SDValue &Base, SDValue &Offset,
19558 bool &isInc, SelectionDAG &DAG) {
19559 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
19560 return false;
19561 if (!isa<ConstantSDNode>(Ptr->getOperand(1)))
19562 return false;
19563
19564 // We allow LE non-masked loads to change the type (for example use a vldrb.8
19565 // as opposed to a vldrw.32). This can allow extra addressing modes or
19566 // alignments for what is otherwise an equivalent instruction.
19567 bool CanChangeType = isLE && !IsMasked;
19568
19569 ConstantSDNode *RHS = cast<ConstantSDNode>(Ptr->getOperand(1));
19570 int RHSC = (int)RHS->getZExtValue();
19571
19572 auto IsInRange = [&](int RHSC, int Limit, int Scale) {
19573 if (RHSC < 0 && RHSC > -Limit * Scale && RHSC % Scale == 0) {
19574       assert(Ptr->getOpcode() == ISD::ADD);
19575 isInc = false;
19576 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19577 return true;
19578 } else if (RHSC > 0 && RHSC < Limit * Scale && RHSC % Scale == 0) {
19579 isInc = Ptr->getOpcode() == ISD::ADD;
19580 Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0));
19581 return true;
19582 }
19583 return false;
19584 };
19585
19586 // Try to find a matching instruction based on s/zext, Alignment, Offset and
19587 // (in BE/masked) type.
19588 Base = Ptr->getOperand(0);
19589 if (VT == MVT::v4i16) {
19590 if (Alignment >= 2 && IsInRange(RHSC, 0x80, 2))
19591 return true;
19592 } else if (VT == MVT::v4i8 || VT == MVT::v8i8) {
19593 if (IsInRange(RHSC, 0x80, 1))
19594 return true;
19595 } else if (Alignment >= 4 &&
19596 (CanChangeType || VT == MVT::v4i32 || VT == MVT::v4f32) &&
19597 IsInRange(RHSC, 0x80, 4))
19598 return true;
19599 else if (Alignment >= 2 &&
19600 (CanChangeType || VT == MVT::v8i16 || VT == MVT::v8f16) &&
19601 IsInRange(RHSC, 0x80, 2))
19602 return true;
19603 else if ((CanChangeType || VT == MVT::v16i8) && IsInRange(RHSC, 0x80, 1))
19604 return true;
19605 return false;
19606}
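// Sketch of the ranges checked above: the MVE pre/post-indexed loads and
// stores take a 7-bit immediate scaled by the access size, so a v4i32 vldrw
// can fold offsets that are multiples of 4 up to +/-508 (0x80 * 4, exclusive),
// while byte-sized accesses fold offsets in the +/-1..127 range.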
19607
19608/// getPreIndexedAddressParts - returns true by value, base pointer and
19609/// offset pointer and addressing mode by reference if the node's address
19610/// can be legally represented as pre-indexed load / store address.
19611bool
19612ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
19613 SDValue &Offset,
19614 ISD::MemIndexedMode &AM,
19615 SelectionDAG &DAG) const {
19616 if (Subtarget->isThumb1Only())
19617 return false;
19618
19619 EVT VT;
19620 SDValue Ptr;
19621 Align Alignment;
19622 bool isSEXTLoad = false;
19623 bool IsMasked = false;
19624 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
19625 Ptr = LD->getBasePtr();
19626 VT = LD->getMemoryVT();
19627 Alignment = LD->getAlign();
19628 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
19629 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
19630 Ptr = ST->getBasePtr();
19631 VT = ST->getMemoryVT();
19632 Alignment = ST->getAlign();
19633 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
19634 Ptr = LD->getBasePtr();
19635 VT = LD->getMemoryVT();
19636 Alignment = LD->getAlign();
19637 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
19638 IsMasked = true;
19639 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
19640 Ptr = ST->getBasePtr();
19641 VT = ST->getMemoryVT();
19642 Alignment = ST->getAlign();
19643 IsMasked = true;
19644 } else
19645 return false;
19646
19647 bool isInc;
19648 bool isLegal = false;
19649 if (VT.isVector())
19650 isLegal = Subtarget->hasMVEIntegerOps() &&
19651 getMVEIndexedAddressParts(
19652 Ptr.getNode(), VT, Alignment, isSEXTLoad, IsMasked,
19653 Subtarget->isLittle(), Base, Offset, isInc, DAG);
19654 else {
19655 if (Subtarget->isThumb2())
19656 isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
19657 Offset, isInc, DAG);
19658 else
19659 isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
19660 Offset, isInc, DAG);
19661 }
19662 if (!isLegal)
19663 return false;
19664
19665 AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
19666 return true;
19667}
19668
19669/// getPostIndexedAddressParts - returns true by value, base pointer and
19670/// offset pointer and addressing mode by reference if this node can be
19671/// combined with a load / store to form a post-indexed load / store.
19672bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
19673 SDValue &Base,
19674 SDValue &Offset,
19675 ISD::MemIndexedMode &AM,
19676 SelectionDAG &DAG) const {
19677 EVT VT;
19678 SDValue Ptr;
19679 Align Alignment;
19680 bool isSEXTLoad = false, isNonExt;
19681 bool IsMasked = false;
19682 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
19683 VT = LD->getMemoryVT();
19684 Ptr = LD->getBasePtr();
19685 Alignment = LD->getAlign();
19686 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
19687 isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
19688 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
19689 VT = ST->getMemoryVT();
19690 Ptr = ST->getBasePtr();
19691 Alignment = ST->getAlign();
19692 isNonExt = !ST->isTruncatingStore();
19693 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
19694 VT = LD->getMemoryVT();
19695 Ptr = LD->getBasePtr();
19696 Alignment = LD->getAlign();
19697 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
19698 isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
19699 IsMasked = true;
19700 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
19701 VT = ST->getMemoryVT();
19702 Ptr = ST->getBasePtr();
19703 Alignment = ST->getAlign();
19704 isNonExt = !ST->isTruncatingStore();
19705 IsMasked = true;
19706 } else
19707 return false;
19708
19709 if (Subtarget->isThumb1Only()) {
19710 // Thumb-1 can do a limited post-inc load or store as an updating LDM. It
19711 // must be non-extending/truncating, i32, with an offset of 4.
19712     assert(Op->getValueType(0) == MVT::i32 && "Non-i32 post-inc op?!");
19713 if (Op->getOpcode() != ISD::ADD || !isNonExt)
19714 return false;
19715 auto *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1));
19716 if (!RHS || RHS->getZExtValue() != 4)
19717 return false;
19718 if (Alignment < Align(4))
19719 return false;
19720
19721 Offset = Op->getOperand(1);
19722 Base = Op->getOperand(0);
19723 AM = ISD::POST_INC;
19724 return true;
19725 }
19726
19727 bool isInc;
19728 bool isLegal = false;
19729 if (VT.isVector())
19730 isLegal = Subtarget->hasMVEIntegerOps() &&
19731 getMVEIndexedAddressParts(Op, VT, Alignment, isSEXTLoad, IsMasked,
19732 Subtarget->isLittle(), Base, Offset,
19733 isInc, DAG);
19734 else {
19735 if (Subtarget->isThumb2())
19736 isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
19737 isInc, DAG);
19738 else
19739 isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
19740 isInc, DAG);
19741 }
19742 if (!isLegal)
19743 return false;
19744
19745 if (Ptr != Base) {
19746 // Swap base ptr and offset to catch more post-index load / store when
19747 // it's legal. In Thumb2 mode, offset must be an immediate.
19748 if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&
19749 !Subtarget->isThumb2())
19750 std::swap(Base, Offset);
19751
19752 // Post-indexed load / store update the base pointer.
19753 if (Ptr != Base)
19754 return false;
19755 }
19756
19757 AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
19758 return true;
19759}
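// Illustrative effect of the hook above: a load followed by a pointer update,
// e.g.
//   t1   = load i32, ptr
//   ptr2 = add ptr, 4
// can be merged by the DAG combiner into a single post-indexed
// "ldr r0, [r1], #4" (ISD::POST_INC), so the base-register update comes for
// free; Thumb1 only gets the restricted i32, +4, aligned form handled above.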
19760
19761void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
19762 KnownBits &Known,
19763 const APInt &DemandedElts,
19764 const SelectionDAG &DAG,
19765 unsigned Depth) const {
19766 unsigned BitWidth = Known.getBitWidth();
19767 Known.resetAll();
19768 switch (Op.getOpcode()) {
19769 default: break;
19770 case ARMISD::ADDC:
19771 case ARMISD::ADDE:
19772 case ARMISD::SUBC:
19773 case ARMISD::SUBE:
19774 // Special cases when we convert a carry to a boolean.
19775 if (Op.getResNo() == 0) {
19776 SDValue LHS = Op.getOperand(0);
19777 SDValue RHS = Op.getOperand(1);
19778 // (ADDE 0, 0, C) will give us a single bit.
19779 if (Op->getOpcode() == ARMISD::ADDE && isNullConstant(LHS) &&
19780 isNullConstant(RHS)) {
19781 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
19782 return;
19783 }
19784 }
19785 break;
19786 case ARMISD::CMOV: {
19787 // Bits are known zero/one if known on the LHS and RHS.
19788 Known = DAG.computeKnownBits(Op.getOperand(0), Depth+1);
19789 if (Known.isUnknown())
19790 return;
19791
19792 KnownBits KnownRHS = DAG.computeKnownBits(Op.getOperand(1), Depth+1);
19793 Known = KnownBits::commonBits(Known, KnownRHS);
19794 return;
19795 }
19796 case ISD::INTRINSIC_W_CHAIN: {
19797 ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
19798 Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
19799 switch (IntID) {
19800 default: return;
19801 case Intrinsic::arm_ldaex:
19802 case Intrinsic::arm_ldrex: {
19803 EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
19804 unsigned MemBits = VT.getScalarSizeInBits();
19805 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
19806 return;
19807 }
19808 }
19809 }
19810 case ARMISD::BFI: {
19811 // Conservatively, we can recurse down the first operand
19812 // and just mask out all affected bits.
19813 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
19814
19815 // The operand to BFI is already a mask suitable for removing the bits it
19816 // sets.
19817 ConstantSDNode *CI = cast<ConstantSDNode>(Op.getOperand(2));
19818 const APInt &Mask = CI->getAPIntValue();
19819 Known.Zero &= Mask;
19820 Known.One &= Mask;
19821 return;
19822 }
19823 case ARMISD::VGETLANEs:
19824 case ARMISD::VGETLANEu: {
19825 const SDValue &SrcSV = Op.getOperand(0);
19826 EVT VecVT = SrcSV.getValueType();
19827     assert(VecVT.isVector() && "VGETLANE expected a vector type");
19828 const unsigned NumSrcElts = VecVT.getVectorNumElements();
19829 ConstantSDNode *Pos = cast<ConstantSDNode>(Op.getOperand(1).getNode());
19830     assert(Pos->getAPIntValue().ult(NumSrcElts) &&
19831            "VGETLANE index out of bounds");
19832 unsigned Idx = Pos->getZExtValue();
19833 APInt DemandedElt = APInt::getOneBitSet(NumSrcElts, Idx);
19834 Known = DAG.computeKnownBits(SrcSV, DemandedElt, Depth + 1);
19835
19836 EVT VT = Op.getValueType();
19837 const unsigned DstSz = VT.getScalarSizeInBits();
19838 const unsigned SrcSz = VecVT.getVectorElementType().getSizeInBits();
19839 (void)SrcSz;
19840     assert(SrcSz == Known.getBitWidth());
19841     assert(DstSz > SrcSz);
19842 if (Op.getOpcode() == ARMISD::VGETLANEs)
19843 Known = Known.sext(DstSz);
19844 else {
19845 Known = Known.zext(DstSz);
19846 }
19847     assert(DstSz == Known.getBitWidth());
19848 break;
19849 }
19850 case ARMISD::VMOVrh: {
19851 KnownBits KnownOp = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
19852     assert(KnownOp.getBitWidth() == 16);
19853 Known = KnownOp.zext(32);
19854 break;
19855 }
19856 case ARMISD::CSINC:
19857 case ARMISD::CSINV:
19858 case ARMISD::CSNEG: {
19859 KnownBits KnownOp0 = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
19860 KnownBits KnownOp1 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
19861
19862 // The result is either:
19863 // CSINC: KnownOp0 or KnownOp1 + 1
19864 // CSINV: KnownOp0 or ~KnownOp1
19865 // CSNEG: KnownOp0 or KnownOp1 * -1
19866 if (Op.getOpcode() == ARMISD::CSINC)
19867 KnownOp1 = KnownBits::computeForAddSub(
19868 true, false, KnownOp1, KnownBits::makeConstant(APInt(32, 1)));
19869 else if (Op.getOpcode() == ARMISD::CSINV)
19870 std::swap(KnownOp1.Zero, KnownOp1.One);
19871 else if (Op.getOpcode() == ARMISD::CSNEG)
19872 KnownOp1 = KnownBits::mul(
19873 KnownOp1, KnownBits::makeConstant(APInt(32, -1)));
19874
19875 Known = KnownBits::commonBits(KnownOp0, KnownOp1);
19876 break;
19877 }
19878 }
19879}
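// A worked example of the CMOV case above (made-up values): if one input is
// known to be 0x0F and the other 0xF0, only the shared fact "bits 31..8 are
// zero" survives the KnownBits::commonBits intersection, since the low byte
// disagrees. The ADDE special case similarly reports all but bit 0 as zero,
// because (ADDE 0, 0, C) is just the carry.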
19880
19881bool ARMTargetLowering::targetShrinkDemandedConstant(
19882 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
19883 TargetLoweringOpt &TLO) const {
19884 // Delay optimization, so we don't have to deal with illegal types, or block
19885 // optimizations.
19886 if (!TLO.LegalOps)
19887 return false;
19888
19889 // Only optimize AND for now.
19890 if (Op.getOpcode() != ISD::AND)
19891 return false;
19892
19893 EVT VT = Op.getValueType();
19894
19895 // Ignore vectors.
19896 if (VT.isVector())
19897 return false;
19898
19899   assert(VT == MVT::i32 && "Unexpected integer type");
19900
19901 // Make sure the RHS really is a constant.
19902 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
19903 if (!C)
19904 return false;
19905
19906 unsigned Mask = C->getZExtValue();
19907
19908 unsigned Demanded = DemandedBits.getZExtValue();
19909 unsigned ShrunkMask = Mask & Demanded;
19910 unsigned ExpandedMask = Mask | ~Demanded;
19911
19912 // If the mask is all zeros, let the target-independent code replace the
19913 // result with zero.
19914 if (ShrunkMask == 0)
19915 return false;
19916
19917 // If the mask is all ones, erase the AND. (Currently, the target-independent
19918 // code won't do this, so we have to do it explicitly to avoid an infinite
19919 // loop in obscure cases.)
19920 if (ExpandedMask == ~0U)
19921 return TLO.CombineTo(Op, Op.getOperand(0));
19922
19923 auto IsLegalMask = [ShrunkMask, ExpandedMask](unsigned Mask) -> bool {
19924 return (ShrunkMask & Mask) == ShrunkMask && (~ExpandedMask & Mask) == 0;
19925 };
19926 auto UseMask = [Mask, Op, VT, &TLO](unsigned NewMask) -> bool {
19927 if (NewMask == Mask)
19928 return true;
19929 SDLoc DL(Op);
19930 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
19931 SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
19932 return TLO.CombineTo(Op, NewOp);
19933 };
19934
19935 // Prefer uxtb mask.
19936 if (IsLegalMask(0xFF))
19937 return UseMask(0xFF);
19938
19939 // Prefer uxth mask.
19940 if (IsLegalMask(0xFFFF))
19941 return UseMask(0xFFFF);
19942
19943 // [1, 255] is Thumb1 movs+ands, legal immediate for ARM/Thumb2.
19944 // FIXME: Prefer a contiguous sequence of bits for other optimizations.
19945 if (ShrunkMask < 256)
19946 return UseMask(ShrunkMask);
19947
19948 // [-256, -2] is Thumb1 movs+bics, legal immediate for ARM/Thumb2.
19949 // FIXME: Prefer a contiguous sequence of bits for other optimizations.
19950 if ((int)ExpandedMask <= -2 && (int)ExpandedMask >= -256)
19951 return UseMask(ExpandedMask);
19952
19953 // Potential improvements:
19954 //
19955 // We could try to recognize lsls+lsrs or lsrs+lsls pairs here.
19956 // We could try to prefer Thumb1 immediates which can be lowered to a
19957 // two-instruction sequence.
19958 // We could try to recognize more legal ARM/Thumb2 immediates here.
19959
19960 return false;
19961}
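// Sketch of the preference order above with made-up values: for (x & 0xF3)
// where the user only demands bits 0x0FF3, the mask may be widened to 0xFF
// (the undemanded bits 2-3 don't matter), turning the AND into a single uxtb;
// failing that, a uxth-able 0xFFFF, a Thumb1 movs+ands constant in [1, 255],
// or a movs+bics constant in [-256, -2] is tried, in that order.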
19962
19963bool ARMTargetLowering::SimplifyDemandedBitsForTargetNode(
19964 SDValue Op, const APInt &OriginalDemandedBits,
19965 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
19966 unsigned Depth) const {
19967 unsigned Opc = Op.getOpcode();
19968
19969 switch (Opc) {
19970 case ARMISD::ASRL:
19971 case ARMISD::LSRL: {
19972 // If this is result 0 and the other result is unused, see if the demand
19973 // bits allow us to shrink this long shift into a standard small shift in
19974 // the opposite direction.
19975 if (Op.getResNo() == 0 && !Op->hasAnyUseOfValue(1) &&
19976 isa<ConstantSDNode>(Op->getOperand(2))) {
19977 unsigned ShAmt = Op->getConstantOperandVal(2);
19978 if (ShAmt < 32 && OriginalDemandedBits.isSubsetOf(APInt::getAllOnes(32)
19979 << (32 - ShAmt)))
19980 return TLO.CombineTo(
19981 Op, TLO.DAG.getNode(
19982 ISD::SHL, SDLoc(Op), MVT::i32, Op.getOperand(1),
19983 TLO.DAG.getConstant(32 - ShAmt, SDLoc(Op), MVT::i32)));
19984 }
19985 break;
19986 }
19987 case ARMISD::VBICIMM: {
19988 SDValue Op0 = Op.getOperand(0);
19989 unsigned ModImm = Op.getConstantOperandVal(1);
19990 unsigned EltBits = 0;
19991 uint64_t Mask = ARM_AM::decodeVMOVModImm(ModImm, EltBits);
19992 if ((OriginalDemandedBits & Mask) == 0)
19993 return TLO.CombineTo(Op, Op0);
19994 }
19995 }
19996
19997 return TargetLowering::SimplifyDemandedBitsForTargetNode(
19998 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
19999}
20000
20001//===----------------------------------------------------------------------===//
20002// ARM Inline Assembly Support
20003//===----------------------------------------------------------------------===//
20004
20005bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {
20006 // Looking for "rev" which is V6+.
20007 if (!Subtarget->hasV6Ops())
20008 return false;
20009
20010 InlineAsm *IA = cast<InlineAsm>(CI->getCalledOperand());
20011 std::string AsmStr = IA->getAsmString();
20012 SmallVector<StringRef, 4> AsmPieces;
20013 SplitString(AsmStr, AsmPieces, ";\n");
20014
20015 switch (AsmPieces.size()) {
20016 default: return false;
20017 case 1:
20018 AsmStr = std::string(AsmPieces[0]);
20019 AsmPieces.clear();
20020 SplitString(AsmStr, AsmPieces, " \t,");
20021
20022 // rev $0, $1
20023 if (AsmPieces.size() == 3 &&
20024 AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" &&
20025 IA->getConstraintString().compare(0, 4, "=l,l") == 0) {
20026 IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
20027 if (Ty && Ty->getBitWidth() == 32)
20028 return IntrinsicLowering::LowerToByteSwap(CI);
20029 }
20030 break;
20031 }
20032
20033 return false;
20034}
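// Sketch of the single pattern recognized above (hypothetical user code):
//   uint32_t r;
//   __asm__("rev %0, %1" : "=l"(r) : "l"(x));
// On V6+ this arrives here as the asm string "rev $0, $1" with the constraint
// string "=l,l" and is replaced by a call to llvm.bswap.i32, letting the
// optimizer see through the byte swap.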
20035
20036const char *ARMTargetLowering::LowerXConstraint(EVT ConstraintVT) const {
20037 // At this point, we have to lower this constraint to something else, so we
20038 // lower it to an "r" or "w". However, by doing this we will force the result
20039 // to be in register, while the X constraint is much more permissive.
20040 //
20041 // Although we are correct (we are free to emit anything, without
20042 // constraints), we might break use cases that would expect us to be more
20043 // efficient and emit something else.
20044 if (!Subtarget->hasVFP2Base())
20045 return "r";
20046 if (ConstraintVT.isFloatingPoint())
20047 return "w";
20048 if (ConstraintVT.isVector() && Subtarget->hasNEON() &&
20049 (ConstraintVT.getSizeInBits() == 64 ||
20050 ConstraintVT.getSizeInBits() == 128))
20051 return "w";
20052
20053 return "r";
20054}
20055
20056/// getConstraintType - Given a constraint letter, return the type of
20057/// constraint it is for this target.
20058ARMTargetLowering::ConstraintType
20059ARMTargetLowering::getConstraintType(StringRef Constraint) const {
20060 unsigned S = Constraint.size();
20061 if (S == 1) {
20062 switch (Constraint[0]) {
20063 default: break;
20064 case 'l': return C_RegisterClass;
20065 case 'w': return C_RegisterClass;
20066 case 'h': return C_RegisterClass;
20067 case 'x': return C_RegisterClass;
20068 case 't': return C_RegisterClass;
20069 case 'j': return C_Immediate; // Constant for movw.
20070 // An address with a single base register. Due to the way we
20071 // currently handle addresses it is the same as an 'r' memory constraint.
20072 case 'Q': return C_Memory;
20073 }
20074 } else if (S == 2) {
20075 switch (Constraint[0]) {
20076 default: break;
20077 case 'T': return C_RegisterClass;
20078 // All 'U+' constraints are addresses.
20079 case 'U': return C_Memory;
20080 }
20081 }
20082 return TargetLowering::getConstraintType(Constraint);
20083}
20084
20085/// Examine constraint type and operand type and determine a weight value.
20086/// This object must already have been set up with the operand type
20087/// and the current alternative constraint selected.
20088TargetLowering::ConstraintWeight
20089ARMTargetLowering::getSingleConstraintMatchWeight(
20090 AsmOperandInfo &info, const char *constraint) const {
20091 ConstraintWeight weight = CW_Invalid;
20092 Value *CallOperandVal = info.CallOperandVal;
20093 // If we don't have a value, we can't do a match,
20094 // but allow it at the lowest weight.
20095 if (!CallOperandVal)
20096 return CW_Default;
20097 Type *type = CallOperandVal->getType();
20098 // Look at the constraint type.
20099 switch (*constraint) {
20100 default:
20101 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
20102 break;
20103 case 'l':
20104 if (type->isIntegerTy()) {
20105 if (Subtarget->isThumb())
20106 weight = CW_SpecificReg;
20107 else
20108 weight = CW_Register;
20109 }
20110 break;
20111 case 'w':
20112 if (type->isFloatingPointTy())
20113 weight = CW_Register;
20114 break;
20115 }
20116 return weight;
20117}
20118
20119using RCPair = std::pair<unsigned, const TargetRegisterClass *>;
20120
20121RCPair ARMTargetLowering::getRegForInlineAsmConstraint(
20122 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
20123 switch (Constraint.size()) {
20124 case 1:
20125 // GCC ARM Constraint Letters
20126 switch (Constraint[0]) {
20127 case 'l': // Low regs or general regs.
20128 if (Subtarget->isThumb())
20129 return RCPair(0U, &ARM::tGPRRegClass);
20130 return RCPair(0U, &ARM::GPRRegClass);
20131 case 'h': // High regs or no regs.
20132 if (Subtarget->isThumb())
20133 return RCPair(0U, &ARM::hGPRRegClass);
20134 break;
20135 case 'r':
20136 if (Subtarget->isThumb1Only())
20137 return RCPair(0U, &ARM::tGPRRegClass);
20138 return RCPair(0U, &ARM::GPRRegClass);
20139 case 'w':
20140 if (VT == MVT::Other)
20141 break;
20142 if (VT == MVT::f32)
20143 return RCPair(0U, &ARM::SPRRegClass);
20144 if (VT.getSizeInBits() == 64)
20145 return RCPair(0U, &ARM::DPRRegClass);
20146 if (VT.getSizeInBits() == 128)
20147 return RCPair(0U, &ARM::QPRRegClass);
20148 break;
20149 case 'x':
20150 if (VT == MVT::Other)
20151 break;
20152 if (VT == MVT::f32)
20153 return RCPair(0U, &ARM::SPR_8RegClass);
20154 if (VT.getSizeInBits() == 64)
20155 return RCPair(0U, &ARM::DPR_8RegClass);
20156 if (VT.getSizeInBits() == 128)
20157 return RCPair(0U, &ARM::QPR_8RegClass);
20158 break;
20159 case 't':
20160 if (VT == MVT::Other)
20161 break;
20162 if (VT == MVT::f32 || VT == MVT::i32)
20163 return RCPair(0U, &ARM::SPRRegClass);
20164 if (VT.getSizeInBits() == 64)
20165 return RCPair(0U, &ARM::DPR_VFP2RegClass);
20166 if (VT.getSizeInBits() == 128)
20167 return RCPair(0U, &ARM::QPR_VFP2RegClass);
20168 break;
20169 }
20170 break;
20171
20172 case 2:
20173 if (Constraint[0] == 'T') {
20174 switch (Constraint[1]) {
20175 default:
20176 break;
20177 case 'e':
20178 return RCPair(0U, &ARM::tGPREvenRegClass);
20179 case 'o':
20180 return RCPair(0U, &ARM::tGPROddRegClass);
20181 }
20182 }
20183 break;
20184
20185 default:
20186 break;
20187 }
20188
20189 if (StringRef("{cc}").equals_insensitive(Constraint))
20190 return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass);
20191
20192 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
20193}
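// For example, on a Thumb2 target the mapping above gives: "l" -> tGPR
// (r0-r7), "h" -> hGPR (the high registers), "w" with an f32 operand -> SPR
// and with a 128-bit vector -> QPR, "Te"/"To" -> the even/odd low-register
// classes, and "{cc}" -> CPSR in the CCR class.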
20194
20195/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
20196/// vector. If it is invalid, don't add anything to Ops.
20197void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
20198 std::string &Constraint,
20199 std::vector<SDValue>&Ops,
20200 SelectionDAG &DAG) const {
20201 SDValue Result;
20202
20203 // Currently only support length 1 constraints.
20204 if (Constraint.length() != 1) return;
20205
20206 char ConstraintLetter = Constraint[0];
20207 switch (ConstraintLetter) {
20208 default: break;
20209 case 'j':
20210 case 'I': case 'J': case 'K': case 'L':
20211 case 'M': case 'N': case 'O':
20212 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
20213 if (!C)
20214 return;
20215
20216 int64_t CVal64 = C->getSExtValue();
20217 int CVal = (int) CVal64;
20218 // None of these constraints allow values larger than 32 bits. Check
20219 // that the value fits in an int.
20220 if (CVal != CVal64)
20221 return;
20222
20223 switch (ConstraintLetter) {
20224 case 'j':
20225 // Constant suitable for movw, must be between 0 and
20226 // 65535.
20227 if (Subtarget->hasV6T2Ops() || (Subtarget->hasV8MBaselineOps()))
20228 if (CVal >= 0 && CVal <= 65535)
20229 break;
20230 return;
20231 case 'I':
20232 if (Subtarget->isThumb1Only()) {
20233 // This must be a constant between 0 and 255, for ADD
20234 // immediates.
20235 if (CVal >= 0 && CVal <= 255)
20236 break;
20237 } else if (Subtarget->isThumb2()) {
20238 // A constant that can be used as an immediate value in a
20239 // data-processing instruction.
20240 if (ARM_AM::getT2SOImmVal(CVal) != -1)
20241 break;
20242 } else {
20243 // A constant that can be used as an immediate value in a
20244 // data-processing instruction.
20245 if (ARM_AM::getSOImmVal(CVal) != -1)
20246 break;
20247 }
20248 return;
20249
20250 case 'J':
20251 if (Subtarget->isThumb1Only()) {
20252 // This must be a constant between -255 and -1, for negated ADD
20253 // immediates. This can be used in GCC with an "n" modifier that
20254 // prints the negated value, for use with SUB instructions. It is
20255 // not useful otherwise but is implemented for compatibility.
20256 if (CVal >= -255 && CVal <= -1)
20257 break;
20258 } else {
20259 // This must be a constant between -4095 and 4095. It is not clear
20260 // what this constraint is intended for. Implemented for
20261 // compatibility with GCC.
20262 if (CVal >= -4095 && CVal <= 4095)
20263 break;
20264 }
20265 return;
20266
20267 case 'K':
20268 if (Subtarget->isThumb1Only()) {
20269 // A 32-bit value where only one byte has a nonzero value. Exclude
20270 // zero to match GCC. This constraint is used by GCC internally for
20271 // constants that can be loaded with a move/shift combination.
20272 // It is not useful otherwise but is implemented for compatibility.
20273 if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
20274 break;
20275 } else if (Subtarget->isThumb2()) {
20276 // A constant whose bitwise inverse can be used as an immediate
20277 // value in a data-processing instruction. This can be used in GCC
20278 // with a "B" modifier that prints the inverted value, for use with
20279 // BIC and MVN instructions. It is not useful otherwise but is
20280 // implemented for compatibility.
20281 if (ARM_AM::getT2SOImmVal(~CVal) != -1)
20282 break;
20283 } else {
20284 // A constant whose bitwise inverse can be used as an immediate
20285 // value in a data-processing instruction. This can be used in GCC
20286 // with a "B" modifier that prints the inverted value, for use with
20287 // BIC and MVN instructions. It is not useful otherwise but is
20288 // implemented for compatibility.
20289 if (ARM_AM::getSOImmVal(~CVal) != -1)
20290 break;
20291 }
20292 return;
20293
20294 case 'L':
20295 if (Subtarget->isThumb1Only()) {
20296 // This must be a constant between -7 and 7,
20297 // for 3-operand ADD/SUB immediate instructions.
20298 if (CVal >= -7 && CVal < 7)
20299 break;
20300 } else if (Subtarget->isThumb2()) {
20301 // A constant whose negation can be used as an immediate value in a
20302 // data-processing instruction. This can be used in GCC with an "n"
20303 // modifier that prints the negated value, for use with SUB
20304 // instructions. It is not useful otherwise but is implemented for
20305 // compatibility.
20306 if (ARM_AM::getT2SOImmVal(-CVal) != -1)
20307 break;
20308 } else {
20309 // A constant whose negation can be used as an immediate value in a
20310 // data-processing instruction. This can be used in GCC with an "n"
20311 // modifier that prints the negated value, for use with SUB
20312 // instructions. It is not useful otherwise but is implemented for
20313 // compatibility.
20314 if (ARM_AM::getSOImmVal(-CVal) != -1)
20315 break;
20316 }
20317 return;
20318
20319 case 'M':
20320 if (Subtarget->isThumb1Only()) {
20321 // This must be a multiple of 4 between 0 and 1020, for
20322 // ADD sp + immediate.
20323 if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
20324 break;
20325 } else {
20326 // A power of two or a constant between 0 and 32. This is used in
20327 // GCC for the shift amount on shifted register operands, but it is
20328 // useful in general for any shift amounts.
20329 if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
20330 break;
20331 }
20332 return;
20333
20334 case 'N':
20335 if (Subtarget->isThumb1Only()) {
20336 // This must be a constant between 0 and 31, for shift amounts.
20337 if (CVal >= 0 && CVal <= 31)
20338 break;
20339 }
20340 return;
20341
20342 case 'O':
20343 if (Subtarget->isThumb1Only()) {
20344 // This must be a multiple of 4 between -508 and 508, for
20345 // ADD/SUB sp = sp + immediate.
20346 if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
20347 break;
20348 }
20349 return;
20350 }
20351 Result = DAG.getTargetConstant(CVal, SDLoc(Op), Op.getValueType());
20352 break;
20353 }
20354
20355 if (Result.getNode()) {
20356 Ops.push_back(Result);
20357 return;
20358 }
20359 return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
20360}
20361
20362static RTLIB::Libcall getDivRemLibcall(
20363 const SDNode *N, MVT::SimpleValueType SVT) {
20364   assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
20365           N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
20366          "Unhandled Opcode in getDivRemLibcall");
20367 bool isSigned = N->getOpcode() == ISD::SDIVREM ||
20368 N->getOpcode() == ISD::SREM;
20369 RTLIB::Libcall LC;
20370 switch (SVT) {
20371   default: llvm_unreachable("Unexpected request for libcall!");
20372 case MVT::i8: LC = isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
20373 case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
20374 case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
20375 case MVT::i64: LC = isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
20376 }
20377 return LC;
20378}
20379
20380static TargetLowering::ArgListTy getDivRemArgList(
20381 const SDNode *N, LLVMContext *Context, const ARMSubtarget *Subtarget) {
20382   assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
20383           N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
20384          "Unhandled Opcode in getDivRemArgList");
20385 bool isSigned = N->getOpcode() == ISD::SDIVREM ||
20386 N->getOpcode() == ISD::SREM;
20387 TargetLowering::ArgListTy Args;
20388 TargetLowering::ArgListEntry Entry;
20389 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
20390 EVT ArgVT = N->getOperand(i).getValueType();
20391 Type *ArgTy = ArgVT.getTypeForEVT(*Context);
20392 Entry.Node = N->getOperand(i);
20393 Entry.Ty = ArgTy;
20394 Entry.IsSExt = isSigned;
20395 Entry.IsZExt = !isSigned;
20396 Args.push_back(Entry);
20397 }
20398 if (Subtarget->isTargetWindows() && Args.size() >= 2)
20399 std::swap(Args[0], Args[1]);
20400 return Args;
20401}
20402
20403SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
20404   assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
20405           Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
20406           Subtarget->isTargetWindows()) &&
20407          "Register-based DivRem lowering only");
20408 unsigned Opcode = Op->getOpcode();
20409   assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
20410          "Invalid opcode for Div/Rem lowering");
20411 bool isSigned = (Opcode == ISD::SDIVREM);
20412 EVT VT = Op->getValueType(0);
20413 Type *Ty = VT.getTypeForEVT(*DAG.getContext());
20414 SDLoc dl(Op);
20415
20416 // If the target has hardware divide, use divide + multiply + subtract:
20417 // div = a / b
20418 // rem = a - b * div
20419 // return {div, rem}
20420 // This should be lowered into UDIV/SDIV + MLS later on.
20421 bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
20422 : Subtarget->hasDivideInARMMode();
20423 if (hasDivide && Op->getValueType(0).isSimple() &&
20424 Op->getSimpleValueType(0) == MVT::i32) {
20425 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
20426 const SDValue Dividend = Op->getOperand(0);
20427 const SDValue Divisor = Op->getOperand(1);
20428 SDValue Div = DAG.getNode(DivOpcode, dl, VT, Dividend, Divisor);
20429 SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Div, Divisor);
20430 SDValue Rem = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
20431
20432 SDValue Values[2] = {Div, Rem};
20433 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VT, VT), Values);
20434 }
20435
20436 RTLIB::Libcall LC = getDivRemLibcall(Op.getNode(),
20437 VT.getSimpleVT().SimpleTy);
20438 SDValue InChain = DAG.getEntryNode();
20439
20440 TargetLowering::ArgListTy Args = getDivRemArgList(Op.getNode(),
20441 DAG.getContext(),
20442 Subtarget);
20443
20444 SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
20445 getPointerTy(DAG.getDataLayout()));
20446
20447 Type *RetTy = StructType::get(Ty, Ty);
20448
20449 if (Subtarget->isTargetWindows())
20450 InChain = WinDBZCheckDenominator(DAG, Op.getNode(), InChain);
20451
20452 TargetLowering::CallLoweringInfo CLI(DAG);
20453 CLI.setDebugLoc(dl).setChain(InChain)
20454 .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
20455 .setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned);
20456
20457 std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
20458 return CallInfo.first;
20459}
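// Sketch of the two lowering paths above for a signed i32 divrem: with
// hardware divide it becomes
//   div = sdiv a, b ; rem = sub a, (mul div, b)
// which later folds into SDIV + MLS; otherwise it is a libcall such as
// __aeabi_idivmod on AEABI targets, which returns the quotient and remainder
// together (r0/r1), matching the {div, rem} result pair built here.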
20460
20461// Lowers REM using divmod helpers
20462// see RTABI section 4.2/4.3
20463SDValue ARMTargetLowering::LowerREM(SDNode *N, SelectionDAG &DAG) const {
20464 // Build return types (div and rem)
20465 std::vector<Type*> RetTyParams;
20466 Type *RetTyElement;
20467
20468 switch (N->getValueType(0).getSimpleVT().SimpleTy) {
20469     default: llvm_unreachable("Unexpected request for libcall!");
20470 case MVT::i8: RetTyElement = Type::getInt8Ty(*DAG.getContext()); break;
20471 case MVT::i16: RetTyElement = Type::getInt16Ty(*DAG.getContext()); break;
20472 case MVT::i32: RetTyElement = Type::getInt32Ty(*DAG.getContext()); break;
20473 case MVT::i64: RetTyElement = Type::getInt64Ty(*DAG.getContext()); break;
20474 }
20475
20476 RetTyParams.push_back(RetTyElement);
20477 RetTyParams.push_back(RetTyElement);
20478 ArrayRef<Type*> ret = ArrayRef<Type*>(RetTyParams);
20479 Type *RetTy = StructType::get(*DAG.getContext(), ret);
20480
20481 RTLIB::Libcall LC = getDivRemLibcall(N, N->getValueType(0).getSimpleVT().
20482 SimpleTy);
20483 SDValue InChain = DAG.getEntryNode();
20484 TargetLowering::ArgListTy Args = getDivRemArgList(N, DAG.getContext(),
20485 Subtarget);
20486 bool isSigned = N->getOpcode() == ISD::SREM;
20487 SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
20488 getPointerTy(DAG.getDataLayout()));
20489
20490 if (Subtarget->isTargetWindows())
20491 InChain = WinDBZCheckDenominator(DAG, N, InChain);
20492
20493 // Lower call
20494 CallLoweringInfo CLI(DAG);
20495 CLI.setChain(InChain)
20496 .setCallee(CallingConv::ARM_AAPCS, RetTy, Callee, std::move(Args))
20497 .setSExtResult(isSigned).setZExtResult(!isSigned).setDebugLoc(SDLoc(N));
20498 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
20499
20500 // Return second (rem) result operand (first contains div)
20501 SDNode *ResNode = CallResult.first.getNode();
20502   assert(ResNode->getNumOperands() == 2 && "divmod should return two operands");
20503 return ResNode->getOperand(1);
20504}
20505
20506SDValue
20507ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
20508   assert(Subtarget->isTargetWindows() && "unsupported target platform");
20509 SDLoc DL(Op);
20510
20511 // Get the inputs.
20512 SDValue Chain = Op.getOperand(0);
20513 SDValue Size = Op.getOperand(1);
20514
20515 if (DAG.getMachineFunction().getFunction().hasFnAttribute(
20516 "no-stack-arg-probe")) {
20517 MaybeAlign Align =
20518 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
20519 SDValue SP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
20520 Chain = SP.getValue(1);
20521 SP = DAG.getNode(ISD::SUB, DL, MVT::i32, SP, Size);
20522 if (Align)
20523 SP =
20524 DAG.getNode(ISD::AND, DL, MVT::i32, SP.getValue(0),
20525 DAG.getConstant(-(uint64_t)Align->value(), DL, MVT::i32));
20526 Chain = DAG.getCopyToReg(Chain, DL, ARM::SP, SP);
20527 SDValue Ops[2] = { SP, Chain };
20528 return DAG.getMergeValues(Ops, DL);
20529 }
20530
20531 SDValue Words = DAG.getNode(ISD::SRL, DL, MVT::i32, Size,
20532 DAG.getConstant(2, DL, MVT::i32));
20533
20534 SDValue Flag;
20535 Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Flag);
20536 Flag = Chain.getValue(1);
20537
20538 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
20539 Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Flag);
20540
20541 SDValue NewSP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
20542 Chain = NewSP.getValue(1);
20543
20544 SDValue Ops[2] = { NewSP, Chain };
20545 return DAG.getMergeValues(Ops, DL);
20546}
20547
20548SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
20549 bool IsStrict = Op->isStrictFPOpcode();
20550 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
20551 const unsigned DstSz = Op.getValueType().getSizeInBits();
20552 const unsigned SrcSz = SrcVal.getValueType().getSizeInBits();
20553   assert(DstSz > SrcSz && DstSz <= 64 && SrcSz >= 16 &&
20554          "Unexpected type for custom-lowering FP_EXTEND");
20555
20556   assert((!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) &&
20557          "With both FP DP and 16, any FP conversion is legal!");
20558
20559   assert(!(DstSz == 32 && Subtarget->hasFP16()) &&
20560          "With FP16, 16 to 32 conversion is legal!");
20561
20562 // Converting from 32 -> 64 is valid if we have FP64.
20563 if (SrcSz == 32 && DstSz == 64 && Subtarget->hasFP64()) {
20564 // FIXME: Remove this when we have strict fp instruction selection patterns
20565 if (IsStrict) {
20566 SDLoc Loc(Op);
20567 SDValue Result = DAG.getNode(ISD::FP_EXTEND,
20568 Loc, Op.getValueType(), SrcVal);
20569 return DAG.getMergeValues({Result, Op.getOperand(0)}, Loc);
20570 }
20571 return Op;
20572 }
20573
20574 // Either we are converting from 16 -> 64, without FP16 and/or
20575 // FP.double-precision or without Armv8-fp. So we must do it in two
20576 // steps.
20577 // Or we are converting from 32 -> 64 without fp.double-precision or 16 -> 32
20578 // without FP16. So we must do a function call.
20579 SDLoc Loc(Op);
20580 RTLIB::Libcall LC;
20581 MakeLibCallOptions CallOptions;
20582 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
20583 for (unsigned Sz = SrcSz; Sz <= 32 && Sz < DstSz; Sz *= 2) {
20584 bool Supported = (Sz == 16 ? Subtarget->hasFP16() : Subtarget->hasFP64());
20585 MVT SrcVT = (Sz == 16 ? MVT::f16 : MVT::f32);
20586 MVT DstVT = (Sz == 16 ? MVT::f32 : MVT::f64);
20587 if (Supported) {
20588 if (IsStrict) {
20589 SrcVal = DAG.getNode(ISD::STRICT_FP_EXTEND, Loc,
20590 {DstVT, MVT::Other}, {Chain, SrcVal});
20591 Chain = SrcVal.getValue(1);
20592 } else {
20593 SrcVal = DAG.getNode(ISD::FP_EXTEND, Loc, DstVT, SrcVal);
20594 }
20595 } else {
20596 LC = RTLIB::getFPEXT(SrcVT, DstVT);
20597       assert(LC != RTLIB::UNKNOWN_LIBCALL &&
20598              "Unexpected type for custom-lowering FP_EXTEND");
20599 std::tie(SrcVal, Chain) = makeLibCall(DAG, LC, DstVT, SrcVal, CallOptions,
20600 Loc, Chain);
20601 }
20602 }
20603
20604 return IsStrict ? DAG.getMergeValues({SrcVal, Chain}, Loc) : SrcVal;
20605}
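// Example of the stepwise path above (assumed subtarget: FP16 but no FP64):
// extending f16 -> f64 is done as a hardware f16 -> f32 convert (the
// Sz == 16, Supported step) followed by a libcall for f32 -> f64, typically
// __aeabi_f2d on AEABI targets; with strict FP the chain is threaded through
// each step as shown.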
20606
20607SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
20608 bool IsStrict = Op->isStrictFPOpcode();
20609
20610 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
20611 EVT SrcVT = SrcVal.getValueType();
20612 EVT DstVT = Op.getValueType();
20613 const unsigned DstSz = Op.getValueType().getSizeInBits();
20614 const unsigned SrcSz = SrcVT.getSizeInBits();
20615 (void)DstSz;
20616   assert(DstSz < SrcSz && SrcSz <= 64 && DstSz >= 16 &&
20617          "Unexpected type for custom-lowering FP_ROUND");
20618
20619 assert((!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) &&
20620 "With both FP DP and 16, any FP conversion is legal!");
20621
20622 SDLoc Loc(Op);
20623
20624 // A single instruction handles 32 -> 16 if we have FP16.
20625 if (SrcSz == 32 && Subtarget->hasFP16())
20626 return Op;
20627
20628 // Lib call from 32 -> 16 / 64 -> [32, 16]
20629 RTLIB::Libcall LC = RTLIB::getFPROUND(SrcVT, DstVT);
20630 assert(LC != RTLIB::UNKNOWN_LIBCALL &&
20631 "Unexpected type for custom-lowering FP_ROUND");
20632 MakeLibCallOptions CallOptions;
20633 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
20634 SDValue Result;
20635 std::tie(Result, Chain) = makeLibCall(DAG, LC, DstVT, SrcVal, CallOptions,
20636 Loc, Chain);
20637 return IsStrict ? DAG.getMergeValues({Result, Chain}, Loc) : Result;
20638}
20639
20640bool
20641ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
20642 // The ARM target isn't yet aware of offsets.
20643 return false;
20644}
20645
20646bool ARM::isBitFieldInvertedMask(unsigned v) {
20647 if (v == 0xffffffff)
20648 return false;
20649
20650 // There can be 1's on either or both "outsides"; all the "inside"
20651 // bits must be 0's.
20652 return isShiftedMask_32(~v);
20653}
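// Usage sketch (illustrative only, not part of ARMISelLowering.cpp): masks
// whose zero bits form a single contiguous "inside" run are accepted.
static void bitFieldInvertedMaskExamples() {
  assert(ARM::isBitFieldInvertedMask(0xFF0000FFu));  // zeros only in bits 8..23
  assert(ARM::isBitFieldInvertedMask(0xFFFFFF00u));  // zeros only in bits 0..7
  assert(!ARM::isBitFieldInvertedMask(0x00FF00FFu)); // two separate zero runs
  assert(!ARM::isBitFieldInvertedMask(0xFFFFFFFFu)); // no zero bits: rejected
}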
20654
20655/// isFPImmLegal - Returns true if the target can instruction select the
20656/// specified FP immediate natively. If false, the legalizer will
20657/// materialize the FP immediate as a load from a constant pool.
20658bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
20659 bool ForCodeSize) const {
20660 if (!Subtarget->hasVFP3Base())
20661 return false;
20662 if (VT == MVT::f16 && Subtarget->hasFullFP16())
20663 return ARM_AM::getFP16Imm(Imm) != -1;
20664 if (VT == MVT::f32 && Subtarget->hasFullFP16() &&
20665 ARM_AM::getFP32FP16Imm(Imm) != -1)
20666 return true;
20667 if (VT == MVT::f32)
20668 return ARM_AM::getFP32Imm(Imm) != -1;
20669 if (VT == MVT::f64 && Subtarget->hasFP64())
20670 return ARM_AM::getFP64Imm(Imm) != -1;
20671 return false;
20672}
20673
20674/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
20675/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
20676/// specified in the intrinsic calls.
20677bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
20678 const CallInst &I,
20679 MachineFunction &MF,
20680 unsigned Intrinsic) const {
20681 switch (Intrinsic) {
20682 case Intrinsic::arm_neon_vld1:
20683 case Intrinsic::arm_neon_vld2:
20684 case Intrinsic::arm_neon_vld3:
20685 case Intrinsic::arm_neon_vld4:
20686 case Intrinsic::arm_neon_vld2lane:
20687 case Intrinsic::arm_neon_vld3lane:
20688 case Intrinsic::arm_neon_vld4lane:
20689 case Intrinsic::arm_neon_vld2dup:
20690 case Intrinsic::arm_neon_vld3dup:
20691 case Intrinsic::arm_neon_vld4dup: {
20692 Info.opc = ISD::INTRINSIC_W_CHAIN;
20693 // Conservatively set memVT to the entire set of vectors loaded.
20694 auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
20695 uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
20696 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
20697 Info.ptrVal = I.getArgOperand(0);
20698 Info.offset = 0;
20699 Value *AlignArg = I.getArgOperand(I.arg_size() - 1);
20700 Info.align = cast<ConstantInt>(AlignArg)->getMaybeAlignValue();
20701 // volatile loads with NEON intrinsics not supported
20702 Info.flags = MachineMemOperand::MOLoad;
20703 return true;
20704 }
20705 case Intrinsic::arm_neon_vld1x2:
20706 case Intrinsic::arm_neon_vld1x3:
20707 case Intrinsic::arm_neon_vld1x4: {
20708 Info.opc = ISD::INTRINSIC_W_CHAIN;
20709 // Conservatively set memVT to the entire set of vectors loaded.
20710 auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
20711 uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
20712 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
20713 Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
20714 Info.offset = 0;
20715 Info.align.reset();
20716 // volatile loads with NEON intrinsics not supported
20717 Info.flags = MachineMemOperand::MOLoad;
20718 return true;
20719 }
20720 case Intrinsic::arm_neon_vst1:
20721 case Intrinsic::arm_neon_vst2:
20722 case Intrinsic::arm_neon_vst3:
20723 case Intrinsic::arm_neon_vst4:
20724 case Intrinsic::arm_neon_vst2lane:
20725 case Intrinsic::arm_neon_vst3lane:
20726 case Intrinsic::arm_neon_vst4lane: {
20727 Info.opc = ISD::INTRINSIC_VOID;
20728 // Conservatively set memVT to the entire set of vectors stored.
20729 auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
20730 unsigned NumElts = 0;
20731 for (unsigned ArgI = 1, ArgE = I.arg_size(); ArgI < ArgE; ++ArgI) {
20732 Type *ArgTy = I.getArgOperand(ArgI)->getType();
20733 if (!ArgTy->isVectorTy())
20734 break;
20735 NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
20736 }
20737 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
20738 Info.ptrVal = I.getArgOperand(0);
20739 Info.offset = 0;
20740 Value *AlignArg = I.getArgOperand(I.arg_size() - 1);
20741 Info.align = cast<ConstantInt>(AlignArg)->getMaybeAlignValue();
20742 // volatile stores with NEON intrinsics not supported
20743 Info.flags = MachineMemOperand::MOStore;
20744 return true;
20745 }
20746 case Intrinsic::arm_neon_vst1x2:
20747 case Intrinsic::arm_neon_vst1x3:
20748 case Intrinsic::arm_neon_vst1x4: {
20749 Info.opc = ISD::INTRINSIC_VOID;
20750 // Conservatively set memVT to the entire set of vectors stored.
20751 auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
20752 unsigned NumElts = 0;
20753 for (unsigned ArgI = 1, ArgE = I.arg_size(); ArgI < ArgE; ++ArgI) {
20754 Type *ArgTy = I.getArgOperand(ArgI)->getType();
20755 if (!ArgTy->isVectorTy())
20756 break;
20757 NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
20758 }
20759 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
20760 Info.ptrVal = I.getArgOperand(0);
20761 Info.offset = 0;
20762 Info.align.reset();
20763 // volatile stores with NEON intrinsics not supported
20764 Info.flags = MachineMemOperand::MOStore;
20765 return true;
20766 }
20767 case Intrinsic::arm_mve_vld2q:
20768 case Intrinsic::arm_mve_vld4q: {
20769 Info.opc = ISD::INTRINSIC_W_CHAIN;
20770 // Conservatively set memVT to the entire set of vectors loaded.
20771 Type *VecTy = cast<StructType>(I.getType())->getElementType(1);
20772 unsigned Factor = Intrinsic == Intrinsic::arm_mve_vld2q ? 2 : 4;
20773 Info.memVT = EVT::getVectorVT(VecTy->getContext(), MVT::i64, Factor * 2);
20774 Info.ptrVal = I.getArgOperand(0);
20775 Info.offset = 0;
20776 Info.align = Align(VecTy->getScalarSizeInBits() / 8);
20777 // volatile loads with MVE intrinsics not supported
20778 Info.flags = MachineMemOperand::MOLoad;
20779 return true;
20780 }
20781 case Intrinsic::arm_mve_vst2q:
20782 case Intrinsic::arm_mve_vst4q: {
20783 Info.opc = ISD::INTRINSIC_VOID;
20784 // Conservatively set memVT to the entire set of vectors stored.
20785 Type *VecTy = I.getArgOperand(1)->getType();
20786 unsigned Factor = Intrinsic == Intrinsic::arm_mve_vst2q ? 2 : 4;
20787 Info.memVT = EVT::getVectorVT(VecTy->getContext(), MVT::i64, Factor * 2);
20788 Info.ptrVal = I.getArgOperand(0);
20789 Info.offset = 0;
20790 Info.align = Align(VecTy->getScalarSizeInBits() / 8);
20791 // volatile stores with MVE intrinsics not supported
20792 Info.flags = MachineMemOperand::MOStore;
20793 return true;
20794 }
20795 case Intrinsic::arm_mve_vldr_gather_base:
20796 case Intrinsic::arm_mve_vldr_gather_base_predicated: {
20797 Info.opc = ISD::INTRINSIC_W_CHAIN;
20798 Info.ptrVal = nullptr;
20799 Info.memVT = MVT::getVT(I.getType());
20800 Info.align = Align(1);
20801 Info.flags |= MachineMemOperand::MOLoad;
20802 return true;
20803 }
20804 case Intrinsic::arm_mve_vldr_gather_base_wb:
20805 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: {
20806 Info.opc = ISD::INTRINSIC_W_CHAIN;
20807 Info.ptrVal = nullptr;
20808 Info.memVT = MVT::getVT(I.getType()->getContainedType(0));
20809 Info.align = Align(1);
20810 Info.flags |= MachineMemOperand::MOLoad;
20811 return true;
20812 }
20813 case Intrinsic::arm_mve_vldr_gather_offset:
20814 case Intrinsic::arm_mve_vldr_gather_offset_predicated: {
20815 Info.opc = ISD::INTRINSIC_W_CHAIN;
20816 Info.ptrVal = nullptr;
20817 MVT DataVT = MVT::getVT(I.getType());
20818 unsigned MemSize = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
20819 Info.memVT = MVT::getVectorVT(MVT::getIntegerVT(MemSize),
20820 DataVT.getVectorNumElements());
20821 Info.align = Align(1);
20822 Info.flags |= MachineMemOperand::MOLoad;
20823 return true;
20824 }
20825 case Intrinsic::arm_mve_vstr_scatter_base:
20826 case Intrinsic::arm_mve_vstr_scatter_base_predicated: {
20827 Info.opc = ISD::INTRINSIC_VOID;
20828 Info.ptrVal = nullptr;
20829 Info.memVT = MVT::getVT(I.getArgOperand(2)->getType());
20830 Info.align = Align(1);
20831 Info.flags |= MachineMemOperand::MOStore;
20832 return true;
20833 }
20834 case Intrinsic::arm_mve_vstr_scatter_base_wb:
20835 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated: {
20836 Info.opc = ISD::INTRINSIC_W_CHAIN;
20837 Info.ptrVal = nullptr;
20838 Info.memVT = MVT::getVT(I.getArgOperand(2)->getType());
20839 Info.align = Align(1);
20840 Info.flags |= MachineMemOperand::MOStore;
20841 return true;
20842 }
20843 case Intrinsic::arm_mve_vstr_scatter_offset:
20844 case Intrinsic::arm_mve_vstr_scatter_offset_predicated: {
20845 Info.opc = ISD::INTRINSIC_VOID;
20846 Info.ptrVal = nullptr;
20847 MVT DataVT = MVT::getVT(I.getArgOperand(2)->getType());
20848 unsigned MemSize = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
20849 Info.memVT = MVT::getVectorVT(MVT::getIntegerVT(MemSize),
20850 DataVT.getVectorNumElements());
20851 Info.align = Align(1);
20852 Info.flags |= MachineMemOperand::MOStore;
20853 return true;
20854 }
20855 case Intrinsic::arm_ldaex:
20856 case Intrinsic::arm_ldrex: {
20857 auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
20858 Type *ValTy = I.getParamElementType(0);
20859 Info.opc = ISD::INTRINSIC_W_CHAIN;
20860 Info.memVT = MVT::getVT(ValTy);
20861 Info.ptrVal = I.getArgOperand(0);
20862 Info.offset = 0;
20863 Info.align = DL.getABITypeAlign(ValTy);
20864 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
20865 return true;
20866 }
20867 case Intrinsic::arm_stlex:
20868 case Intrinsic::arm_strex: {
20869 auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
20870 Type *ValTy = I.getParamElementType(1);
20871 Info.opc = ISD::INTRINSIC_W_CHAIN;
20872 Info.memVT = MVT::getVT(ValTy);
20873 Info.ptrVal = I.getArgOperand(1);
20874 Info.offset = 0;
20875 Info.align = DL.getABITypeAlign(ValTy);
20876 Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
20877 return true;
20878 }
20879 case Intrinsic::arm_stlexd:
20880 case Intrinsic::arm_strexd:
20881 Info.opc = ISD::INTRINSIC_W_CHAIN;
20882 Info.memVT = MVT::i64;
20883 Info.ptrVal = I.getArgOperand(2);
20884 Info.offset = 0;
20885 Info.align = Align(8);
20886 Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
20887 return true;
20888
20889 case Intrinsic::arm_ldaexd:
20890 case Intrinsic::arm_ldrexd:
20891 Info.opc = ISD::INTRINSIC_W_CHAIN;
20892 Info.memVT = MVT::i64;
20893 Info.ptrVal = I.getArgOperand(0);
20894 Info.offset = 0;
20895 Info.align = Align(8);
20896 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
20897 return true;
20898
20899 default:
20900 break;
20901 }
20902
20903 return false;
20904}
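// Worked example (illustrative only): for a call to llvm.arm.neon.vld2
// returning { <4 x i32>, <4 x i32> } (256 bits total), the code above sets
// Info.memVT = v4i64 (NumElts = 256 / 64 = 4), Info.ptrVal to the pointer
// operand, and Info.align from the trailing alignment argument, so the
// resulting MachineMemOperand conservatively covers both loaded vectors.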
20905
20906/// Returns true if it is beneficial to convert a load of a constant
20907/// to just the constant itself.
20908bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
20909 Type *Ty) const {
20910 assert(Ty->isIntegerTy());
20911
20912 unsigned Bits = Ty->getPrimitiveSizeInBits();
20913 if (Bits == 0 || Bits > 32)
20914 return false;
20915 return true;
20916}
20917
20918bool ARMTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
20919 unsigned Index) const {
20920 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
20921 return false;
20922
20923 return (Index == 0 || Index == ResVT.getVectorNumElements());
20924}
20925
20926Instruction *ARMTargetLowering::makeDMB(IRBuilderBase &Builder,
20927 ARM_MB::MemBOpt Domain) const {
20928 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
20929
20930 // First, if the target has no DMB, see what fallback we can use.
20931 if (!Subtarget->hasDataBarrier()) {
20932 // Some ARMv6 cpus can support data barriers with an mcr instruction.
20933 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
20934 // here.
20935 if (Subtarget->hasV6Ops() && !Subtarget->isThumb()) {
20936 Function *MCR = Intrinsic::getDeclaration(M, Intrinsic::arm_mcr);
20937 Value* args[6] = {Builder.getInt32(15), Builder.getInt32(0),
20938 Builder.getInt32(0), Builder.getInt32(7),
20939 Builder.getInt32(10), Builder.getInt32(5)};
20940 return Builder.CreateCall(MCR, args);
20941 } else {
20942 // Instead of using barriers, atomic accesses on these subtargets use
20943 // libcalls.
20944 llvm_unreachable("makeDMB on a target so old that it has no barriers");
20945 }
20946 } else {
20947 Function *DMB = Intrinsic::getDeclaration(M, Intrinsic::arm_dmb);
20948 // Only a full system barrier exists in the M-class architectures.
20949 Domain = Subtarget->isMClass() ? ARM_MB::SY : Domain;
20950 Constant *CDomain = Builder.getInt32(Domain);
20951 return Builder.CreateCall(DMB, CDomain);
20952 }
20953}
20954
20955// Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
20956Instruction *ARMTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
20957 Instruction *Inst,
20958 AtomicOrdering Ord) const {
20959 switch (Ord) {
20960 case AtomicOrdering::NotAtomic:
20961 case AtomicOrdering::Unordered:
20962 llvm_unreachable("Invalid fence: unordered/non-atomic");
20963 case AtomicOrdering::Monotonic:
20964 case AtomicOrdering::Acquire:
20965 return nullptr; // Nothing to do
20966 case AtomicOrdering::SequentiallyConsistent:
20967 if (!Inst->hasAtomicStore())
20968 return nullptr; // Nothing to do
20969 [[fallthrough]];
20970 case AtomicOrdering::Release:
20971 case AtomicOrdering::AcquireRelease:
20972 if (Subtarget->preferISHSTBarriers())
20973 return makeDMB(Builder, ARM_MB::ISHST);
20974 // FIXME: add a comment with a link to documentation justifying this.
20975 else
20976 return makeDMB(Builder, ARM_MB::ISH);
20977 }
20978 llvm_unreachable("Unknown fence ordering in emitLeadingFence");
20979}
20980
20981Instruction *ARMTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
20982 Instruction *Inst,
20983 AtomicOrdering Ord) const {
20984 switch (Ord) {
20985 case AtomicOrdering::NotAtomic:
20986 case AtomicOrdering::Unordered:
20987 llvm_unreachable("Invalid fence: unordered/not-atomic");
20988 case AtomicOrdering::Monotonic:
20989 case AtomicOrdering::Release:
20990 return nullptr; // Nothing to do
20991 case AtomicOrdering::Acquire:
20992 case AtomicOrdering::AcquireRelease:
20993 case AtomicOrdering::SequentiallyConsistent:
20994 return makeDMB(Builder, ARM_MB::ISH);
20995 }
20996 llvm_unreachable("Unknown fence ordering in emitTrailingFence");
20997}
20998
20999// Loads and stores less than 64-bits are already atomic; ones above that
21000// are doomed anyway, so defer to the default libcall and blame the OS when
21001// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
21002// anything for those.
21003TargetLoweringBase::AtomicExpansionKind
21004ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
21005 bool has64BitAtomicStore;
21006 if (Subtarget->isMClass())
21007 has64BitAtomicStore = false;
21008 else if (Subtarget->isThumb())
21009 has64BitAtomicStore = Subtarget->hasV7Ops();
21010 else
21011 has64BitAtomicStore = Subtarget->hasV6Ops();
21012
21013 unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
21014 return Size == 64 && has64BitAtomicStore ? AtomicExpansionKind::Expand
21015 : AtomicExpansionKind::None;
21016}
21017
21018// Loads and stores less than 64-bits are already atomic; ones above that
21019// are doomed anyway, so defer to the default libcall and blame the OS when
21020// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
21021// anything for those.
21022// FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that
21023// guarantee, see DDI0406C ARM architecture reference manual,
21024// sections A8.8.72-74 LDRD)
21025TargetLowering::AtomicExpansionKind
21026ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
21027 bool has64BitAtomicLoad;
21028 if (Subtarget->isMClass())
21029 has64BitAtomicLoad = false;
21030 else if (Subtarget->isThumb())
21031 has64BitAtomicLoad = Subtarget->hasV7Ops();
21032 else
21033 has64BitAtomicLoad = Subtarget->hasV6Ops();
21034
21035 unsigned Size = LI->getType()->getPrimitiveSizeInBits();
21036 return (Size == 64 && has64BitAtomicLoad) ? AtomicExpansionKind::LLOnly
21037 : AtomicExpansionKind::None;
21038}
21039
21040// For the real atomic operations, we have ldrex/strex up to 32 bits,
21041// and up to 64 bits on the non-M profiles
21042TargetLowering::AtomicExpansionKind
21043ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
21044 if (AI->isFloatingPointOperation())
21045 return AtomicExpansionKind::CmpXChg;
21046
21047 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
21048 bool hasAtomicRMW;
21049 if (Subtarget->isMClass())
21050 hasAtomicRMW = Subtarget->hasV8MBaselineOps();
21051 else if (Subtarget->isThumb())
21052 hasAtomicRMW = Subtarget->hasV7Ops();
21053 else
21054 hasAtomicRMW = Subtarget->hasV6Ops();
21055 if (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW) {
21056 // At -O0, fast-regalloc cannot cope with the live vregs necessary to
21057 // implement atomicrmw without spilling. If the target address is also on
21058 // the stack and close enough to the spill slot, this can lead to a
21059 // situation where the monitor always gets cleared and the atomic operation
21060 // can never succeed. So at -O0 lower this operation to a CAS loop.
21061 if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
21062 return AtomicExpansionKind::CmpXChg;
21063 return AtomicExpansionKind::LLSC;
21064 }
21065 return AtomicExpansionKind::None;
21066}
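// Minimal decision sketch (illustrative only, not part of this file) mirroring
// shouldExpandAtomicRMWInIR above; MaxLLSCBits is an assumption of this sketch
// standing for the 32-bit M-profile vs. 64-bit A/R-profile limit.
static TargetLowering::AtomicExpansionKind
atomicRMWExpansionSketch(bool IsFloatingPoint, unsigned SizeInBits,
                         unsigned MaxLLSCBits, bool HasLLSC, bool IsOptNone) {
  if (IsFloatingPoint)
    return TargetLowering::AtomicExpansionKind::CmpXChg;
  if (SizeInBits <= MaxLLSCBits && HasLLSC)
    // At -O0, fall back to a CAS loop so the exclusive monitor is not cleared
    // by register spills between ldrex and strex.
    return IsOptNone ? TargetLowering::AtomicExpansionKind::CmpXChg
                     : TargetLowering::AtomicExpansionKind::LLSC;
  return TargetLowering::AtomicExpansionKind::None;
}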
21067
21068// Similar to shouldExpandAtomicRMWInIR, ldrex/strex can be used up to 32
21069// bits, and up to 64 bits on the non-M profiles.
21070TargetLowering::AtomicExpansionKind
21071ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
21072 // At -O0, fast-regalloc cannot cope with the live vregs necessary to
21073 // implement cmpxchg without spilling. If the address being exchanged is also
21074 // on the stack and close enough to the spill slot, this can lead to a
21075 // situation where the monitor always gets cleared and the atomic operation
21076 // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
21077 unsigned Size = AI->getOperand(1)->getType()->getPrimitiveSizeInBits();
21078 bool HasAtomicCmpXchg;
21079 if (Subtarget->isMClass())
21080 HasAtomicCmpXchg = Subtarget->hasV8MBaselineOps();
21081 else if (Subtarget->isThumb())
21082 HasAtomicCmpXchg = Subtarget->hasV7Ops();
21083 else
21084 HasAtomicCmpXchg = Subtarget->hasV6Ops();
21085 if (getTargetMachine().getOptLevel() != 0 && HasAtomicCmpXchg &&
21086 Size <= (Subtarget->isMClass() ? 32U : 64U))
21087 return AtomicExpansionKind::LLSC;
21088 return AtomicExpansionKind::None;
21089}
21090
21091bool ARMTargetLowering::shouldInsertFencesForAtomic(
21092 const Instruction *I) const {
21093 return InsertFencesForAtomic;
21094}
21095
21096bool ARMTargetLowering::useLoadStackGuardNode() const {
21097 // ROPI/RWPI are not supported currently.
21098 return !Subtarget->isROPI() && !Subtarget->isRWPI();
21099}
21100
21101void ARMTargetLowering::insertSSPDeclarations(Module &M) const {
21102 if (!Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
21103 return TargetLowering::insertSSPDeclarations(M);
21104
21105 // MSVC CRT has a global variable holding security cookie.
21106 M.getOrInsertGlobal("__security_cookie",
21107 Type::getInt8PtrTy(M.getContext()));
21108
21109 // MSVC CRT has a function to validate security cookie.
21110 FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
21111 "__security_check_cookie", Type::getVoidTy(M.getContext()),
21112 Type::getInt8PtrTy(M.getContext()));
21113 if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee()))
21114 F->addParamAttr(0, Attribute::AttrKind::InReg);
21115}
21116
21117Value *ARMTargetLowering::getSDagStackGuard(const Module &M) const {
21118 // MSVC CRT has a global variable holding security cookie.
21119 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
21120 return M.getGlobalVariable("__security_cookie");
21121 return TargetLowering::getSDagStackGuard(M);
21122}
21123
21124Function *ARMTargetLowering::getSSPStackGuardCheck(const Module &M) const {
21125 // MSVC CRT has a function to validate security cookie.
21126 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
21127 return M.getFunction("__security_check_cookie");
21128 return TargetLowering::getSSPStackGuardCheck(M);
21129}
21130
21131bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
21132 unsigned &Cost) const {
21133 // If we do not have NEON, vector types are not natively supported.
21134 if (!Subtarget->hasNEON())
21135 return false;
21136
21137 // Floating point values and vector values map to the same register file.
21138 // Therefore, although we could do a store+extract of a vector type, it is
21139 // better to leave such values as floats, since we have more freedom in the
21140 // addressing mode for those.
21141 if (VectorTy->isFPOrFPVectorTy())
21142 return false;
21143
21144 // If the index is unknown at compile time, this is very expensive to lower
21145 // and it is not possible to combine the store with the extract.
21146 if (!isa<ConstantInt>(Idx))
21147 return false;
21148
21149 assert(VectorTy->isVectorTy() && "VectorTy is not a vector type");
21150 unsigned BitWidth = VectorTy->getPrimitiveSizeInBits().getFixedSize();
21151 // We can do a store + vector extract on any vector that fits perfectly in a D
21152 // or Q register.
21153 if (BitWidth == 64 || BitWidth == 128) {
21154 Cost = 0;
21155 return true;
21156 }
21157 return false;
21158}
21159
21160bool ARMTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
21161 return Subtarget->hasV6T2Ops();
21162}
21163
21164bool ARMTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
21165 return Subtarget->hasV6T2Ops();
21166}
21167
21168bool ARMTargetLowering::shouldExpandShift(SelectionDAG &DAG, SDNode *N) const {
21169 return !Subtarget->hasMinSize() || Subtarget->isTargetWindows();
21170}
21171
21172Value *ARMTargetLowering::emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy,
21173 Value *Addr,
21174 AtomicOrdering Ord) const {
21175 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21176 bool IsAcquire = isAcquireOrStronger(Ord);
21177
21178 // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd
21179 // intrinsic must return {i32, i32} and we have to recombine them into a
21180 // single i64 here.
21181 if (ValueTy->getPrimitiveSizeInBits() == 64) {
21182 Intrinsic::ID Int =
21183 IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd;
21184 Function *Ldrex = Intrinsic::getDeclaration(M, Int);
21185
21186 Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
21187 Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi");
21188
21189 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
21190 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
21191 if (!Subtarget->isLittle())
21192 std::swap (Lo, Hi);
21193 Lo = Builder.CreateZExt(Lo, ValueTy, "lo64");
21194 Hi = Builder.CreateZExt(Hi, ValueTy, "hi64");
21195 return Builder.CreateOr(
21196 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValueTy, 32)), "val64");
21197 }
21198
21199 Type *Tys[] = { Addr->getType() };
21200 Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex;
21201 Function *Ldrex = Intrinsic::getDeclaration(M, Int, Tys);
21202 CallInst *CI = Builder.CreateCall(Ldrex, Addr);
21203
21204 CI->addParamAttr(
21205 0, Attribute::get(M->getContext(), Attribute::ElementType, ValueTy));
21206 return Builder.CreateTruncOrBitCast(CI, ValueTy);
21207}
21208
21209void ARMTargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
21210 IRBuilderBase &Builder) const {
21211 if (!Subtarget->hasV7Ops())
21212 return;
21213 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21214 Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::arm_clrex));
21215}
21216
21217Value *ARMTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
21218 Value *Val, Value *Addr,
21219 AtomicOrdering Ord) const {
21220 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21221 bool IsRelease = isReleaseOrStronger(Ord);
21222
21223 // Since the intrinsics must have legal type, the i64 intrinsics take two
21224 // parameters: "i32, i32". We must marshal Val into the appropriate form
21225 // before the call.
21226 if (Val->getType()->getPrimitiveSizeInBits() == 64) {
21227 Intrinsic::ID Int =
21228 IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd;
21229 Function *Strex = Intrinsic::getDeclaration(M, Int);
21230 Type *Int32Ty = Type::getInt32Ty(M->getContext());
21231
21232 Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo");
21233 Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi");
21234 if (!Subtarget->isLittle())
21235 std::swap(Lo, Hi);
21236 Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
21237 return Builder.CreateCall(Strex, {Lo, Hi, Addr});
21238 }
21239
21240 Intrinsic::ID Int = IsRelease ? Intrinsic::arm_stlex : Intrinsic::arm_strex;
21241 Type *Tys[] = { Addr->getType() };
21242 Function *Strex = Intrinsic::getDeclaration(M, Int, Tys);
21243
21244 CallInst *CI = Builder.CreateCall(
21245 Strex, {Builder.CreateZExtOrBitCast(
21246 Val, Strex->getFunctionType()->getParamType(0)),
21247 Addr});
21248 CI->addParamAttr(1, Attribute::get(M->getContext(), Attribute::ElementType,
21249 Val->getType()));
21250 return CI;
21251}
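// Illustrative sketch (not part of this file) of the 64-bit marshalling done
// by emitLoadLinked and emitStoreConditional, written as plain integer
// arithmetic assuming little-endian lane order (the code above swaps Lo/Hi on
// big-endian targets).
static uint64_t recombineLoHiSketch(uint32_t Lo, uint32_t Hi) {
  return uint64_t(Lo) | (uint64_t(Hi) << 32); // mirrors CreateOr/CreateShl
}
static void splitLoHiSketch(uint64_t Val, uint32_t &Lo, uint32_t &Hi) {
  Lo = uint32_t(Val);        // mirrors CreateTrunc(Val, i32)
  Hi = uint32_t(Val >> 32);  // mirrors CreateTrunc(CreateLShr(Val, 32), i32)
}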
21252
21253
21254bool ARMTargetLowering::alignLoopsWithOptSize() const {
21255 return Subtarget->isMClass();
21256}
21257
21258/// A helper function for determining the number of interleaved accesses we
21259/// will generate when lowering accesses of the given type.
21260unsigned
21261ARMTargetLowering::getNumInterleavedAccesses(VectorType *VecTy,
21262 const DataLayout &DL) const {
21263 return (DL.getTypeSizeInBits(VecTy) + 127) / 128;
21264}
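// Equivalent sketch (illustrative only): the access count is a ceiling
// division of the vector width by 128 bits.
static unsigned numInterleavedAccessesSketch(unsigned VecSizeInBits) {
  return (VecSizeInBits + 127) / 128; // e.g. 64 -> 1, 128 -> 1, 512 -> 4
}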
21265
21266bool ARMTargetLowering::isLegalInterleavedAccessType(
21267 unsigned Factor, FixedVectorType *VecTy, Align Alignment,
21268 const DataLayout &DL) const {
21269
21270 unsigned VecSize = DL.getTypeSizeInBits(VecTy);
21271 unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
21272
21273 if (!Subtarget->hasNEON() && !Subtarget->hasMVEIntegerOps())
21274 return false;
21275
21276 // Ensure the vector doesn't have f16 elements. Even though we could do an
21277 // i16 vldN, we can't hold the f16 vectors and will end up converting via
21278 // f32.
21279 if (Subtarget->hasNEON() && VecTy->getElementType()->isHalfTy())
21280 return false;
21281 if (Subtarget->hasMVEIntegerOps() && Factor == 3)
21282 return false;
21283
21284 // Ensure the number of vector elements is greater than 1.
21285 if (VecTy->getNumElements() < 2)
21286 return false;
21287
21288 // Ensure the element type is legal.
21289 if (ElSize != 8 && ElSize != 16 && ElSize != 32)
21290 return false;
21291 // And that the alignment is high enough under MVE.
21292 if (Subtarget->hasMVEIntegerOps() && Alignment < ElSize / 8)
21293 return false;
21294
21295 // Ensure the total vector size is 64 or a multiple of 128. Types larger than
21296 // 128 will be split into multiple interleaved accesses.
21297 if (Subtarget->hasNEON() && VecSize == 64)
21298 return true;
21299 return VecSize % 128 == 0;
21300}
21301
21302unsigned ARMTargetLowering::getMaxSupportedInterleaveFactor() const {
21303 if (Subtarget->hasNEON())
21304 return 4;
21305 if (Subtarget->hasMVEIntegerOps())
21306 return MVEMaxSupportedInterleaveFactor;
21307 return TargetLoweringBase::getMaxSupportedInterleaveFactor();
21308}
21309
21310/// Lower an interleaved load into a vldN intrinsic.
21311///
21312/// E.g. Lower an interleaved load (Factor = 2):
21313/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr, align 4
21314/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
21315/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
21316///
21317/// Into:
21318/// %vld2 = { <4 x i32>, <4 x i32> } call llvm.arm.neon.vld2(%ptr, 4)
21319/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 0
21320/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 1
21321bool ARMTargetLowering::lowerInterleavedLoad(
21322 LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
21323 ArrayRef<unsigned> Indices, unsigned Factor) const {
21324 assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
21325 "Invalid interleave factor");
21326 assert(!Shuffles.empty() && "Empty shufflevector input");
21327 assert(Shuffles.size() == Indices.size() &&
21328 "Unmatched number of shufflevectors and indices");
21329
21330 auto *VecTy = cast<FixedVectorType>(Shuffles[0]->getType());
21331 Type *EltTy = VecTy->getElementType();
21332
21333 const DataLayout &DL = LI->getModule()->getDataLayout();
21334 Align Alignment = LI->getAlign();
21335
21336 // Skip if we do not have NEON and skip illegal vector types. We can
21337 // "legalize" wide vector types into multiple interleaved accesses as long as
21338 // the vector types are divisible by 128.
21339 if (!isLegalInterleavedAccessType(Factor, VecTy, Alignment, DL))
21340 return false;
21341
21342 unsigned NumLoads = getNumInterleavedAccesses(VecTy, DL);
21343
21344 // A pointer vector can not be the return type of the ldN intrinsics. Need to
21345 // load integer vectors first and then convert to pointer vectors.
21346 if (EltTy->isPointerTy())
21347 VecTy = FixedVectorType::get(DL.getIntPtrType(EltTy), VecTy);
21348
21349 IRBuilder<> Builder(LI);
21350
21351 // The base address of the load.
21352 Value *BaseAddr = LI->getPointerOperand();
21353
21354 if (NumLoads > 1) {
21355 // If we're going to generate more than one load, reset the sub-vector type
21356 // to something legal.
21357 VecTy = FixedVectorType::get(VecTy->getElementType(),
21358 VecTy->getNumElements() / NumLoads);
21359
21360 // We will compute the pointer operand of each load from the original base
21361 // address using GEPs. Cast the base address to a pointer to the scalar
21362 // element type.
21363 BaseAddr = Builder.CreateBitCast(
21364 BaseAddr,
21365 VecTy->getElementType()->getPointerTo(LI->getPointerAddressSpace()));
21366 }
21367
21368 assert(isTypeLegal(EVT::getEVT(VecTy)) && "Illegal vldN vector type!");
21369
21370 auto createLoadIntrinsic = [&](Value *BaseAddr) {
21371 if (Subtarget->hasNEON()) {
21372 Type *Int8Ptr = Builder.getInt8PtrTy(LI->getPointerAddressSpace());
21373 Type *Tys[] = {VecTy, Int8Ptr};
21374 static const Intrinsic::ID LoadInts[3] = {Intrinsic::arm_neon_vld2,
21375 Intrinsic::arm_neon_vld3,
21376 Intrinsic::arm_neon_vld4};
21377 Function *VldnFunc =
21378 Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
21379
21380 SmallVector<Value *, 2> Ops;
21381 Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr));
21382 Ops.push_back(Builder.getInt32(LI->getAlign().value()));
21383
21384 return Builder.CreateCall(VldnFunc, Ops, "vldN");
21385 } else {
21386 assert((Factor == 2 || Factor == 4) &&
21387 "expected interleave factor of 2 or 4 for MVE");
21388 Intrinsic::ID LoadInts =
21389 Factor == 2 ? Intrinsic::arm_mve_vld2q : Intrinsic::arm_mve_vld4q;
21390 Type *VecEltTy =
21391 VecTy->getElementType()->getPointerTo(LI->getPointerAddressSpace());
21392 Type *Tys[] = {VecTy, VecEltTy};
21393 Function *VldnFunc =
21394 Intrinsic::getDeclaration(LI->getModule(), LoadInts, Tys);
21395
21396 SmallVector<Value *, 2> Ops;
21397 Ops.push_back(Builder.CreateBitCast(BaseAddr, VecEltTy));
21398 return Builder.CreateCall(VldnFunc, Ops, "vldN");
21399 }
21400 };
21401
21402 // Holds sub-vectors extracted from the load intrinsic return values. The
21403 // sub-vectors are associated with the shufflevector instructions they will
21404 // replace.
21405 DenseMap<ShuffleVectorInst *, SmallVector<Value *, 4>> SubVecs;
21406
21407 for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
21408 // If we're generating more than one load, compute the base address of
21409 // subsequent loads as an offset from the previous.
21410 if (LoadCount > 0)
21411 BaseAddr = Builder.CreateConstGEP1_32(VecTy->getElementType(), BaseAddr,
21412 VecTy->getNumElements() * Factor);
21413
21414 CallInst *VldN = createLoadIntrinsic(BaseAddr);
21415
21416 // Replace uses of each shufflevector with the corresponding vector loaded
21417 // by ldN.
21418 for (unsigned i = 0; i < Shuffles.size(); i++) {
21419 ShuffleVectorInst *SV = Shuffles[i];
21420 unsigned Index = Indices[i];
21421
21422 Value *SubVec = Builder.CreateExtractValue(VldN, Index);
21423
21424 // Convert the integer vector to pointer vector if the element is pointer.
21425 if (EltTy->isPointerTy())
21426 SubVec = Builder.CreateIntToPtr(
21427 SubVec,
21428 FixedVectorType::get(SV->getType()->getElementType(), VecTy));
21429
21430 SubVecs[SV].push_back(SubVec);
21431 }
21432 }
21433
21434 // Replace uses of the shufflevector instructions with the sub-vectors
21435 // returned by the load intrinsic. If a shufflevector instruction is
21436 // associated with more than one sub-vector, those sub-vectors will be
21437 // concatenated into a single wide vector.
21438 for (ShuffleVectorInst *SVI : Shuffles) {
21439 auto &SubVec = SubVecs[SVI];
21440 auto *WideVec =
21441 SubVec.size() > 1 ? concatenateVectors(Builder, SubVec) : SubVec[0];
21442 SVI->replaceAllUsesWith(WideVec);
21443 }
21444
21445 return true;
21446}
21447
21448/// Lower an interleaved store into a vstN intrinsic.
21449///
21450/// E.g. Lower an interleaved store (Factor = 3):
21451/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
21452/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
21453/// store <12 x i32> %i.vec, <12 x i32>* %ptr, align 4
21454///
21455/// Into:
21456/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
21457/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
21458/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
21459/// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
21460///
21461/// Note that the new shufflevectors will be removed and we'll only generate one
21462/// vst3 instruction in CodeGen.
21463///
21464/// Example for a more general valid mask (Factor 3). Lower:
21465/// %i.vec = shuffle <32 x i32> %v0, <32 x i32> %v1,
21466/// <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
21467/// store <12 x i32> %i.vec, <12 x i32>* %ptr
21468///
21469/// Into:
21470/// %sub.v0 = shuffle <32 x i32> %v0, <32 x i32> v1, <4, 5, 6, 7>
21471/// %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> v1, <32, 33, 34, 35>
21472/// %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> v1, <16, 17, 18, 19>
21473/// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
21474bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
21475 ShuffleVectorInst *SVI,
21476 unsigned Factor) const {
21477 assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
21478 "Invalid interleave factor");
21479
21480 auto *VecTy = cast<FixedVectorType>(SVI->getType());
21481 assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
21482
21483 unsigned LaneLen = VecTy->getNumElements() / Factor;
21484 Type *EltTy = VecTy->getElementType();
21485 auto *SubVecTy = FixedVectorType::get(EltTy, LaneLen);
21486
21487 const DataLayout &DL = SI->getModule()->getDataLayout();
21488 Align Alignment = SI->getAlign();
21489
21490 // Skip if we do not have NEON and skip illegal vector types. We can
21491 // "legalize" wide vector types into multiple interleaved accesses as long as
21492 // the vector types are divisible by 128.
21493 if (!isLegalInterleavedAccessType(Factor, SubVecTy, Alignment, DL))
21494 return false;
21495
21496 unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL);
21497
21498 Value *Op0 = SVI->getOperand(0);
21499 Value *Op1 = SVI->getOperand(1);
21500 IRBuilder<> Builder(SI);
21501
21502 // StN intrinsics don't support pointer vectors as arguments. Convert pointer
21503 // vectors to integer vectors.
21504 if (EltTy->isPointerTy()) {
21505 Type *IntTy = DL.getIntPtrType(EltTy);
21506
21507 // Convert to the corresponding integer vector.
21508 auto *IntVecTy =
21509 FixedVectorType::get(IntTy, cast<FixedVectorType>(Op0->getType()));
21510 Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
21511 Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
21512
21513 SubVecTy = FixedVectorType::get(IntTy, LaneLen);
21514 }
21515
21516 // The base address of the store.
21517 Value *BaseAddr = SI->getPointerOperand();
21518
21519 if (NumStores > 1) {
21520 // If we're going to generate more than one store, reset the lane length
21521 // and sub-vector type to something legal.
21522 LaneLen /= NumStores;
21523 SubVecTy = FixedVectorType::get(SubVecTy->getElementType(), LaneLen);
21524
21525 // We will compute the pointer operand of each store from the original base
21526 // address using GEPs. Cast the base address to a pointer to the scalar
21527 // element type.
21528 BaseAddr = Builder.CreateBitCast(
21529 BaseAddr,
21530 SubVecTy->getElementType()->getPointerTo(SI->getPointerAddressSpace()));
21531 }
21532
21533 assert(isTypeLegal(EVT::getEVT(SubVecTy)) && "Illegal vstN vector type!");
21534
21535 auto Mask = SVI->getShuffleMask();
21536
21537 auto createStoreIntrinsic = [&](Value *BaseAddr,
21538 SmallVectorImpl<Value *> &Shuffles) {
21539 if (Subtarget->hasNEON()) {
21540 static const Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2,
21541 Intrinsic::arm_neon_vst3,
21542 Intrinsic::arm_neon_vst4};
21543 Type *Int8Ptr = Builder.getInt8PtrTy(SI->getPointerAddressSpace());
21544 Type *Tys[] = {Int8Ptr, SubVecTy};
21545
21546 Function *VstNFunc = Intrinsic::getDeclaration(
21547 SI->getModule(), StoreInts[Factor - 2], Tys);
21548
21549 SmallVector<Value *, 6> Ops;
21550 Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr));
21551 append_range(Ops, Shuffles);
21552 Ops.push_back(Builder.getInt32(SI->getAlign().value()));
21553 Builder.CreateCall(VstNFunc, Ops);
21554 } else {
21555 assert((Factor == 2 || Factor == 4) &&
21556 "expected interleave factor of 2 or 4 for MVE");
21557 Intrinsic::ID StoreInts =
21558 Factor == 2 ? Intrinsic::arm_mve_vst2q : Intrinsic::arm_mve_vst4q;
21559 Type *EltPtrTy = SubVecTy->getElementType()->getPointerTo(
21560 SI->getPointerAddressSpace());
21561 Type *Tys[] = {EltPtrTy, SubVecTy};
21562 Function *VstNFunc =
21563 Intrinsic::getDeclaration(SI->getModule(), StoreInts, Tys);
21564
21565 SmallVector<Value *, 6> Ops;
21566 Ops.push_back(Builder.CreateBitCast(BaseAddr, EltPtrTy));
21567 append_range(Ops, Shuffles);
21568 for (unsigned F = 0; F < Factor; F++) {
21569 Ops.push_back(Builder.getInt32(F));
21570 Builder.CreateCall(VstNFunc, Ops);
21571 Ops.pop_back();
21572 }
21573 }
21574 };
21575
21576 for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
21577 // If we're generating more than one store, we compute the base address of
21578 // subsequent stores as an offset from the previous.
21579 if (StoreCount > 0)
21580 BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getElementType(),
21581 BaseAddr, LaneLen * Factor);
21582
21583 SmallVector<Value *, 4> Shuffles;
21584
21585 // Split the shufflevector operands into sub vectors for the new vstN call.
21586 for (unsigned i = 0; i < Factor; i++) {
21587 unsigned IdxI = StoreCount * LaneLen * Factor + i;
21588 if (Mask[IdxI] >= 0) {
21589 Shuffles.push_back(Builder.CreateShuffleVector(
21590 Op0, Op1, createSequentialMask(Mask[IdxI], LaneLen, 0)));
21591 } else {
21592 unsigned StartMask = 0;
21593 for (unsigned j = 1; j < LaneLen; j++) {
21594 unsigned IdxJ = StoreCount * LaneLen * Factor + j;
21595 if (Mask[IdxJ * Factor + IdxI] >= 0) {
21596 StartMask = Mask[IdxJ * Factor + IdxI] - IdxJ;
21597 break;
21598 }
21599 }
21600 // Note: If all elements in a chunk are undefs, StartMask=0!
21601 // Note: Filling undef gaps with random elements is ok, since
21602 // those elements were being written anyway (with undefs).
21603 // In the case of all undefs we're defaulting to using elems from 0
21604 // Note: StartMask cannot be negative, it's checked in
21605 // isReInterleaveMask
21606 Shuffles.push_back(Builder.CreateShuffleVector(
21607 Op0, Op1, createSequentialMask(StartMask, LaneLen, 0)));
21608 }
21609 }
21610
21611 createStoreIntrinsic(BaseAddr, Shuffles);
21612 }
21613 return true;
21614}
21615
21616enum HABaseType {
21617 HA_UNKNOWN = 0,
21618 HA_FLOAT,
21619 HA_DOUBLE,
21620 HA_VECT64,
21621 HA_VECT128
21622};
21623
21624static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base,
21625 uint64_t &Members) {
21626 if (auto *ST = dyn_cast<StructType>(Ty)) {
21627 for (unsigned i = 0; i < ST->getNumElements(); ++i) {
21628 uint64_t SubMembers = 0;
21629 if (!isHomogeneousAggregate(ST->getElementType(i), Base, SubMembers))
21630 return false;
21631 Members += SubMembers;
21632 }
21633 } else if (auto *AT = dyn_cast<ArrayType>(Ty)) {
21634 uint64_t SubMembers = 0;
21635 if (!isHomogeneousAggregate(AT->getElementType(), Base, SubMembers))
21636 return false;
21637 Members += SubMembers * AT->getNumElements();
21638 } else if (Ty->isFloatTy()) {
21639 if (Base != HA_UNKNOWN && Base != HA_FLOAT)
21640 return false;
21641 Members = 1;
21642 Base = HA_FLOAT;
21643 } else if (Ty->isDoubleTy()) {
21644 if (Base != HA_UNKNOWN && Base != HA_DOUBLE)
21645 return false;
21646 Members = 1;
21647 Base = HA_DOUBLE;
21648 } else if (auto *VT = dyn_cast<VectorType>(Ty)) {
21649 Members = 1;
21650 switch (Base) {
21651 case HA_FLOAT:
21652 case HA_DOUBLE:
21653 return false;
21654 case HA_VECT64:
21655 return VT->getPrimitiveSizeInBits().getFixedSize() == 64;
21656 case HA_VECT128:
21657 return VT->getPrimitiveSizeInBits().getFixedSize() == 128;
21658 case HA_UNKNOWN:
21659 switch (VT->getPrimitiveSizeInBits().getFixedSize()) {
21660 case 64:
21661 Base = HA_VECT64;
21662 return true;
21663 case 128:
21664 Base = HA_VECT128;
21665 return true;
21666 default:
21667 return false;
21668 }
21669 }
21670 }
21671
21672 return (Members > 0 && Members <= 4);
21673}
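// Worked examples (illustrative only, not part of this file) of the rules
// above: a homogeneous aggregate has at most four members, all of a single
// base type.
struct HAFourFloats { float X, Y, Z, W; };   // Base = HA_FLOAT,  Members = 4: HA
struct HATwoDoubles { double D[2]; };        // Base = HA_DOUBLE, Members = 2: HA
struct NotHAMixed   { float F; double D; };  // mixed base types: not an HA
struct NotHAFive    { float F[5]; };         // more than four members: not an HA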
21674
21675/// Return the correct alignment for the current calling convention.
21676Align ARMTargetLowering::getABIAlignmentForCallingConv(
21677 Type *ArgTy, const DataLayout &DL) const {
21678 const Align ABITypeAlign = DL.getABITypeAlign(ArgTy);
21679 if (!ArgTy->isVectorTy())
21680 return ABITypeAlign;
21681
21682 // Avoid over-aligning vector parameters. It would require realigning the
21683 // stack and waste space for no real benefit.
21684 return std::min(ABITypeAlign, DL.getStackAlignment());
21685}
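// Minimal sketch (illustrative only), assuming the AAPCS 8-byte stack
// alignment returned by DL.getStackAlignment(): a <4 x i32> argument with
// 16-byte ABI alignment is passed 8-byte aligned, while scalar arguments keep
// their natural alignment.
static Align abiAlignForCallSketch(Align ABITypeAlign, bool IsVector,
                                   Align StackAlign = Align(8)) {
  return IsVector ? std::min(ABITypeAlign, StackAlign) : ABITypeAlign;
}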
21686
21687/// Return true if a type is an AAPCS-VFP homogeneous aggregate or one of
21688/// [N x i32] or [N x i64]. This allows front-ends to skip emitting padding when
21689/// passing according to AAPCS rules.
21690bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters(
21691 Type *Ty, CallingConv::ID CallConv, bool isVarArg,
21692 const DataLayout &DL) const {
21693 if (getEffectiveCallingConv(CallConv, isVarArg) !=
21694 CallingConv::ARM_AAPCS_VFP)
21695 return false;
21696
21697 HABaseType Base = HA_UNKNOWN;
21698 uint64_t Members = 0;
21699 bool IsHA = isHomogeneousAggregate(Ty, Base, Members);
21700 LLVM_DEBUG(dbgs() << "isHA: " << IsHA << " "; Ty->dump());
21701
21702 bool IsIntArray = Ty->isArrayTy() && Ty->getArrayElementType()->isIntegerTy();
21703 return IsHA || IsIntArray;
21704}
21705
21706Register ARMTargetLowering::getExceptionPointerRegister(
21707 const Constant *PersonalityFn) const {
21708 // Platforms which do not use SjLj EH may return values in these registers
21709 // via the personality function.
21710 return Subtarget->useSjLjEH() ? Register() : ARM::R0;
21711}
21712
21713Register ARMTargetLowering::getExceptionSelectorRegister(
21714 const Constant *PersonalityFn) const {
21715 // Platforms which do not use SjLj EH may return values in these registers
21716 // via the personality function.
21717 return Subtarget->useSjLjEH() ? Register() : ARM::R1;
21718}
21719
21720void ARMTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
21721 // Update IsSplitCSR in ARMFunctionInfo.
21722 ARMFunctionInfo *AFI = Entry->getParent()->getInfo<ARMFunctionInfo>();
21723 AFI->setIsSplitCSR(true);
21724}
21725
21726void ARMTargetLowering::insertCopiesSplitCSR(
21727 MachineBasicBlock *Entry,
21728 const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
21729 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
21730 const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
21731 if (!IStart)
21732 return;
21733
21734 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
21735 MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
21736 MachineBasicBlock::iterator MBBI = Entry->begin();
21737 for (const MCPhysReg *I = IStart; *I; ++I) {
21738 const TargetRegisterClass *RC = nullptr;
21739 if (ARM::GPRRegClass.contains(*I))
21740 RC = &ARM::GPRRegClass;
21741 else if (ARM::DPRRegClass.contains(*I))
21742 RC = &ARM::DPRRegClass;
21743 else
21744 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
21745
21746 Register NewVR = MRI->createVirtualRegister(RC);
21747 // Create copy from CSR to a virtual register.
21748 // FIXME: this currently does not emit CFI pseudo-instructions, it works
21749 // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
21750 // nounwind. If we want to generalize this later, we may need to emit
21751 // CFI pseudo-instructions.
21752 assert(Entry->getParent()->getFunction().hasFnAttribute(
21753 Attribute::NoUnwind) &&
21754 "Function should be nounwind in insertCopiesSplitCSR!");
21755 Entry->addLiveIn(*I);
21756 BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
21757 .addReg(*I);
21758
21759 // Insert the copy-back instructions right before the terminator.
21760 for (auto *Exit : Exits)
21761 BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
21762 TII->get(TargetOpcode::COPY), *I)
21763 .addReg(NewVR);
21764 }
21765}
21766
21767void ARMTargetLowering::finalizeLowering(MachineFunction &MF) const {
21768 MF.getFrameInfo().computeMaxCallFrameSize(MF);
21769 TargetLoweringBase::finalizeLowering(MF);
21770}

/build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/llvm/include/llvm/Support/Casting.h

1//===- llvm/Support/Casting.h - Allow flexible, checked, casts --*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the isa<X>(), cast<X>(), dyn_cast<X>(),
10// cast_if_present<X>(), and dyn_cast_if_present<X>() templates.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_SUPPORT_CASTING_H
15#define LLVM_SUPPORT_CASTING_H
16
17#include "llvm/ADT/Optional.h"
18#include "llvm/Support/Compiler.h"
19#include "llvm/Support/type_traits.h"
20#include <cassert>
21#include <memory>
22#include <type_traits>
23
24namespace llvm {
25
26//===----------------------------------------------------------------------===//
27// simplify_type
28//===----------------------------------------------------------------------===//
29
30/// Define a template that can be specialized by smart pointers to reflect the
31/// fact that they are automatically dereferenced, and are not involved with the
32/// template selection process... the default implementation is a noop.
33// TODO: rename this and/or replace it with other cast traits.
34template <typename From> struct simplify_type {
35 using SimpleType = From; // The real type this represents...
36
37 // An accessor to get the real value...
38 static SimpleType &getSimplifiedValue(From &Val) { return Val; }
39};
40
41template <typename From> struct simplify_type<const From> {
42 using NonConstSimpleType = typename simplify_type<From>::SimpleType;
43 using SimpleType = typename add_const_past_pointer<NonConstSimpleType>::type;
44 using RetType =
45 typename add_lvalue_reference_if_not_pointer<SimpleType>::type;
46
47 static RetType getSimplifiedValue(const From &Val) {
48 return simplify_type<From>::getSimplifiedValue(const_cast<From &>(Val));
49 }
50};
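
The default simplify_type is an identity mapping; wrapper types opt in by specializing it so that the cast machinery operates on what they wrap. A minimal sketch under that assumption (none of these names exist in LLVM; Shape, Circle and ShapeHandle are hypothetical):

#include "llvm/Support/Casting.h"

// Hypothetical hierarchy using the usual classof() convention.
struct Shape {
  enum Kind { K_Circle, K_Square };
  Kind TheKind;
  Shape(Kind K) : TheKind(K) {}
};
struct Circle : Shape {
  Circle() : Shape(K_Circle) {}
  static bool classof(const Shape *S) { return S->TheKind == K_Circle; }
};

// Hypothetical handle wrapping a Shape*. The specialization below makes
// isa<>/cast<>/dyn_cast<> operate on the wrapped pointer rather than on the
// handle itself.
struct ShapeHandle { Shape *Ptr = nullptr; };

namespace llvm {
template <> struct simplify_type<ShapeHandle> {
  using SimpleType = Shape *;
  static SimpleType getSimplifiedValue(ShapeHandle &H) { return H.Ptr; }
};
} // namespace llvm

// With that in place, isa<Circle>(H) and dyn_cast<Circle>(H) compile for a
// ShapeHandle H, exactly as simplify_type<SDValue> later enables them for
// SDValue.
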
51
52// TODO: add this namespace once everyone is switched to using the new
53// interface.
54// namespace detail {
55
56//===----------------------------------------------------------------------===//
57// isa_impl
58//===----------------------------------------------------------------------===//
59
60// The core of the implementation of isa<X> is here; To and From should be
61// the names of classes. This template can be specialized to customize the
62// implementation of isa<> without rewriting it from scratch.
63template <typename To, typename From, typename Enabler = void> struct isa_impl {
64 static inline bool doit(const From &Val) { return To::classof(&Val); }
65};
66
67// Always allow upcasts, and perform no dynamic check for them.
68template <typename To, typename From>
69struct isa_impl<To, From, std::enable_if_t<std::is_base_of<To, From>::value>> {
70 static inline bool doit(const From &) { return true; }
71};
72
73template <typename To, typename From> struct isa_impl_cl {
74 static inline bool doit(const From &Val) {
75 return isa_impl<To, From>::doit(Val);
76 }
77};
78
79template <typename To, typename From> struct isa_impl_cl<To, const From> {
80 static inline bool doit(const From &Val) {
81 return isa_impl<To, From>::doit(Val);
82 }
83};
84
85template <typename To, typename From>
86struct isa_impl_cl<To, const std::unique_ptr<From>> {
87 static inline bool doit(const std::unique_ptr<From> &Val) {
88 assert(Val && "isa<> used on a null pointer");
89 return isa_impl_cl<To, From>::doit(*Val);
90 }
91};
92
93template <typename To, typename From> struct isa_impl_cl<To, From *> {
94 static inline bool doit(const From *Val) {
95 assert(Val && "isa<> used on a null pointer");
96 return isa_impl<To, From>::doit(*Val);
97 }
98};
99
100template <typename To, typename From> struct isa_impl_cl<To, From *const> {
101 static inline bool doit(const From *Val) {
102 assert(Val && "isa<> used on a null pointer");
103 return isa_impl<To, From>::doit(*Val);
104 }
105};
106
107template <typename To, typename From> struct isa_impl_cl<To, const From *> {
108 static inline bool doit(const From *Val) {
109 assert(Val && "isa<> used on a null pointer");
110 return isa_impl<To, From>::doit(*Val);
111 }
112};
113
114template <typename To, typename From>
115struct isa_impl_cl<To, const From *const> {
116 static inline bool doit(const From *Val) {
117 assert(Val && "isa<> used on a null pointer");
118 return isa_impl<To, From>::doit(*Val);
119 }
120};
121
122template <typename To, typename From, typename SimpleFrom>
123struct isa_impl_wrap {
124 // When From != SimplifiedType, we can simplify the type some more by using
125 // the simplify_type template.
126 static bool doit(const From &Val) {
127 return isa_impl_wrap<To, SimpleFrom,
128 typename simplify_type<SimpleFrom>::SimpleType>::
129 doit(simplify_type<const From>::getSimplifiedValue(Val));
130 }
131};
132
133template <typename To, typename FromTy>
134struct isa_impl_wrap<To, FromTy, FromTy> {
135 // When From == SimpleType, we are as simple as we are going to get.
136 static bool doit(const FromTy &Val) {
137 return isa_impl_cl<To, FromTy>::doit(Val);
138 }
139};
140
141//===----------------------------------------------------------------------===//
142// cast_retty + cast_retty_impl
143//===----------------------------------------------------------------------===//
144
145template <class To, class From> struct cast_retty;
146
147// Calculate what type the 'cast' function should return, based on a requested
148// type of To and a source type of From.
149template <class To, class From> struct cast_retty_impl {
150 using ret_type = To &; // Normal case, return Ty&
151};
152template <class To, class From> struct cast_retty_impl<To, const From> {
153 using ret_type = const To &; // Normal case, return Ty&
154};
155
156template <class To, class From> struct cast_retty_impl<To, From *> {
157 using ret_type = To *; // Pointer arg case, return Ty*
158};
159
160template <class To, class From> struct cast_retty_impl<To, const From *> {
161 using ret_type = const To *; // Constant pointer arg case, return const Ty*
162};
163
164template <class To, class From> struct cast_retty_impl<To, const From *const> {
165 using ret_type = const To *; // Constant pointer arg case, return const Ty*
166};
167
168template <class To, class From>
169struct cast_retty_impl<To, std::unique_ptr<From>> {
170private:
171 using PointerType = typename cast_retty_impl<To, From *>::ret_type;
172 using ResultType = std::remove_pointer_t<PointerType>;
173
174public:
175 using ret_type = std::unique_ptr<ResultType>;
176};
177
178template <class To, class From, class SimpleFrom> struct cast_retty_wrap {
179 // When the simplified type and the from type are not the same, use the type
180 // simplifier to reduce the type, then reuse cast_retty_impl to get the
181 // resultant type.
182 using ret_type = typename cast_retty<To, SimpleFrom>::ret_type;
183};
184
185template <class To, class FromTy> struct cast_retty_wrap<To, FromTy, FromTy> {
186 // When the simplified type is equal to the from type, use it directly.
187 using ret_type = typename cast_retty_impl<To, FromTy>::ret_type;
188};
189
190template <class To, class From> struct cast_retty {
191 using ret_type = typename cast_retty_wrap<
192 To, From, typename simplify_type<From>::SimpleType>::ret_type;
193};
194
195//===----------------------------------------------------------------------===//
196// cast_convert_val
197//===----------------------------------------------------------------------===//
198
199// Ensure the non-simple values are converted using the simplify_type template
200// that may be specialized by smart pointers...
201//
202template <class To, class From, class SimpleFrom> struct cast_convert_val {
203 // This is not a simple type, use the template to simplify it...
204 static typename cast_retty<To, From>::ret_type doit(const From &Val) {
205 return cast_convert_val<To, SimpleFrom,
206 typename simplify_type<SimpleFrom>::SimpleType>::
207 doit(simplify_type<From>::getSimplifiedValue(const_cast<From &>(Val)));
208 }
209};
210
211template <class To, class FromTy> struct cast_convert_val<To, FromTy, FromTy> {
212 // If it's a reference, switch to a pointer to do the cast and then deref it.
213 static typename cast_retty<To, FromTy>::ret_type doit(const FromTy &Val) {
214 return *(std::remove_reference_t<typename cast_retty<To, FromTy>::ret_type>
215 *)&const_cast<FromTy &>(Val);
216 }
217};
218
219template <class To, class FromTy>
220struct cast_convert_val<To, FromTy *, FromTy *> {
221 // If it's a pointer, we can use c-style casting directly.
222 static typename cast_retty<To, FromTy *>::ret_type doit(const FromTy *Val) {
223 return (typename cast_retty<To, FromTy *>::ret_type) const_cast<FromTy *>(
224 Val);
225 }
226};
227
228//===----------------------------------------------------------------------===//
229// is_simple_type
230//===----------------------------------------------------------------------===//
231
232template <class X> struct is_simple_type {
233 static const bool value =
234 std::is_same<X, typename simplify_type<X>::SimpleType>::value;
235};
236
237// } // namespace detail
238
239//===----------------------------------------------------------------------===//
240// CastIsPossible
241//===----------------------------------------------------------------------===//
242
243/// This struct provides a way to check if a given cast is possible. It provides
244/// a static function called isPossible that is used to check if a cast can be
245/// performed. It should be overridden like this:
246///
247/// template<> struct CastIsPossible<foo, bar> {
248/// static inline bool isPossible(const bar &b) {
249/// return b.isFoo();
250/// }
251/// };
252template <typename To, typename From, typename Enable = void>
253struct CastIsPossible {
254 static inline bool isPossible(const From &f) {
255 return isa_impl_wrap<
256 To, const From,
257 typename simplify_type<const From>::SimpleType>::doit(f);
258 }
259};
260
261// Needed for optional unwrapping. This could be implemented with isa_impl, but
262// we want to implement things in the new method and move old implementations
263// over. In fact, some of the isa_impl templates should be moved over to
264// CastIsPossible.
265template <typename To, typename From>
266struct CastIsPossible<To, Optional<From>> {
267 static inline bool isPossible(const Optional<From> &f) {
268 assert(f && "CastIsPossible::isPossible called on a nullopt!");
269 return isa_impl_wrap<
270 To, const From,
271 typename simplify_type<const From>::SimpleType>::doit(*f);
272 }
273};
274
275/// Upcasting (from derived to base) and casting from a type to itself should
276/// always be possible.
277template <typename To, typename From>
278struct CastIsPossible<To, From,
279 std::enable_if_t<std::is_base_of<To, From>::value>> {
280 static inline bool isPossible(const From &f) { return true; }
281};
282
283//===----------------------------------------------------------------------===//
284// Cast traits
285//===----------------------------------------------------------------------===//
286
287/// All of these cast traits are meant to be implementations for useful casts
288/// that users may want to use that are outside the standard behavior. An
289/// example of how to use a special cast called `CastTrait` is:
290///
291/// template<> struct CastInfo<foo, bar> : public CastTrait<foo, bar> {};
292///
293/// Essentially, if your use case falls directly into one of the use cases
294/// supported by a given cast trait, simply inherit your special CastInfo
295/// directly from one of these to avoid having to reimplement the boilerplate
296/// `isPossible/castFailed/doCast/doCastIfPossible`. A cast trait can also
297/// provide a subset of those functions.
298
299/// This cast trait just provides castFailed for the specified `To` type to make
300/// CastInfo specializations more declarative. In order to use this, the target
301/// result type must be `To` and `To` must be constructible from `nullptr`.
302template <typename To> struct NullableValueCastFailed {
303 static To castFailed() { return To(nullptr); }
304};
305
306/// This cast trait just provides the default implementation of doCastIfPossible
307/// to make CastInfo specializations more declarative. The `Derived` template
308/// parameter *must* be provided for forwarding castFailed and doCast.
309template <typename To, typename From, typename Derived>
310struct DefaultDoCastIfPossible {
311 static To doCastIfPossible(From f) {
312 if (!Derived::isPossible(f))
313 return Derived::castFailed();
314 return Derived::doCast(f);
315 }
316};
317
318namespace detail {
319/// A helper to derive the type to use with `Self` for cast traits, when the
320/// provided CRTP derived type is allowed to be void.
321template <typename OptionalDerived, typename Default>
322using SelfType = std::conditional_t<std::is_same<OptionalDerived, void>::value,
323 Default, OptionalDerived>;
324} // namespace detail
325
326/// This cast trait provides casting for the specific case of casting to a
327/// value-typed object from a pointer-typed object. Note that `To` must be
328/// nullable/constructible from a pointer to `From` to use this cast.
329template <typename To, typename From, typename Derived = void>
330struct ValueFromPointerCast
331 : public CastIsPossible<To, From *>,
332 public NullableValueCastFailed<To>,
333 public DefaultDoCastIfPossible<
334 To, From *,
335 detail::SelfType<Derived, ValueFromPointerCast<To, From>>> {
336 static inline To doCast(From *f) { return To(f); }
337};
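
A hedged sketch of composing the traits above (again with hypothetical Shape/CircleRef types, not anything from LLVM): a value type that wraps a pointer gets dyn_cast support simply by deriving its CastInfo from ValueFromPointerCast.

#include "llvm/Support/Casting.h"

// Hypothetical types; nothing here comes from LLVM itself.
struct Shape {
  enum Kind { K_Circle, K_Square };
  Kind TheKind;
  Shape(Kind K) : TheKind(K) {}
};

// A non-owning value type wrapping a Shape*. ValueFromPointerCast only needs
// it to be constructible from a Shape* (including nullptr) and to provide
// classof() for the isPossible check.
struct CircleRef {
  Shape *S;
  CircleRef(Shape *Ptr) : S(Ptr) {}
  static bool classof(const Shape *Sh) {
    return Sh->TheKind == Shape::K_Circle;
  }
};

namespace llvm {
template <>
struct CastInfo<CircleRef, Shape *>
    : public ValueFromPointerCast<CircleRef, Shape> {};
} // namespace llvm

// dyn_cast<CircleRef>(SomeShapePtr) now yields CircleRef(SomeShapePtr) when
// the shape is a circle and CircleRef(nullptr) otherwise.
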
338
339/// This cast trait provides std::unique_ptr casting. It has the semantics of
340/// moving the contents of the input unique_ptr into the output unique_ptr
341/// during the cast. It's also a good example of how to implement a move-only
342/// cast.
343template <typename To, typename From, typename Derived = void>
344struct UniquePtrCast : public CastIsPossible<To, From *> {
345 using Self = detail::SelfType<Derived, UniquePtrCast<To, From>>;
346 using CastResultType = std::unique_ptr<
347 std::remove_reference_t<typename cast_retty<To, From>::ret_type>>;
348
349 static inline CastResultType doCast(std::unique_ptr<From> &&f) {
350 return CastResultType((typename CastResultType::element_type *)f.release());
351 }
352
353 static inline CastResultType castFailed() { return CastResultType(nullptr); }
354
355 static inline CastResultType doCastIfPossible(std::unique_ptr<From> &&f) {
356 if (!Self::isPossible(f))
357 return castFailed();
358 return doCast(f);
359 }
360};
361
362/// This cast trait provides Optional<T> casting. This means that if you have a
363/// value type, you can cast it to another value type and have dyn_cast return
364/// an Optional<T>.
365template <typename To, typename From, typename Derived = void>
366struct OptionalValueCast
367 : public CastIsPossible<To, From>,
368 public DefaultDoCastIfPossible<
369 Optional<To>, From,
370 detail::SelfType<Derived, OptionalValueCast<To, From>>> {
371 static inline Optional<To> castFailed() { return Optional<To>{}; }
372
373 static inline Optional<To> doCast(const From &f) { return To(f); }
374};
375
376/// Provides a cast trait that strips `const` from types to make it easier to
377/// implement a const-version of a non-const cast. It just removes boilerplate
378/// and reduces the amount of code you as the user need to implement. You can
379/// use it like this:
380///
381/// template<> struct CastInfo<foo, bar> {
382/// ...verbose implementation...
383/// };
384///
385/// template<> struct CastInfo<foo, const bar> : public
386/// ConstStrippingForwardingCast<foo, const bar, CastInfo<foo, bar>> {};
387///
388template <typename To, typename From, typename ForwardTo>
389struct ConstStrippingForwardingCast {
390 // Remove the pointer if it exists, then we can get rid of consts/volatiles.
391 using DecayedFrom = std::remove_cv_t<std::remove_pointer_t<From>>;
392 // Now if it's a pointer, add it back. Otherwise, we want a ref.
393 using NonConstFrom = std::conditional_t<std::is_pointer<From>::value,
394 DecayedFrom *, DecayedFrom &>;
395
396 static inline bool isPossible(const From &f) {
397 return ForwardTo::isPossible(const_cast<NonConstFrom>(f));
398 }
399
400 static inline decltype(auto) castFailed() { return ForwardTo::castFailed(); }
401
402 static inline decltype(auto) doCast(const From &f) {
403 return ForwardTo::doCast(const_cast<NonConstFrom>(f));
404 }
405
406 static inline decltype(auto) doCastIfPossible(const From &f) {
407 return ForwardTo::doCastIfPossible(const_cast<NonConstFrom>(f));
408 }
409};
410
411/// Provides a cast trait that uses a defined pointer to pointer cast as a base
412/// for reference-to-reference casts. Note that it does not provide castFailed
413/// and doCastIfPossible because a pointer-to-pointer cast would likely just
414/// return `nullptr` which could cause nullptr dereference. You can use it like
415/// this:
416///
417/// template <> struct CastInfo<foo, bar *> { ... verbose implementation... };
418///
419/// template <>
420/// struct CastInfo<foo, bar>
421/// : public ForwardToPointerCast<foo, bar, CastInfo<foo, bar *>> {};
422///
423template <typename To, typename From, typename ForwardTo>
424struct ForwardToPointerCast {
425 static inline bool isPossible(const From &f) {
426 return ForwardTo::isPossible(&f);
427 }
428
429 static inline decltype(auto) doCast(const From &f) {
430 return *ForwardTo::doCast(&f);
431 }
432};
433
434//===----------------------------------------------------------------------===//
435// CastInfo
436//===----------------------------------------------------------------------===//
437
438/// This struct provides a method for customizing the way a cast is performed.
439/// It inherits from CastIsPossible, to support the case of declaring many
440/// CastIsPossible specializations without having to specialize the full
441/// CastInfo.
442///
443/// In order to specialize different behaviors, specify different functions in
444/// your CastInfo specialization.
445/// For isa<> customization, provide:
446///
447/// `static bool isPossible(const From &f)`
448///
449/// For cast<> customization, provide:
450///
451/// `static To doCast(const From &f)`
452///
453/// For dyn_cast<> and the *_if_present<> variants' customization, provide:
454///
455/// `static To castFailed()` and `static To doCastIfPossible(const From &f)`
456///
457/// Your specialization might look something like this:
458///
459/// template<> struct CastInfo<foo, bar> : public CastIsPossible<foo, bar> {
460/// static inline foo doCast(const bar &b) {
461/// return foo(const_cast<bar &>(b));
462/// }
463/// static inline foo castFailed() { return foo(); }
464/// static inline foo doCastIfPossible(const bar &b) {
465/// if (!CastInfo<foo, bar>::isPossible(b))
466/// return castFailed();
467/// return doCast(b);
468/// }
469/// };
470
471// The default implementations of CastInfo don't use cast traits for now because
472// we need to specify types all over the place due to the current expected
473// casting behavior and the way cast_retty works. New use cases can and should
474// take advantage of the cast traits whenever possible!
475
476template <typename To, typename From, typename Enable = void>
477struct CastInfo : public CastIsPossible<To, From> {
478 using Self = CastInfo<To, From, Enable>;
479
480 using CastReturnType = typename cast_retty<To, From>::ret_type;
481
482 static inline CastReturnType doCast(const From &f) {
483 return cast_convert_val<
484 To, From,
485 typename simplify_type<From>::SimpleType>::doit(const_cast<From &>(f));
486 }
487
488 // This assumes that you can construct the cast return type from `nullptr`.
489 // This is largely to support legacy use cases - if you don't want this
490 // behavior you should specialize CastInfo for your use case.
491 static inline CastReturnType castFailed() { return CastReturnType(nullptr); }
492
493 static inline CastReturnType doCastIfPossible(const From &f) {
494 if (!Self::isPossible(f))
495 return castFailed();
496 return doCast(f);
497 }
498};
499
500/// This struct provides an overload for CastInfo where From has simplify_type
501/// defined. This simply forwards to the appropriate CastInfo with the
502/// simplified type/value, so you don't have to implement both.
503template <typename To, typename From>
504struct CastInfo<To, From, std::enable_if_t<!is_simple_type<From>::value>> {
505 using Self = CastInfo<To, From>;
506 using SimpleFrom = typename simplify_type<From>::SimpleType;
507 using SimplifiedSelf = CastInfo<To, SimpleFrom>;
508
509 static inline bool isPossible(From &f) {
510 return SimplifiedSelf::isPossible(
511 simplify_type<From>::getSimplifiedValue(f));
512 }
513
514 static inline decltype(auto) doCast(From &f) {
515 return SimplifiedSelf::doCast(simplify_type<From>::getSimplifiedValue(f));
516 }
517
518 static inline decltype(auto) castFailed() {
519 return SimplifiedSelf::castFailed();
520 }
521
522 static inline decltype(auto) doCastIfPossible(From &f) {
523 return SimplifiedSelf::doCastIfPossible(
16. Returning without writing to 'f.Node'
524 simplify_type<From>::getSimplifiedValue(f));
13. Calling 'simplify_type::getSimplifiedValue'
15. Returning from 'simplify_type::getSimplifiedValue'
525 }
526};
527
528//===----------------------------------------------------------------------===//
529// Pre-specialized CastInfo
530//===----------------------------------------------------------------------===//
531
532/// Provide a CastInfo specialized for std::unique_ptr.
533template <typename To, typename From>
534struct CastInfo<To, std::unique_ptr<From>> : public UniquePtrCast<To, From> {};
535
536/// Provide a CastInfo specialized for Optional<From>. It's assumed that if the
537/// input is Optional<From> that the output can be Optional<To>. If that's not
538/// the case, specialize CastInfo for your use case.
539template <typename To, typename From>
540struct CastInfo<To, Optional<From>> : public OptionalValueCast<To, From> {};
541
542/// isa<X> - Return true if the parameter to the template is an instance of one
543/// of the template type arguments. Used like this:
544///
545/// if (isa<Type>(myVal)) { ... }
546/// if (isa<Type0, Type1, Type2>(myVal)) { ... }
547template <typename To, typename From>
548[[nodiscard]] inline bool isa(const From &Val) {
549 return CastInfo<To, const From>::isPossible(Val);
550}
551
552template <typename First, typename Second, typename... Rest, typename From>
553[[nodiscard]] inline bool isa(const From &Val) {
554 return isa<First>(Val) || isa<Second, Rest...>(Val);
555}
556
557/// cast<X> - Return the argument parameter cast to the specified type. This
558/// casting operator asserts that the type is correct, so it does not return
559/// null on failure. It does not allow a null argument (use cast_if_present for
560/// that). It is typically used like this:
561///
562/// cast<Instruction>(myVal)->getParent()
563
564template <typename To, typename From>
565[[nodiscard]] inline decltype(auto) cast(const From &Val) {
566 assert(isa<To>(Val) && "cast<Ty>() argument of incompatible type!");
567 return CastInfo<To, const From>::doCast(Val);
568}
569
570template <typename To, typename From>
571[[nodiscard]] inline decltype(auto) cast(From &Val) {
572 assert(isa<To>(Val) && "cast<Ty>() argument of incompatible type!");
573 return CastInfo<To, From>::doCast(Val);
574}
575
576template <typename To, typename From>
577[[nodiscard]] inline decltype(auto) cast(From *Val) {
578 assert(isa<To>(Val) && "cast<Ty>() argument of incompatible type!");
579 return CastInfo<To, From *>::doCast(Val);
580}
581
582template <typename To, typename From>
583[[nodiscard]] inline decltype(auto) cast(std::unique_ptr<From> &&Val) {
584 assert(isa<To>(Val) && "cast<Ty>() argument of incompatible type!");
585 return CastInfo<To, std::unique_ptr<From>>::doCast(std::move(Val));
586}
587
588/// dyn_cast<X> - Return the argument parameter cast to the specified type. This
589/// casting operator returns null if the argument is of the wrong type, so it
590/// can be used to test for a type as well as cast if successful. The value
591/// passed in must be present; if not, use dyn_cast_if_present. This should be
592/// used in the context of an if statement like this:
593///
594/// if (const Instruction *I = dyn_cast<Instruction>(myVal)) { ... }
595
596template <typename To, typename From>
597[[nodiscard]] inline decltype(auto) dyn_cast(const From &Val) {
598 return CastInfo<To, const From>::doCastIfPossible(Val);
599}
600
601template <typename To, typename From>
602[[nodiscard]] inline decltype(auto) dyn_cast(From &Val) {
603 return CastInfo<To, From>::doCastIfPossible(Val);
12. Calling 'CastInfo::doCastIfPossible'
17. Returning from 'CastInfo::doCastIfPossible'
18. Returning without writing to 'Val.Node'
604}
605
606template <typename To, typename From>
607[[nodiscard]] inline decltype(auto) dyn_cast(From *Val) {
608 return CastInfo<To, From *>::doCastIfPossible(Val);
609}
610
611template <typename To, typename From>
612[[nodiscard]] inline decltype(auto) dyn_cast(std::unique_ptr<From> &&Val) {
613 return CastInfo<To, std::unique_ptr<From>>::doCastIfPossible(std::move(Val));
614}
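
Taken together, isa<>, cast<> and dyn_cast<> give the familiar pattern sketched below. This is illustrative only, not part of the listed source, and assumes nothing beyond the standard IR instruction classes:

#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/Casting.h"

void classify(llvm::Value *V) {
  // isa<> only answers yes/no; the variadic form tries several types at once.
  if (llvm::isa<llvm::ConstantInt, llvm::ConstantFP>(V))
    return;

  // cast<> asserts on a type mismatch and never returns null, so it is only
  // appropriate once the type has already been established.
  if (llvm::isa<llvm::LoadInst>(V)) {
    llvm::LoadInst *LI = llvm::cast<llvm::LoadInst>(V);
    (void)LI->getPointerOperand();
  }

  // dyn_cast<> folds the check and the cast, but its result must be
  // null-checked before use.
  if (auto *SI = llvm::dyn_cast<llvm::StoreInst>(V))
    (void)SI->getValueOperand();
}
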
615
616//===----------------------------------------------------------------------===//
617// ValueIsPresent
618//===----------------------------------------------------------------------===//
619
620template <typename T>
621constexpr bool IsNullable = std::is_pointer<T>::value ||
622 std::is_constructible<T, std::nullptr_t>::value;
623
624/// ValueIsPresent provides a way to check if a value is, well, present. For
625/// pointers, this is the equivalent of checking against nullptr; for
626/// Optionals, it is the equivalent of checking hasValue(). It also
627/// provides a method for unwrapping a value (think dereferencing a
628/// pointer).
629
630// Generic values can't *not* be present.
631template <typename T, typename Enable = void> struct ValueIsPresent {
632 using UnwrappedType = T;
633 static inline bool isPresent(const T &t) { return true; }
634 static inline decltype(auto) unwrapValue(T &t) { return t; }
635};
636
637// Optional provides its own way to check if something is present.
638template <typename T> struct ValueIsPresent<Optional<T>> {
639 using UnwrappedType = T;
640 static inline bool isPresent(const Optional<T> &t) { return t.has_value(); }
641 static inline decltype(auto) unwrapValue(Optional<T> &t) { return t.value(); }
642};
643
644// If something is "nullable" then we just compare it to nullptr to see if it
645// exists.
646template <typename T>
647struct ValueIsPresent<T, std::enable_if_t<IsNullable<T>>> {
648 using UnwrappedType = T;
649 static inline bool isPresent(const T &t) { return t != nullptr; }
650 static inline decltype(auto) unwrapValue(T &t) { return t; }
651};
652
653namespace detail {
654// Convenience function we can use to check if a value is present. Because of
655// simplify_type, we have to call it on the simplified type for now.
656template <typename T> inline bool isPresent(const T &t) {
657 return ValueIsPresent<typename simplify_type<T>::SimpleType>::isPresent(
658 simplify_type<T>::getSimplifiedValue(const_cast<T &>(t)));
659}
660
661// Convenience function we can use to unwrap a value.
662template <typename T> inline decltype(auto) unwrapValue(T &t) {
663 return ValueIsPresent<T>::unwrapValue(t);
664}
665} // namespace detail
666
667/// isa_and_present<X> - Functionally identical to isa, except that a null value
668/// is accepted.
669template <typename... X, class Y>
670[[nodiscard]] inline bool isa_and_present(const Y &Val) {
671 if (!detail::isPresent(Val))
672 return false;
673 return isa<X...>(Val);
674}
675
676template <typename... X, class Y>
677[[nodiscard]] inline bool isa_and_nonnull(const Y &Val) {
678 return isa_and_present<X...>(Val);
679}
680
681/// cast_if_present<X> - Functionally identical to cast, except that a null
682/// value is accepted.
683template <class X, class Y>
684[[nodiscard]] inline auto cast_if_present(const Y &Val) {
685 if (!detail::isPresent(Val))
686 return CastInfo<X, const Y>::castFailed();
687 assert(isa<X>(Val) && "cast_if_present<Ty>() argument of incompatible type!");
688 return cast<X>(detail::unwrapValue(Val));
689}
690
691template <class X, class Y> [[nodiscard]] inline auto cast_if_present(Y &Val) {
692 if (!detail::isPresent(Val))
693 return CastInfo<X, Y>::castFailed();
694 assert(isa<X>(Val) && "cast_if_present<Ty>() argument of incompatible type!");
695 return cast<X>(detail::unwrapValue(Val));
696}
697
698template <class X, class Y> [[nodiscard]] inline auto cast_if_present(Y *Val) {
699 if (!detail::isPresent(Val))
700 return CastInfo<X, Y *>::castFailed();
701 assert(isa<X>(Val) && "cast_if_present<Ty>() argument of incompatible type!");
702 return cast<X>(detail::unwrapValue(Val));
703}
704
705template <class X, class Y>
706[[nodiscard]] inline auto cast_if_present(std::unique_ptr<Y> &&Val) {
707 if (!detail::isPresent(Val))
708 return UniquePtrCast<X, Y>::castFailed();
709 return UniquePtrCast<X, Y>::doCast(std::move(Val));
710}
711
712// Provide a forwarding from cast_or_null to cast_if_present for current
713// users. This is deprecated and will be removed in a future patch; use
714// cast_if_present instead.
715template <class X, class Y> auto cast_or_null(const Y &Val) {
716 return cast_if_present<X>(Val);
717}
718
719template <class X, class Y> auto cast_or_null(Y &Val) {
720 return cast_if_present<X>(Val);
721}
722
723template <class X, class Y> auto cast_or_null(Y *Val) {
724 return cast_if_present<X>(Val);
725}
726
727template <class X, class Y> auto cast_or_null(std::unique_ptr<Y> &&Val) {
728 return cast_if_present<X>(std::move(Val));
729}
730
731/// dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a
732/// null (or none in the case of optionals) value is accepted.
733template <class X, class Y> auto dyn_cast_if_present(const Y &Val) {
734 if (!detail::isPresent(Val))
735 return CastInfo<X, const Y>::castFailed();
736 return CastInfo<X, const Y>::doCastIfPossible(detail::unwrapValue(Val));
737}
738
739template <class X, class Y> auto dyn_cast_if_present(Y &Val) {
740 if (!detail::isPresent(Val))
741 return CastInfo<X, Y>::castFailed();
742 return CastInfo<X, Y>::doCastIfPossible(detail::unwrapValue(Val));
743}
744
745template <class X, class Y> auto dyn_cast_if_present(Y *Val) {
746 if (!detail::isPresent(Val))
747 return CastInfo<X, Y *>::castFailed();
748 return CastInfo<X, Y *>::doCastIfPossible(detail::unwrapValue(Val));
749}
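
A short illustrative sketch of the *_if_present variants (not part of the listed source); the only assumption is a possibly-null llvm::Value*:

#include "llvm/IR/Instructions.h"
#include "llvm/Support/Casting.h"

void inspect(llvm::Value *V) { // V may legitimately be null here
  // isa_and_present<> answers false for a null input instead of asserting
  // the way isa<> would.
  if (!llvm::isa_and_present<llvm::LoadInst>(V))
    return;

  // dyn_cast_if_present<> likewise treats an absent value as a failed cast,
  // so the result still has to be null-checked before use.
  if (auto *LI = llvm::dyn_cast_if_present<llvm::LoadInst>(V))
    (void)LI->getPointerOperand();
}
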
750
751// Forwards to dyn_cast_if_present to avoid breaking current users. This is
752// deprecated and will be removed in a future patch; use
753// dyn_cast_if_present instead.
754template <class X, class Y> auto dyn_cast_or_null(const Y &Val) {
755 return dyn_cast_if_present<X>(Val);
756}
757
758template <class X, class Y> auto dyn_cast_or_null(Y &Val) {
759 return dyn_cast_if_present<X>(Val);
760}
761
762template <class X, class Y> auto dyn_cast_or_null(Y *Val) {
763 return dyn_cast_if_present<X>(Val);
764}
765
766/// unique_dyn_cast<X> - Given a unique_ptr<Y>, try to return a unique_ptr<X>,
767/// taking ownership of the input pointer iff isa<X>(Val) is true. If the
768/// cast is successful, From refers to nullptr on exit and the casted value
769/// is returned. If the cast is unsuccessful, the function returns nullptr
770/// and From is unchanged.
771template <class X, class Y>
772[[nodiscard]] inline typename CastInfo<X, std::unique_ptr<Y>>::CastResultType
773unique_dyn_cast(std::unique_ptr<Y> &Val) {
774 if (!isa<X>(Val))
775 return nullptr;
776 return cast<X>(std::move(Val));
777}
778
779template <class X, class Y>
780[[nodiscard]] inline auto unique_dyn_cast(std::unique_ptr<Y> &&Val) {
781 return unique_dyn_cast<X, Y>(Val);
782}
783
784// unique_dyn_cast_or_null<X> - Functionally identical to unique_dyn_cast,
785// except that a null value is accepted.
786template <class X, class Y>
787[[nodiscard]] inline typename CastInfo<X, std::unique_ptr<Y>>::CastResultType
788unique_dyn_cast_or_null(std::unique_ptr<Y> &Val) {
789 if (!Val)
790 return nullptr;
791 return unique_dyn_cast<X, Y>(Val);
792}
793
794template <class X, class Y>
795[[nodiscard]] inline auto unique_dyn_cast_or_null(std::unique_ptr<Y> &&Val) {
796 return unique_dyn_cast_or_null<X, Y>(Val);
797}
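
A sketch of the ownership semantics described in the comment above, using a hypothetical Shape/Circle hierarchy rather than anything from LLVM:

#include "llvm/Support/Casting.h"
#include <memory>

// Hypothetical hierarchy, not part of LLVM.
struct Shape {
  enum Kind { K_Circle, K_Square };
  Kind TheKind;
  Shape(Kind K) : TheKind(K) {}
  virtual ~Shape() = default;
};
struct Circle : Shape {
  Circle() : Shape(K_Circle) {}
  static bool classof(const Shape *S) { return S->TheKind == K_Circle; }
};

void takeCircle(std::unique_ptr<Shape> S) {
  // On success the returned unique_ptr owns the object and S becomes null;
  // on failure the result is null and S keeps ownership.
  if (std::unique_ptr<Circle> C = llvm::unique_dyn_cast<Circle>(S)) {
    // ... use C; S is empty here.
  }
}
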
798
799} // end namespace llvm
800
801#endif // LLVM_SUPPORT_CASTING_H

/build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/llvm/include/llvm/CodeGen/SelectionDAGNodes.h

1//===- llvm/CodeGen/SelectionDAGNodes.h - SelectionDAG Nodes ----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file declares the SDNode class and derived classes, which are used to
10// represent the nodes and operations present in a SelectionDAG. These nodes
11// and operations are machine code level operations, with some similarities to
12// the GCC RTL representation.
13//
14// Clients should include the SelectionDAG.h file instead of this file directly.
15//
16//===----------------------------------------------------------------------===//
17
18#ifndef LLVM_CODEGEN_SELECTIONDAGNODES_H
19#define LLVM_CODEGEN_SELECTIONDAGNODES_H
20
21#include "llvm/ADT/APFloat.h"
22#include "llvm/ADT/ArrayRef.h"
23#include "llvm/ADT/BitVector.h"
24#include "llvm/ADT/FoldingSet.h"
25#include "llvm/ADT/GraphTraits.h"
26#include "llvm/ADT/SmallPtrSet.h"
27#include "llvm/ADT/SmallVector.h"
28#include "llvm/ADT/ilist_node.h"
29#include "llvm/ADT/iterator.h"
30#include "llvm/ADT/iterator_range.h"
31#include "llvm/CodeGen/ISDOpcodes.h"
32#include "llvm/CodeGen/MachineMemOperand.h"
33#include "llvm/CodeGen/Register.h"
34#include "llvm/CodeGen/ValueTypes.h"
35#include "llvm/IR/Constants.h"
36#include "llvm/IR/DebugLoc.h"
37#include "llvm/IR/Instruction.h"
38#include "llvm/IR/Instructions.h"
39#include "llvm/IR/Metadata.h"
40#include "llvm/IR/Operator.h"
41#include "llvm/Support/AlignOf.h"
42#include "llvm/Support/AtomicOrdering.h"
43#include "llvm/Support/Casting.h"
44#include "llvm/Support/ErrorHandling.h"
45#include "llvm/Support/MachineValueType.h"
46#include "llvm/Support/TypeSize.h"
47#include <algorithm>
48#include <cassert>
49#include <climits>
50#include <cstddef>
51#include <cstdint>
52#include <cstring>
53#include <iterator>
54#include <string>
55#include <tuple>
56#include <utility>
57
58namespace llvm {
59
60class APInt;
61class Constant;
62class GlobalValue;
63class MachineBasicBlock;
64class MachineConstantPoolValue;
65class MCSymbol;
66class raw_ostream;
67class SDNode;
68class SelectionDAG;
69class Type;
70class Value;
71
72void checkForCycles(const SDNode *N, const SelectionDAG *DAG = nullptr,
73 bool force = false);
74
75/// This represents a list of ValueType's that has been intern'd by
76/// a SelectionDAG. Instances of this simple value class are returned by
77/// SelectionDAG::getVTList(...).
78///
79struct SDVTList {
80 const EVT *VTs;
81 unsigned int NumVTs;
82};
83
84namespace ISD {
85
86 /// Node predicates
87
88/// If N is a BUILD_VECTOR or SPLAT_VECTOR node whose elements are all the
89/// same constant or undefined, return true and return the constant value in
90/// \p SplatValue.
91bool isConstantSplatVector(const SDNode *N, APInt &SplatValue);
92
93/// Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where
94/// all of the elements are ~0 or undef. If \p BuildVectorOnly is set to
95/// true, it only checks BUILD_VECTOR.
96bool isConstantSplatVectorAllOnes(const SDNode *N,
97 bool BuildVectorOnly = false);
98
99/// Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where
100/// all of the elements are 0 or undef. If \p BuildVectorOnly is set to true, it
101/// only checks BUILD_VECTOR.
102bool isConstantSplatVectorAllZeros(const SDNode *N,
103 bool BuildVectorOnly = false);
104
105/// Return true if the specified node is a BUILD_VECTOR where all of the
106/// elements are ~0 or undef.
107bool isBuildVectorAllOnes(const SDNode *N);
108
109/// Return true if the specified node is a BUILD_VECTOR where all of the
110/// elements are 0 or undef.
111bool isBuildVectorAllZeros(const SDNode *N);
112
113/// Return true if the specified node is a BUILD_VECTOR node of all
114/// ConstantSDNode or undef.
115bool isBuildVectorOfConstantSDNodes(const SDNode *N);
116
117/// Return true if the specified node is a BUILD_VECTOR node of all
118/// ConstantFPSDNode or undef.
119bool isBuildVectorOfConstantFPSDNodes(const SDNode *N);
120
121/// Returns true if the specified node is a vector where all elements can
122/// be truncated to the specified element size without a loss in meaning.
123bool isVectorShrinkable(const SDNode *N, unsigned NewEltSize, bool Signed);
124
125/// Return true if the node has at least one operand and all operands of the
126/// specified node are ISD::UNDEF.
127bool allOperandsUndef(const SDNode *N);
128
129/// Return true if the specified node is FREEZE(UNDEF).
130bool isFreezeUndef(const SDNode *N);
131
132} // end namespace ISD
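
An illustrative use of the predicates declared above (a sketch, not part of the listed source):

#include "llvm/CodeGen/SelectionDAG.h"

bool isSplatOfZero(const llvm::SDNode *N) {
  // isConstantSplatVector fills Splat when every element is the same
  // constant (or undef); together with APInt::isZero this recognizes a
  // BUILD_VECTOR/SPLAT_VECTOR of zeros.
  llvm::APInt Splat;
  return llvm::ISD::isConstantSplatVector(N, Splat) && Splat.isZero();
}
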
133
134//===----------------------------------------------------------------------===//
135/// Unlike LLVM values, Selection DAG nodes may return multiple
136/// values as the result of a computation. Many nodes return multiple values,
137/// from loads (which define a token and a return value) to ADDC (which returns
138/// a result and a carry value), to calls (which may return an arbitrary number
139/// of values).
140///
141/// As such, each use of a SelectionDAG computation must indicate the node that
142/// computes it as well as which return value to use from that node. This pair
143/// of information is represented with the SDValue value type.
144///
145class SDValue {
146 friend struct DenseMapInfo<SDValue>;
147
148 SDNode *Node = nullptr; // The node defining the value we are using.
149 unsigned ResNo = 0; // Which return value of the node we are using.
150
151public:
152 SDValue() = default;
153 SDValue(SDNode *node, unsigned resno);
154
155 /// get the index which selects a specific result in the SDNode
156 unsigned getResNo() const { return ResNo; }
157
158 /// get the SDNode which holds the desired result
159 SDNode *getNode() const { return Node; }
160
161 /// set the SDNode
162 void setNode(SDNode *N) { Node = N; }
163
164 inline SDNode *operator->() const { return Node; }
165
166 bool operator==(const SDValue &O) const {
167 return Node == O.Node && ResNo == O.ResNo;
168 }
169 bool operator!=(const SDValue &O) const {
170 return !operator==(O);
171 }
172 bool operator<(const SDValue &O) const {
173 return std::tie(Node, ResNo) < std::tie(O.Node, O.ResNo);
174 }
175 explicit operator bool() const {
176 return Node != nullptr;
177 }
178
179 SDValue getValue(unsigned R) const {
180 return SDValue(Node, R);
181 }
182
183 /// Return true if this node is an operand of N.
184 bool isOperandOf(const SDNode *N) const;
185
186 /// Return the ValueType of the referenced return value.
187 inline EVT getValueType() const;
188
189 /// Return the simple ValueType of the referenced return value.
190 MVT getSimpleValueType() const {
191 return getValueType().getSimpleVT();
192 }
193
194 /// Returns the size of the value in bits.
195 ///
196 /// If the value type is a scalable vector type, the scalable property will
197 /// be set and the runtime size will be a positive integer multiple of the
198 /// base size.
199 TypeSize getValueSizeInBits() const {
200 return getValueType().getSizeInBits();
201 }
202
203 uint64_t getScalarValueSizeInBits() const {
204 return getValueType().getScalarType().getFixedSizeInBits();
205 }
206
207 // Forwarding methods - These forward to the corresponding methods in SDNode.
208 inline unsigned getOpcode() const;
209 inline unsigned getNumOperands() const;
210 inline const SDValue &getOperand(unsigned i) const;
211 inline uint64_t getConstantOperandVal(unsigned i) const;
212 inline const APInt &getConstantOperandAPInt(unsigned i) const;
213 inline bool isTargetMemoryOpcode() const;
214 inline bool isTargetOpcode() const;
215 inline bool isMachineOpcode() const;
216 inline bool isUndef() const;
217 inline unsigned getMachineOpcode() const;
218 inline const DebugLoc &getDebugLoc() const;
219 inline void dump() const;
220 inline void dump(const SelectionDAG *G) const;
221 inline void dumpr() const;
222 inline void dumpr(const SelectionDAG *G) const;
223
224 /// Return true if this operand (which must be a chain) reaches the
225 /// specified operand without crossing any side-effecting instructions.
226 /// In practice, this looks through token factors and non-volatile loads.
227 /// In order to remain efficient, this only
228 /// looks a couple of nodes in; it does not do an exhaustive search.
229 bool reachesChainWithoutSideEffects(SDValue Dest,
230 unsigned Depth = 2) const;
231
232 /// Return true if there are no nodes using value ResNo of Node.
233 inline bool use_empty() const;
234
235 /// Return true if there is exactly one node using value ResNo of Node.
236 inline bool hasOneUse() const;
237};
238
239template<> struct DenseMapInfo<SDValue> {
240 static inline SDValue getEmptyKey() {
241 SDValue V;
242 V.ResNo = -1U;
243 return V;
244 }
245
246 static inline SDValue getTombstoneKey() {
247 SDValue V;
248 V.ResNo = -2U;
249 return V;
250 }
251
252 static unsigned getHashValue(const SDValue &Val) {
253 return ((unsigned)((uintptr_t)Val.getNode() >> 4) ^
254 (unsigned)((uintptr_t)Val.getNode() >> 9)) + Val.getResNo();
255 }
256
257 static bool isEqual(const SDValue &LHS, const SDValue &RHS) {
258 return LHS == RHS;
259 }
260};
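
Because of this DenseMapInfo specialization, SDValue can be used directly as a DenseMap key. A small sketch (not part of the listed source):

#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/SelectionDAG.h"

// Record a replacement for an already-visited (node, result number) pair;
// hashing and the empty/tombstone keys come from the specialization above.
void recordReplacement(llvm::DenseMap<llvm::SDValue, llvm::SDValue> &Map,
                       llvm::SDValue From, llvm::SDValue To) {
  Map[From] = To;
}
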
261
262/// Allow casting operators to work directly on
263/// SDValues as if they were SDNode*'s.
264template<> struct simplify_type<SDValue> {
265 using SimpleType = SDNode *;
266
267 static SimpleType getSimplifiedValue(SDValue &Val) {
268 return Val.getNode();
14. Returning without writing to 'Val.Node'
269 }
270};
271template<> struct simplify_type<const SDValue> {
272 using SimpleType = /*const*/ SDNode *;
273
274 static SimpleType getSimplifiedValue(const SDValue &Val) {
275 return Val.getNode();
276 }
277};
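
These simplify_type specializations are what let the Casting.h templates above accept an SDValue directly: the cast is applied to the underlying SDNode*. The flip side, and the pattern this report's numbered path notes are tracking, is that a failed dyn_cast yields a null pointer that must be checked before any member call. A sketch (not part of the listed source):

#include "llvm/CodeGen/SelectionDAG.h"

void visit(llvm::SDValue Op) {
  // dyn_cast sees through SDValue via simplify_type and returns the node as
  // a LoadSDNode* when it matches, or nullptr when it does not.
  if (auto *Ld = llvm::dyn_cast<llvm::LoadSDNode>(Op)) {
    (void)Ld->getBasePtr(); // safe: guarded by the null check
  }
  // Calling a member on an unchecked dyn_cast result is exactly the kind of
  // null-pointer member call that the path notes in this report follow.
}
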
278
279/// Represents a use of a SDNode. This class holds an SDValue,
280/// which records the SDNode being used and the result number, a
281/// pointer to the SDNode using the value, and Next and Prev pointers,
282/// which link together all the uses of an SDNode.
283///
284class SDUse {
285 /// Val - The value being used.
286 SDValue Val;
287 /// User - The user of this value.
288 SDNode *User = nullptr;
289 /// Prev, Next - Pointers to the uses list of the SDNode referred by
290 /// this operand.
291 SDUse **Prev = nullptr;
292 SDUse *Next = nullptr;
293
294public:
295 SDUse() = default;
296 SDUse(const SDUse &U) = delete;
297 SDUse &operator=(const SDUse &) = delete;
298
299 /// Normally SDUse will just implicitly convert to an SDValue that it holds.
300 operator const SDValue&() const { return Val; }
301
302 /// If implicit conversion to SDValue doesn't work, the get() method returns
303 /// the SDValue.
304 const SDValue &get() const { return Val; }
305
306 /// This returns the SDNode that contains this Use.
307 SDNode *getUser() { return User; }
308
309 /// Get the next SDUse in the use list.
310 SDUse *getNext() const { return Next; }
311
312 /// Convenience function for get().getNode().
313 SDNode *getNode() const { return Val.getNode(); }
314 /// Convenience function for get().getResNo().
315 unsigned getResNo() const { return Val.getResNo(); }
316 /// Convenience function for get().getValueType().
317 EVT getValueType() const { return Val.getValueType(); }
318
319 /// Convenience function for get().operator==
320 bool operator==(const SDValue &V) const {
321 return Val == V;
322 }
323
324 /// Convenience function for get().operator!=
325 bool operator!=(const SDValue &V) const {
326 return Val != V;
327 }
328
329 /// Convenience function for get().operator<
330 bool operator<(const SDValue &V) const {
331 return Val < V;
332 }
333
334private:
335 friend class SelectionDAG;
336 friend class SDNode;
337 // TODO: unfriend HandleSDNode once we fix its operand handling.
338 friend class HandleSDNode;
339
340 void setUser(SDNode *p) { User = p; }
341
342 /// Remove this use from its existing use list, assign it the
343 /// given value, and add it to the new value's node's use list.
344 inline void set(const SDValue &V);
345 /// Like set, but only supports initializing a newly-allocated
346 /// SDUse with a non-null value.
347 inline void setInitial(const SDValue &V);
348 /// Like set, but only sets the Node portion of the value,
349 /// leaving the ResNo portion unmodified.
350 inline void setNode(SDNode *N);
351
352 void addToList(SDUse **List) {
353 Next = *List;
354 if (Next) Next->Prev = &Next;
355 Prev = List;
356 *List = this;
357 }
358
359 void removeFromList() {
360 *Prev = Next;
361 if (Next) Next->Prev = Prev;
362 }
363};
364
365/// simplify_type specializations - Allow casting operators to work directly on
366/// SDValues as if they were SDNode*'s.
367template<> struct simplify_type<SDUse> {
368 using SimpleType = SDNode *;
369
370 static SimpleType getSimplifiedValue(SDUse &Val) {
371 return Val.getNode();
372 }
373};
374
375/// These are IR-level optimization flags that may be propagated to SDNodes.
376/// TODO: This data structure should be shared by the IR optimizer and the
377/// backend.
378struct SDNodeFlags {
379private:
380 bool NoUnsignedWrap : 1;
381 bool NoSignedWrap : 1;
382 bool Exact : 1;
383 bool NoNaNs : 1;
384 bool NoInfs : 1;
385 bool NoSignedZeros : 1;
386 bool AllowReciprocal : 1;
387 bool AllowContract : 1;
388 bool ApproximateFuncs : 1;
389 bool AllowReassociation : 1;
390
391 // We assume instructions do not raise floating-point exceptions by default,
392 // and only those marked explicitly may do so. We could choose to represent
393 // this via a positive "FPExcept" flag like on the MI level, but having a
394 // negative "NoFPExcept" flag here (that defaults to true) makes the flag
395 // intersection logic more straightforward.
396 bool NoFPExcept : 1;
397
398public:
399 /// Default constructor turns off all optimization flags.
400 SDNodeFlags()
401 : NoUnsignedWrap(false), NoSignedWrap(false), Exact(false), NoNaNs(false),
402 NoInfs(false), NoSignedZeros(false), AllowReciprocal(false),
403 AllowContract(false), ApproximateFuncs(false),
404 AllowReassociation(false), NoFPExcept(false) {}
405
406 /// Propagate the fast-math-flags from an IR FPMathOperator.
407 void copyFMF(const FPMathOperator &FPMO) {
408 setNoNaNs(FPMO.hasNoNaNs());
409 setNoInfs(FPMO.hasNoInfs());
410 setNoSignedZeros(FPMO.hasNoSignedZeros());
411 setAllowReciprocal(FPMO.hasAllowReciprocal());
412 setAllowContract(FPMO.hasAllowContract());
413 setApproximateFuncs(FPMO.hasApproxFunc());
414 setAllowReassociation(FPMO.hasAllowReassoc());
415 }
416
417 // These are mutators for each flag.
418 void setNoUnsignedWrap(bool b) { NoUnsignedWrap = b; }
419 void setNoSignedWrap(bool b) { NoSignedWrap = b; }
420 void setExact(bool b) { Exact = b; }
421 void setNoNaNs(bool b) { NoNaNs = b; }
422 void setNoInfs(bool b) { NoInfs = b; }
423 void setNoSignedZeros(bool b) { NoSignedZeros = b; }
424 void setAllowReciprocal(bool b) { AllowReciprocal = b; }
425 void setAllowContract(bool b) { AllowContract = b; }
426 void setApproximateFuncs(bool b) { ApproximateFuncs = b; }
427 void setAllowReassociation(bool b) { AllowReassociation = b; }
428 void setNoFPExcept(bool b) { NoFPExcept = b; }
429
430 // These are accessors for each flag.
431 bool hasNoUnsignedWrap() const { return NoUnsignedWrap; }
432 bool hasNoSignedWrap() const { return NoSignedWrap; }
433 bool hasExact() const { return Exact; }
434 bool hasNoNaNs() const { return NoNaNs; }
435 bool hasNoInfs() const { return NoInfs; }
436 bool hasNoSignedZeros() const { return NoSignedZeros; }
437 bool hasAllowReciprocal() const { return AllowReciprocal; }
438 bool hasAllowContract() const { return AllowContract; }
439 bool hasApproximateFuncs() const { return ApproximateFuncs; }
440 bool hasAllowReassociation() const { return AllowReassociation; }
441 bool hasNoFPExcept() const { return NoFPExcept; }
442
443 /// Clear any flags in this flag set that aren't also set in Flags. All
444 /// flags will be cleared if Flags are undefined.
445 void intersectWith(const SDNodeFlags Flags) {
446 NoUnsignedWrap &= Flags.NoUnsignedWrap;
447 NoSignedWrap &= Flags.NoSignedWrap;
448 Exact &= Flags.Exact;
449 NoNaNs &= Flags.NoNaNs;
450 NoInfs &= Flags.NoInfs;
451 NoSignedZeros &= Flags.NoSignedZeros;
452 AllowReciprocal &= Flags.AllowReciprocal;
453 AllowContract &= Flags.AllowContract;
454 ApproximateFuncs &= Flags.ApproximateFuncs;
455 AllowReassociation &= Flags.AllowReassociation;
456 NoFPExcept &= Flags.NoFPExcept;
457 }
458};
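
A minimal sketch of how such flag sets are typically merged when two nodes are combined (not part of the listed source):

#include "llvm/CodeGen/SelectionDAG.h"

llvm::SDNodeFlags combineFlags(const llvm::SDNodeFlags &A,
                               const llvm::SDNodeFlags &B) {
  // Keep only the flags that are valid for both operands; intersectWith
  // clears every flag in Result that is not also set in B.
  llvm::SDNodeFlags Result = A;
  Result.intersectWith(B);
  return Result;
}
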
459
460/// Represents one node in the SelectionDAG.
461///
462class SDNode : public FoldingSetNode, public ilist_node<SDNode> {
463private:
464 /// The operation that this node performs.
465 int16_t NodeType;
466
467protected:
468 // We define a set of mini-helper classes to help us interpret the bits in our
469 // SubclassData. These are designed to fit within a uint16_t so they pack
470 // with NodeType.
471
472#if defined(_AIX) && (!defined(__GNUC__) || defined(__clang__))
473// Except for GCC; by default, AIX compilers store bit-fields in 4-byte words
474// and give the `pack` pragma push semantics.
475#define BEGIN_TWO_BYTE_PACK() _Pragma("pack(2)")
476#define END_TWO_BYTE_PACK() _Pragma("pack(pop)")
477#else
478#define BEGIN_TWO_BYTE_PACK()
479#define END_TWO_BYTE_PACK()
480#endif
481
482BEGIN_TWO_BYTE_PACK()
483 class SDNodeBitfields {
484 friend class SDNode;
485 friend class MemIntrinsicSDNode;
486 friend class MemSDNode;
487 friend class SelectionDAG;
488
489 uint16_t HasDebugValue : 1;
490 uint16_t IsMemIntrinsic : 1;
491 uint16_t IsDivergent : 1;
492 };
493 enum { NumSDNodeBits = 3 };
494
495 class ConstantSDNodeBitfields {
496 friend class ConstantSDNode;
497
498 uint16_t : NumSDNodeBits;
499
500 uint16_t IsOpaque : 1;
501 };
502
503 class MemSDNodeBitfields {
504 friend class MemSDNode;
505 friend class MemIntrinsicSDNode;
506 friend class AtomicSDNode;
507
508 uint16_t : NumSDNodeBits;
509
510 uint16_t IsVolatile : 1;
511 uint16_t IsNonTemporal : 1;
512 uint16_t IsDereferenceable : 1;
513 uint16_t IsInvariant : 1;
514 };
515 enum { NumMemSDNodeBits = NumSDNodeBits + 4 };
516
517 class LSBaseSDNodeBitfields {
518 friend class LSBaseSDNode;
519 friend class VPBaseLoadStoreSDNode;
520 friend class MaskedLoadStoreSDNode;
521 friend class MaskedGatherScatterSDNode;
522 friend class VPGatherScatterSDNode;
523
524 uint16_t : NumMemSDNodeBits;
525
526 // This storage is shared between disparate class hierarchies to hold an
527 // enumeration specific to the class hierarchy in use.
528 // LSBaseSDNode => enum ISD::MemIndexedMode
529 // VPLoadStoreBaseSDNode => enum ISD::MemIndexedMode
530 // MaskedLoadStoreBaseSDNode => enum ISD::MemIndexedMode
531 // VPGatherScatterSDNode => enum ISD::MemIndexType
532 // MaskedGatherScatterSDNode => enum ISD::MemIndexType
533 uint16_t AddressingMode : 3;
534 };
535 enum { NumLSBaseSDNodeBits = NumMemSDNodeBits + 3 };
536
537 class LoadSDNodeBitfields {
538 friend class LoadSDNode;
539 friend class VPLoadSDNode;
540 friend class VPStridedLoadSDNode;
541 friend class MaskedLoadSDNode;
542 friend class MaskedGatherSDNode;
543 friend class VPGatherSDNode;
544
545 uint16_t : NumLSBaseSDNodeBits;
546
547 uint16_t ExtTy : 2; // enum ISD::LoadExtType
548 uint16_t IsExpanding : 1;
549 };
550
551 class StoreSDNodeBitfields {
552 friend class StoreSDNode;
553 friend class VPStoreSDNode;
554 friend class VPStridedStoreSDNode;
555 friend class MaskedStoreSDNode;
556 friend class MaskedScatterSDNode;
557 friend class VPScatterSDNode;
558
559 uint16_t : NumLSBaseSDNodeBits;
560
561 uint16_t IsTruncating : 1;
562 uint16_t IsCompressing : 1;
563 };
564
565 union {
566 char RawSDNodeBits[sizeof(uint16_t)];
567 SDNodeBitfields SDNodeBits;
568 ConstantSDNodeBitfields ConstantSDNodeBits;
569 MemSDNodeBitfields MemSDNodeBits;
570 LSBaseSDNodeBitfields LSBaseSDNodeBits;
571 LoadSDNodeBitfields LoadSDNodeBits;
572 StoreSDNodeBitfields StoreSDNodeBits;
573 };
574END_TWO_BYTE_PACK()
575#undef BEGIN_TWO_BYTE_PACK
576#undef END_TWO_BYTE_PACK
577
578 // RawSDNodeBits must cover the entirety of the union. This means that all of
579 // the union's members must have size <= RawSDNodeBits. We write the RHS as
580 // "2" instead of sizeof(RawSDNodeBits) because MSVC can't handle the latter.
581 static_assert(sizeof(SDNodeBitfields) <= 2, "field too wide");
582 static_assert(sizeof(ConstantSDNodeBitfields) <= 2, "field too wide");
583 static_assert(sizeof(MemSDNodeBitfields) <= 2, "field too wide");
584 static_assert(sizeof(LSBaseSDNodeBitfields) <= 2, "field too wide");
585 static_assert(sizeof(LoadSDNodeBitfields) <= 2, "field too wide");
586 static_assert(sizeof(StoreSDNodeBitfields) <= 2, "field too wide");
587
588private:
589 friend class SelectionDAG;
590 // TODO: unfriend HandleSDNode once we fix its operand handling.
591 friend class HandleSDNode;
592
593 /// Unique id per SDNode in the DAG.
594 int NodeId = -1;
595
596 /// The values that are used by this operation.
597 SDUse *OperandList = nullptr;
598
599 /// The types of the values this node defines. SDNode's may
600 /// define multiple values simultaneously.
601 const EVT *ValueList;
602
603 /// List of uses for this SDNode.
604 SDUse *UseList = nullptr;
605
606 /// The number of entries in the Operand/Value list.
607 unsigned short NumOperands = 0;
608 unsigned short NumValues;
609
610 // The ordering of the SDNodes. It roughly corresponds to the ordering of the
611 // original LLVM instructions.
612 // This is used for turning off scheduling, because we'll forgo
613 // the normal scheduling algorithms and output the instructions according to
614 // this ordering.
615 unsigned IROrder;
616
617 /// Source line information.
618 DebugLoc debugLoc;
619
620 /// Return a pointer to the specified value type.
621 static const EVT *getValueTypeList(EVT VT);
622
623 SDNodeFlags Flags;
624
625 uint32_t CFIType = 0;
626
627public:
628 /// Unique and persistent id per SDNode in the DAG. Used for debug printing.
629 /// We do not place that under `#if LLVM_ENABLE_ABI_BREAKING_CHECKS`
630 /// intentionally because it adds unneeded complexity without noticeable
631 /// benefits (see discussion with @thakis in D120714).
632 uint16_t PersistentId;
633
634 //===--------------------------------------------------------------------===//
635 // Accessors
636 //
637
638 /// Return the SelectionDAG opcode value for this node. For
639 /// pre-isel nodes (those for which isMachineOpcode returns false), these
640 /// are the opcode values in the ISD and <target>ISD namespaces. For
641 /// post-isel opcodes, see getMachineOpcode.
642 unsigned getOpcode() const { return (unsigned short)NodeType; }
643
644 /// Test if this node has a target-specific opcode (in the
645 /// \<target\>ISD namespace).
646 bool isTargetOpcode() const { return NodeType >= ISD::BUILTIN_OP_END; }
647
648 /// Test if this node has a target-specific opcode that may raise
649 /// FP exceptions (in the \<target\>ISD namespace and greater than
650 /// FIRST_TARGET_STRICTFP_OPCODE). Note that all target memory
651 /// opcodes are currently automatically considered to possibly raise
652 /// FP exceptions as well.
653 bool isTargetStrictFPOpcode() const {
654 return NodeType >= ISD::FIRST_TARGET_STRICTFP_OPCODE;
655 }
656
657 /// Test if this node has a target-specific
658 /// memory-referencing opcode (in the \<target\>ISD namespace and
659 /// greater than FIRST_TARGET_MEMORY_OPCODE).
660 bool isTargetMemoryOpcode() const {
661 return NodeType >= ISD::FIRST_TARGET_MEMORY_OPCODE;
662 }
663
664 /// Return true if the node's type is undefined.
665 bool isUndef() const { return NodeType == ISD::UNDEF; }
666
667 /// Test if this node is a memory intrinsic (with valid pointer information).
668 /// INTRINSIC_W_CHAIN and INTRINSIC_VOID nodes are sometimes created for
669 /// non-memory intrinsics (with chains) that are not really instances of
670 /// MemSDNode. For such nodes, we need some extra state to determine the
671 /// proper classof relationship.
672 bool isMemIntrinsic() const {
673 return (NodeType == ISD::INTRINSIC_W_CHAIN ||
674 NodeType == ISD::INTRINSIC_VOID) &&
675 SDNodeBits.IsMemIntrinsic;
676 }
677
678 /// Test if this node is a strict floating point pseudo-op.
679 bool isStrictFPOpcode() {
680 switch (NodeType) {
681 default:
682 return false;
683 case ISD::STRICT_FP16_TO_FP:
684 case ISD::STRICT_FP_TO_FP16:
685#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
686 case ISD::STRICT_##DAGN:
687#include "llvm/IR/ConstrainedOps.def"
688 return true;
689 }
690 }
691
692 /// Test if this node is a vector predication operation.
693 bool isVPOpcode() const { return ISD::isVPOpcode(getOpcode()); }
694
695 /// Test if this node has a post-isel opcode, directly
696 /// corresponding to a MachineInstr opcode.
697 bool isMachineOpcode() const { return NodeType < 0; }
698
699 /// This may only be called if isMachineOpcode returns
700 /// true. It returns the MachineInstr opcode value that the node's opcode
701 /// corresponds to.
702 unsigned getMachineOpcode() const {
703 assert(isMachineOpcode() && "Not a MachineInstr opcode!");
704 return ~NodeType;
705 }
706
707 bool getHasDebugValue() const { return SDNodeBits.HasDebugValue; }
708 void setHasDebugValue(bool b) { SDNodeBits.HasDebugValue = b; }
709
710 bool isDivergent() const { return SDNodeBits.IsDivergent; }
711
712 /// Return true if there are no uses of this node.
713 bool use_empty() const { return UseList == nullptr; }
714
715 /// Return true if there is exactly one use of this node.
716 bool hasOneUse() const { return hasSingleElement(uses()); }
717
718 /// Return the number of uses of this node. This method takes
719 /// time proportional to the number of uses.
720 size_t use_size() const { return std::distance(use_begin(), use_end()); }
721
722 /// Return the unique node id.
723 int getNodeId() const { return NodeId; }
724
725 /// Set unique node id.
726 void setNodeId(int Id) { NodeId = Id; }
727
728 /// Return the node ordering.
729 unsigned getIROrder() const { return IROrder; }
730
731 /// Set the node ordering.
732 void setIROrder(unsigned Order) { IROrder = Order; }
733
734 /// Return the source location info.
735 const DebugLoc &getDebugLoc() const { return debugLoc; }
736
737 /// Set source location info. Try to avoid this, putting
738 /// it in the constructor is preferable.
739 void setDebugLoc(DebugLoc dl) { debugLoc = std::move(dl); }
740
741 /// This class provides iterator support for SDUse
742 /// operands that use a specific SDNode.
743 class use_iterator {
744 friend class SDNode;
745
746 SDUse *Op = nullptr;
747
748 explicit use_iterator(SDUse *op) : Op(op) {}
749
750 public:
751 using iterator_category = std::forward_iterator_tag;
752 using value_type = SDUse;
753 using difference_type = std::ptrdiff_t;
754 using pointer = value_type *;
755 using reference = value_type &;
756
757 use_iterator() = default;
758 use_iterator(const use_iterator &I) = default;
759
760 bool operator==(const use_iterator &x) const { return Op == x.Op; }
761 bool operator!=(const use_iterator &x) const {
762 return !operator==(x);
763 }
764
765 /// Return true if this iterator is at the end of uses list.
766 bool atEnd() const { return Op == nullptr; }
767
768 // Iterator traversal: forward iteration only.
769 use_iterator &operator++() { // Preincrement
770 assert(Op && "Cannot increment end iterator!");
771 Op = Op->getNext();
772 return *this;
773 }
774
775 use_iterator operator++(int) { // Postincrement
776 use_iterator tmp = *this; ++*this; return tmp;
777 }
778
779 /// Retrieve a pointer to the current user node.
780 SDNode *operator*() const {
781 assert(Op && "Cannot dereference end iterator!");
782 return Op->getUser();
783 }
784
785 SDNode *operator->() const { return operator*(); }
786
787 SDUse &getUse() const { return *Op; }
788
789 /// Retrieve the operand # of this use in its user.
790 unsigned getOperandNo() const {
791 assert(Op && "Cannot dereference end iterator!");
792 return (unsigned)(Op - Op->getUser()->OperandList);
793 }
794 };
795
796 /// Provide iteration support to walk over all uses of an SDNode.
797 use_iterator use_begin() const {
798 return use_iterator(UseList);
799 }
800
801 static use_iterator use_end() { return use_iterator(nullptr); }
802
803 inline iterator_range<use_iterator> uses() {
804 return make_range(use_begin(), use_end());
805 }
806 inline iterator_range<use_iterator> uses() const {
807 return make_range(use_begin(), use_end());
808 }
809
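A minimal usage sketch (not part of SelectionDAGNodes.h; CountGlueUsers is an illustrative name) showing how the use_iterator range above is typically walked:

#include "llvm/CodeGen/SelectionDAGNodes.h"

// Count how many users of N consume a Glue-typed result. N is assumed to be
// a valid, non-null SDNode* taken from an existing SelectionDAG.
static unsigned CountGlueUsers(const llvm::SDNode *N) {
  unsigned Count = 0;
  for (llvm::SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
       UI != UE; ++UI)
    if (UI.getUse().get().getValueType() == llvm::MVT::Glue)
      ++Count;
  return Count;
}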
810 /// Return true if there are exactly NUSES uses of the indicated value.
811 /// This method ignores uses of other values defined by this operation.
812 bool hasNUsesOfValue(unsigned NUses, unsigned Value) const;
813
814 /// Return true if there are any use of the indicated value.
815 /// This method ignores uses of other values defined by this operation.
816 bool hasAnyUseOfValue(unsigned Value) const;
817
818 /// Return true if this node is the only use of N.
819 bool isOnlyUserOf(const SDNode *N) const;
820
821 /// Return true if this node is an operand of N.
822 bool isOperandOf(const SDNode *N) const;
823
824 /// Return true if this node is a predecessor of N.
825 /// NOTE: Implemented on top of hasPredecessor and every bit as
826 /// expensive. Use carefully.
827 bool isPredecessorOf(const SDNode *N) const {
828 return N->hasPredecessor(this);
829 }
830
831 /// Return true if N is a predecessor of this node.
832 /// N is either an operand of this node, or can be reached by recursively
833 /// traversing up the operands.
834 /// NOTE: This is an expensive method. Use it carefully.
835 bool hasPredecessor(const SDNode *N) const;
836
837 /// Returns true if N is a predecessor of any node in Worklist. This
838 /// helper keeps the Visited and Worklist sets external so that union
839 /// searches can be performed in parallel, results can be cached across
840 /// queries, and the Worklist can be grown incrementally. Stops early if N
841 /// is found; later calls resume from the cached state. Remember to clear
842 /// Visited and Worklist if the DAG changes. MaxSteps gives a maximum number of nodes to visit before
843 /// giving up. The TopologicalPrune flag signals that positive NodeIds are
844 /// topologically ordered (Operands have strictly smaller node id) and search
845 /// can be pruned leveraging this.
846 static bool hasPredecessorHelper(const SDNode *N,
847 SmallPtrSetImpl<const SDNode *> &Visited,
848 SmallVectorImpl<const SDNode *> &Worklist,
849 unsigned int MaxSteps = 0,
850 bool TopologicalPrune = false) {
851 SmallVector<const SDNode *, 8> DeferredNodes;
852 if (Visited.count(N))
853 return true;
854
855 // Node Ids are assigned in three places: as a topological
856 // ordering (> 0), during legalization (results in values set to
857 // 0), and new nodes (set to -1). If N has a topological id then we
858 // know that all nodes with ids smaller than it cannot be
859 // successors and we need not check them. Filter out all nodes
860 // that can't match. We add them to the worklist before exit
861 // in case of multiple calls. Note that during selection the topological id
862 // may be violated if a node's predecessor is selected before it. We mark
863 // this at selection by negating the id of unselected successors and
864 // restricting topological pruning to positive ids.
865
866 int NId = N->getNodeId();
867 // If we invalidated the Id, reconstruct the original NId.
868 if (NId < -1)
869 NId = -(NId + 1);
870
871 bool Found = false;
872 while (!Worklist.empty()) {
873 const SDNode *M = Worklist.pop_back_val();
874 int MId = M->getNodeId();
875 if (TopologicalPrune && M->getOpcode() != ISD::TokenFactor && (NId > 0) &&
876 (MId > 0) && (MId < NId)) {
877 DeferredNodes.push_back(M);
878 continue;
879 }
880 for (const SDValue &OpV : M->op_values()) {
881 SDNode *Op = OpV.getNode();
882 if (Visited.insert(Op).second)
883 Worklist.push_back(Op);
884 if (Op == N)
885 Found = true;
886 }
887 if (Found)
888 break;
889 if (MaxSteps != 0 && Visited.size() >= MaxSteps)
890 break;
891 }
892 // Push deferred nodes back on worklist.
893 Worklist.append(DeferredNodes.begin(), DeferredNodes.end());
894 // If we bailed early, conservatively return found.
895 if (MaxSteps != 0 && Visited.size() >= MaxSteps)
896 return true;
897 return Found;
898 }
899
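A hedged usage sketch (illustrative only; reachesRoot is not an LLVM function) of how the helper above is normally driven, with the caller owning the Visited and Worklist state as the comment describes:

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"

// Returns true if N is (possibly transitively) an operand of Root.
// Visited/Worklist are local here; real callers often keep them across
// queries so results are cached, and clear them whenever the DAG changes.
static bool reachesRoot(const llvm::SDNode *N, const llvm::SDNode *Root) {
  llvm::SmallPtrSet<const llvm::SDNode *, 16> Visited;
  llvm::SmallVector<const llvm::SDNode *, 8> Worklist;
  Worklist.push_back(Root);
  // Bound the search; a bail-out is conservatively reported as "found".
  return llvm::SDNode::hasPredecessorHelper(N, Visited, Worklist,
                                            /*MaxSteps=*/8192);
}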
900 /// Return true if all the users of N are contained in Nodes.
901 /// NOTE: Requires at least one match, but doesn't require them all.
902 static bool areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N);
903
904 /// Return the number of values used by this operation.
905 unsigned getNumOperands() const { return NumOperands; }
906
907 /// Return the maximum number of operands that a SDNode can hold.
908 static constexpr size_t getMaxNumOperands() {
909 return std::numeric_limits<decltype(SDNode::NumOperands)>::max();
910 }
911
912 /// Helper method returns the integer value of a ConstantSDNode operand.
913 inline uint64_t getConstantOperandVal(unsigned Num) const;
914
915 /// Helper method returns the APInt of a ConstantSDNode operand.
916 inline const APInt &getConstantOperandAPInt(unsigned Num) const;
917
918 const SDValue &getOperand(unsigned Num) const {
919 assert(Num < NumOperands && "Invalid child # of SDNode!");
920 return OperandList[Num];
921 }
922
923 using op_iterator = SDUse *;
924
925 op_iterator op_begin() const { return OperandList; }
926 op_iterator op_end() const { return OperandList+NumOperands; }
927 ArrayRef<SDUse> ops() const { return makeArrayRef(op_begin(), op_end()); }
928
929 /// Iterator for directly iterating over the operand SDValue's.
930 struct value_op_iterator
931 : iterator_adaptor_base<value_op_iterator, op_iterator,
932 std::random_access_iterator_tag, SDValue,
933 ptrdiff_t, value_op_iterator *,
934 value_op_iterator *> {
935 explicit value_op_iterator(SDUse *U = nullptr)
936 : iterator_adaptor_base(U) {}
937
938 const SDValue &operator*() const { return I->get(); }
939 };
940
941 iterator_range<value_op_iterator> op_values() const {
942 return make_range(value_op_iterator(op_begin()),
943 value_op_iterator(op_end()));
944 }
945
946 SDVTList getVTList() const {
947 SDVTList X = { ValueList, NumValues };
948 return X;
949 }
950
951 /// If this node has a glue operand, return the node
952 /// to which the glue operand points. Otherwise return NULL.
953 SDNode *getGluedNode() const {
954 if (getNumOperands() != 0 &&
955 getOperand(getNumOperands()-1).getValueType() == MVT::Glue)
956 return getOperand(getNumOperands()-1).getNode();
957 return nullptr;
958 }
959
960 /// If this node has a glue value with a user, return
961 /// the user (there is at most one). Otherwise return NULL.
962 SDNode *getGluedUser() const {
963 for (use_iterator UI = use_begin(), UE = use_end(); UI != UE; ++UI)
964 if (UI.getUse().get().getValueType() == MVT::Glue)
965 return *UI;
966 return nullptr;
967 }
968
969 SDNodeFlags getFlags() const { return Flags; }
970 void setFlags(SDNodeFlags NewFlags) { Flags = NewFlags; }
971
972 /// Clear any flags in this node that aren't also set in Flags.
973 /// If Flags is not in a defined state then this has no effect.
974 void intersectFlagsWith(const SDNodeFlags Flags);
975
976 void setCFIType(uint32_t Type) { CFIType = Type; }
977 uint32_t getCFIType() const { return CFIType; }
978
979 /// Return the number of values defined/returned by this operator.
980 unsigned getNumValues() const { return NumValues; }
981
982 /// Return the type of a specified result.
983 EVT getValueType(unsigned ResNo) const {
984 assert(ResNo < NumValues && "Illegal result number!");
985 return ValueList[ResNo];
986 }
987
988 /// Return the type of a specified result as a simple type.
989 MVT getSimpleValueType(unsigned ResNo) const {
990 return getValueType(ResNo).getSimpleVT();
991 }
992
993 /// Returns MVT::getSizeInBits(getValueType(ResNo)).
994 ///
995 /// If the value type is a scalable vector type, the scalable property will
996 /// be set and the runtime size will be a positive integer multiple of the
997 /// base size.
998 TypeSize getValueSizeInBits(unsigned ResNo) const {
999 return getValueType(ResNo).getSizeInBits();
1000 }
1001
1002 using value_iterator = const EVT *;
1003
1004 value_iterator value_begin() const { return ValueList; }
1005 value_iterator value_end() const { return ValueList+NumValues; }
1006 iterator_range<value_iterator> values() const {
1007 return llvm::make_range(value_begin(), value_end());
1008 }
1009
1010 /// Return the opcode of this operation for printing.
1011 std::string getOperationName(const SelectionDAG *G = nullptr) const;
1012 static const char* getIndexedModeName(ISD::MemIndexedMode AM);
1013 void print_types(raw_ostream &OS, const SelectionDAG *G) const;
1014 void print_details(raw_ostream &OS, const SelectionDAG *G) const;
1015 void print(raw_ostream &OS, const SelectionDAG *G = nullptr) const;
1016 void printr(raw_ostream &OS, const SelectionDAG *G = nullptr) const;
1017
1018 /// Print a SelectionDAG node and all children down to
1019 /// the leaves. The given SelectionDAG allows target-specific nodes
1020 /// to be printed in human-readable form. Unlike printr, this will
1021 /// print the whole DAG, including children that appear multiple
1022 /// times.
1023 ///
1024 void printrFull(raw_ostream &O, const SelectionDAG *G = nullptr) const;
1025
1026 /// Print a SelectionDAG node and children up to
1027 /// depth "depth." The given SelectionDAG allows target-specific
1028 /// nodes to be printed in human-readable form. Unlike printr, this
1029 /// will print children that appear multiple times wherever they are
1030 /// used.
1031 ///
1032 void printrWithDepth(raw_ostream &O, const SelectionDAG *G = nullptr,
1033 unsigned depth = 100) const;
1034
1035 /// Dump this node, for debugging.
1036 void dump() const;
1037
1038 /// Dump (recursively) this node and its use-def subgraph.
1039 void dumpr() const;
1040
1041 /// Dump this node, for debugging.
1042 /// The given SelectionDAG allows target-specific nodes to be printed
1043 /// in human-readable form.
1044 void dump(const SelectionDAG *G) const;
1045
1046 /// Dump (recursively) this node and its use-def subgraph.
1047 /// The given SelectionDAG allows target-specific nodes to be printed
1048 /// in human-readable form.
1049 void dumpr(const SelectionDAG *G) const;
1050
1051 /// printrFull to dbgs(). The given SelectionDAG allows
1052 /// target-specific nodes to be printed in human-readable form.
1053 /// Unlike dumpr, this will print the whole DAG, including children
1054 /// that appear multiple times.
1055 void dumprFull(const SelectionDAG *G = nullptr) const;
1056
1057 /// printrWithDepth to dbgs(). The given
1058 /// SelectionDAG allows target-specific nodes to be printed in
1059 /// human-readable form. Unlike dumpr, this will print children
1060 /// that appear multiple times wherever they are used.
1061 ///
1062 void dumprWithDepth(const SelectionDAG *G = nullptr,
1063 unsigned depth = 100) const;
1064
1065 /// Gather unique data for the node.
1066 void Profile(FoldingSetNodeID &ID) const;
1067
1068 /// This method should only be used by the SDUse class.
1069 void addUse(SDUse &U) { U.addToList(&UseList); }
1070
1071protected:
1072 static SDVTList getSDVTList(EVT VT) {
1073 SDVTList Ret = { getValueTypeList(VT), 1 };
1074 return Ret;
1075 }
1076
1077 /// Create an SDNode.
1078 ///
1079 /// SDNodes are created without any operands, and never own the operand
1080 /// storage. To add operands, see SelectionDAG::createOperands.
1081 SDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs)
1082 : NodeType(Opc), ValueList(VTs.VTs), NumValues(VTs.NumVTs),
1083 IROrder(Order), debugLoc(std::move(dl)) {
1084 memset(&RawSDNodeBits, 0, sizeof(RawSDNodeBits));
1085 assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor");
1086 assert(NumValues == VTs.NumVTs &&
1087 "NumValues wasn't wide enough for its operands!");
1088 }
1089
1090 /// Release the operands and set this node to have zero operands.
1091 void DropOperands();
1092};
1093
1094/// Wrapper class for IR location info (IR ordering and DebugLoc) to be passed
1095/// into SDNode creation functions.
1096/// When an SDNode is created from the DAGBuilder, the DebugLoc is extracted
1097/// from the original Instruction, and IROrder is the ordinal position of
1098/// the instruction.
1099/// When an SDNode is created after the DAG is being built, both DebugLoc and
1100/// the IROrder are propagated from the original SDNode.
1101/// So SDLoc class provides two constructors besides the default one, one to
1102/// be used by the DAGBuilder, the other to be used by others.
1103class SDLoc {
1104private:
1105 DebugLoc DL;
1106 int IROrder = 0;
1107
1108public:
1109 SDLoc() = default;
1110 SDLoc(const SDNode *N) : DL(N->getDebugLoc()), IROrder(N->getIROrder()) {}
1111 SDLoc(const SDValue V) : SDLoc(V.getNode()) {}
1112 SDLoc(const Instruction *I, int Order) : IROrder(Order) {
1113 assert(Order >= 0 && "bad IROrder");
1114 if (I)
1115 DL = I->getDebugLoc();
1116 }
1117
1118 unsigned getIROrder() const { return IROrder; }
1119 const DebugLoc &getDebugLoc() const { return DL; }
1120};
1121
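A brief sketch (illustrative; foldToAdd and the ISD::ADD choice are assumptions, not taken from this header) of the usual pattern: build the SDLoc from the node being replaced so its DebugLoc and IROrder are propagated to the new node:

#include "llvm/CodeGen/SelectionDAG.h"

// Rebuild N as an ADD of its first two operands, reusing N's location info.
// DAG and N are assumed to come from surrounding DAG-combine code, with N
// having at least two operands of the same integer type.
static llvm::SDValue foldToAdd(llvm::SelectionDAG &DAG, llvm::SDNode *N) {
  llvm::SDLoc DL(N); // carries both the DebugLoc and the IROrder of N
  return DAG.getNode(llvm::ISD::ADD, DL, N->getValueType(0),
                     N->getOperand(0), N->getOperand(1));
}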
1122// Define inline functions from the SDValue class.
1123
1124inline SDValue::SDValue(SDNode *node, unsigned resno)
1125 : Node(node), ResNo(resno) {
1126 // Explicitly check for !ResNo to avoid use-after-free, because there are
1127 // callers that use SDValue(N, 0) with a deleted N to indicate successful
1128 // combines.
1129 assert((!Node || !ResNo || ResNo < Node->getNumValues()) &&
1130 "Invalid result number for the given node!");
1131 assert(ResNo < -2U && "Cannot use result numbers reserved for DenseMaps.");
1132}
1133
1134inline unsigned SDValue::getOpcode() const {
1135 return Node->getOpcode();
1136}
1137
1138inline EVT SDValue::getValueType() const {
1139 return Node->getValueType(ResNo);
1140}
1141
1142inline unsigned SDValue::getNumOperands() const {
1143 return Node->getNumOperands();
1144}
1145
1146inline const SDValue &SDValue::getOperand(unsigned i) const {
1147 return Node->getOperand(i);
[22] Called C++ object pointer is null
1148}
1149
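SDValue::getOperand above dereferences Node unconditionally, which is exactly where the analyzer reports the null pointer call; callers that may hold an empty SDValue have to test it first. A defensive sketch (illustrative; firstOperandOrEmpty is not part of the header):

#include "llvm/CodeGen/SelectionDAGNodes.h"

// Return V's first operand, or an empty SDValue when V has no node (e.g. a
// default-constructed SDValue returned by a failed match helper).
static llvm::SDValue firstOperandOrEmpty(llvm::SDValue V) {
  if (!V.getNode() || V.getNumOperands() == 0)
    return llvm::SDValue(); // nothing safe to dereference
  return V.getOperand(0);   // Node is known non-null here
}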
1150inline uint64_t SDValue::getConstantOperandVal(unsigned i) const {
1151 return Node->getConstantOperandVal(i);
1152}
1153
1154inline const APInt &SDValue::getConstantOperandAPInt(unsigned i) const {
1155 return Node->getConstantOperandAPInt(i);
1156}
1157
1158inline bool SDValue::isTargetOpcode() const {
1159 return Node->isTargetOpcode();
1160}
1161
1162inline bool SDValue::isTargetMemoryOpcode() const {
1163 return Node->isTargetMemoryOpcode();
1164}
1165
1166inline bool SDValue::isMachineOpcode() const {
1167 return Node->isMachineOpcode();
1168}
1169
1170inline unsigned SDValue::getMachineOpcode() const {
1171 return Node->getMachineOpcode();
1172}
1173
1174inline bool SDValue::isUndef() const {
1175 return Node->isUndef();
1176}
1177
1178inline bool SDValue::use_empty() const {
1179 return !Node->hasAnyUseOfValue(ResNo);
1180}
1181
1182inline bool SDValue::hasOneUse() const {
1183 return Node->hasNUsesOfValue(1, ResNo);
1184}
1185
1186inline const DebugLoc &SDValue::getDebugLoc() const {
1187 return Node->getDebugLoc();
1188}
1189
1190inline void SDValue::dump() const {
1191 return Node->dump();
1192}
1193
1194inline void SDValue::dump(const SelectionDAG *G) const {
1195 return Node->dump(G);
1196}
1197
1198inline void SDValue::dumpr() const {
1199 return Node->dumpr();
1200}
1201
1202inline void SDValue::dumpr(const SelectionDAG *G) const {
1203 return Node->dumpr(G);
1204}
1205
1206// Define inline functions from the SDUse class.
1207
1208inline void SDUse::set(const SDValue &V) {
1209 if (Val.getNode()) removeFromList();
1210 Val = V;
1211 if (V.getNode())
1212 V->addUse(*this);
1213}
1214
1215inline void SDUse::setInitial(const SDValue &V) {
1216 Val = V;
1217 V->addUse(*this);
1218}
1219
1220inline void SDUse::setNode(SDNode *N) {
1221 if (Val.getNode()) removeFromList();
1222 Val.setNode(N);
1223 if (N) N->addUse(*this);
1224}
1225
1226/// This class is used to form a handle around another node that
1227/// is persistent and is updated across invocations of replaceAllUsesWith on its
1228/// operand. This node should be directly created by end-users and not added to
1229/// the AllNodes list.
1230class HandleSDNode : public SDNode {
1231 SDUse Op;
1232
1233public:
1234 explicit HandleSDNode(SDValue X)
1235 : SDNode(ISD::HANDLENODE, 0, DebugLoc(), getSDVTList(MVT::Other)) {
1236 // HandleSDNodes are never inserted into the DAG, so they won't be
1237 // auto-numbered. Use ID 65535 as a sentinel.
1238 PersistentId = 0xffff;
1239
1240 // Manually set up the operand list. This node type is special in that it's
1241 // always stack allocated and SelectionDAG does not manage its operands.
1242 // TODO: This should either (a) not be in the SDNode hierarchy, or (b) not
1243 // be so special.
1244 Op.setUser(this);
1245 Op.setInitial(X);
1246 NumOperands = 1;
1247 OperandList = &Op;
1248 }
1249 ~HandleSDNode();
1250
1251 const SDValue &getValue() const { return Op; }
1252};
1253
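A minimal sketch (illustrative; trackAcrossRAUW is not an LLVM API) of what HandleSDNode is for: the handle is itself a use of the wrapped value, so it is rewritten by ReplaceAllUsesWith and keeps the value from being garbage-collected while it is alive:

#include "llvm/CodeGen/SelectionDAG.h"

// Replace every use of Val with Repl (assumed to be distinct values) and
// return the value the handle now refers to. Because the handle's own use is
// rewritten too, getValue() reports the surviving value after the RAUW.
static llvm::SDValue trackAcrossRAUW(llvm::SelectionDAG &DAG,
                                     llvm::SDValue Val, llvm::SDValue Repl) {
  llvm::HandleSDNode Handle(Val);
  DAG.ReplaceAllUsesWith(Val, Repl);
  return Handle.getValue();
}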
1254class AddrSpaceCastSDNode : public SDNode {
1255private:
1256 unsigned SrcAddrSpace;
1257 unsigned DestAddrSpace;
1258
1259public:
1260 AddrSpaceCastSDNode(unsigned Order, const DebugLoc &dl, EVT VT,
1261 unsigned SrcAS, unsigned DestAS);
1262
1263 unsigned getSrcAddressSpace() const { return SrcAddrSpace; }
1264 unsigned getDestAddressSpace() const { return DestAddrSpace; }
1265
1266 static bool classof(const SDNode *N) {
1267 return N->getOpcode() == ISD::ADDRSPACECAST;
1268 }
1269};
1270
1271/// This is an abstract virtual class for memory operations.
1272class MemSDNode : public SDNode {
1273private:
1274 // VT of in-memory value.
1275 EVT MemoryVT;
1276
1277protected:
1278 /// Memory reference information.
1279 MachineMemOperand *MMO;
1280
1281public:
1282 MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, SDVTList VTs,
1283 EVT memvt, MachineMemOperand *MMO);
1284
1285 bool readMem() const { return MMO->isLoad(); }
1286 bool writeMem() const { return MMO->isStore(); }
1287
1288 /// Returns the alignment of the memory access.
1289 Align getOriginalAlign() const { return MMO->getBaseAlign(); }
1290 Align getAlign() const { return MMO->getAlign(); }
1291 // FIXME: Remove once transition to getAlign is over.
1292 unsigned getAlignment() const { return MMO->getAlign().value(); }
1293
1294 /// Return the SubclassData value, without HasDebugValue. This contains an
1295 /// encoding of the volatile flag, as well as bits used by subclasses. This
1296 /// function should only be used to compute a FoldingSetNodeID value.
1297 /// The HasDebugValue bit is masked out because CSE map needs to match
1298 /// nodes with debug info with nodes without debug info. Same is about
1299 /// isDivergent bit.
1300 unsigned getRawSubclassData() const {
1301 uint16_t Data;
1302 union {
1303 char RawSDNodeBits[sizeof(uint16_t)];
1304 SDNodeBitfields SDNodeBits;
1305 };
1306 memcpy(&RawSDNodeBits, &this->RawSDNodeBits, sizeof(this->RawSDNodeBits));
1307 SDNodeBits.HasDebugValue = 0;
1308 SDNodeBits.IsDivergent = false;
1309 memcpy(&Data, &RawSDNodeBits, sizeof(RawSDNodeBits));
1310 return Data;
1311 }
1312
1313 bool isVolatile() const { return MemSDNodeBits.IsVolatile; }
1314 bool isNonTemporal() const { return MemSDNodeBits.IsNonTemporal; }
1315 bool isDereferenceable() const { return MemSDNodeBits.IsDereferenceable; }
1316 bool isInvariant() const { return MemSDNodeBits.IsInvariant; }
1317
1318 // Returns the offset from the location of the access.
1319 int64_t getSrcValueOffset() const { return MMO->getOffset(); }
1320
1321 /// Returns the AA info that describes the dereference.
1322 AAMDNodes getAAInfo() const { return MMO->getAAInfo(); }
1323
1324 /// Returns the Ranges that describes the dereference.
1325 const MDNode *getRanges() const { return MMO->getRanges(); }
1326
1327 /// Returns the synchronization scope ID for this memory operation.
1328 SyncScope::ID getSyncScopeID() const { return MMO->getSyncScopeID(); }
1329
1330 /// Return the atomic ordering requirements for this memory operation. For
1331 /// cmpxchg atomic operations, return the atomic ordering requirements when
1332 /// store occurs.
1333 AtomicOrdering getSuccessOrdering() const {
1334 return MMO->getSuccessOrdering();
1335 }
1336
1337 /// Return a single atomic ordering that is at least as strong as both the
1338 /// success and failure orderings for an atomic operation. (For operations
1339 /// other than cmpxchg, this is equivalent to getSuccessOrdering().)
1340 AtomicOrdering getMergedOrdering() const { return MMO->getMergedOrdering(); }
1341
1342 /// Return true if the memory operation ordering is Unordered or higher.
1343 bool isAtomic() const { return MMO->isAtomic(); }
1344
1345 /// Returns true if the memory operation doesn't imply any ordering
1346 /// constraints on surrounding memory operations beyond the normal memory
1347 /// aliasing rules.
1348 bool isUnordered() const { return MMO->isUnordered(); }
1349
1350 /// Returns true if the memory operation is neither atomic nor volatile.
1351 bool isSimple() const { return !isAtomic() && !isVolatile(); }
1352
1353 /// Return the type of the in-memory value.
1354 EVT getMemoryVT() const { return MemoryVT; }
1355
1356 /// Return a MachineMemOperand object describing the memory
1357 /// reference performed by operation.
1358 MachineMemOperand *getMemOperand() const { return MMO; }
1359
1360 const MachinePointerInfo &getPointerInfo() const {
1361 return MMO->getPointerInfo();
1362 }
1363
1364 /// Return the address space for the associated pointer
1365 unsigned getAddressSpace() const {
1366 return getPointerInfo().getAddrSpace();
1367 }
1368
1369 /// Update this MemSDNode's MachineMemOperand information
1370 /// to reflect the alignment of NewMMO, if it has a greater alignment.
1371 /// This must only be used when the new alignment applies to all users of
1372 /// this MachineMemOperand.
1373 void refineAlignment(const MachineMemOperand *NewMMO) {
1374 MMO->refineAlignment(NewMMO);
1375 }
1376
1377 const SDValue &getChain() const { return getOperand(0); }
1378
1379 const SDValue &getBasePtr() const {
1380 switch (getOpcode()) {
1381 case ISD::STORE:
1382 case ISD::VP_STORE:
1383 case ISD::MSTORE:
1384 case ISD::VP_SCATTER:
1385 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
1386 return getOperand(2);
1387 case ISD::MGATHER:
1388 case ISD::MSCATTER:
1389 return getOperand(3);
1390 default:
1391 return getOperand(1);
1392 }
1393 }
1394
1395 // Methods to support isa and dyn_cast
1396 static bool classof(const SDNode *N) {
1397 // For some targets, we lower some target intrinsics to a MemIntrinsicNode
1398 // with either an intrinsic or a target opcode.
1399 switch (N->getOpcode()) {
1400 case ISD::LOAD:
1401 case ISD::STORE:
1402 case ISD::PREFETCH:
1403 case ISD::ATOMIC_CMP_SWAP:
1404 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
1405 case ISD::ATOMIC_SWAP:
1406 case ISD::ATOMIC_LOAD_ADD:
1407 case ISD::ATOMIC_LOAD_SUB:
1408 case ISD::ATOMIC_LOAD_AND:
1409 case ISD::ATOMIC_LOAD_CLR:
1410 case ISD::ATOMIC_LOAD_OR:
1411 case ISD::ATOMIC_LOAD_XOR:
1412 case ISD::ATOMIC_LOAD_NAND:
1413 case ISD::ATOMIC_LOAD_MIN:
1414 case ISD::ATOMIC_LOAD_MAX:
1415 case ISD::ATOMIC_LOAD_UMIN:
1416 case ISD::ATOMIC_LOAD_UMAX:
1417 case ISD::ATOMIC_LOAD_FADD:
1418 case ISD::ATOMIC_LOAD_FSUB:
1419 case ISD::ATOMIC_LOAD_FMAX:
1420 case ISD::ATOMIC_LOAD_FMIN:
1421 case ISD::ATOMIC_LOAD:
1422 case ISD::ATOMIC_STORE:
1423 case ISD::MLOAD:
1424 case ISD::MSTORE:
1425 case ISD::MGATHER:
1426 case ISD::MSCATTER:
1427 case ISD::VP_LOAD:
1428 case ISD::VP_STORE:
1429 case ISD::VP_GATHER:
1430 case ISD::VP_SCATTER:
1431 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
1432 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
1433 return true;
1434 default:
1435 return N->isMemIntrinsic() || N->isTargetMemoryOpcode();
1436 }
1437 }
1438};
1439
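A short sketch (illustrative; describeMemOp is not part of LLVM) of treating any memory node uniformly through the MemSDNode interface above:

#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/Support/Debug.h"

// If N is any kind of memory operation (load, store, atomic, masked, VP,
// or a memory intrinsic), print a few of its properties and return true.
static bool describeMemOp(const llvm::SDNode *N) {
  const auto *Mem = llvm::dyn_cast<llvm::MemSDNode>(N);
  if (!Mem)
    return false;
  llvm::dbgs() << "addrspace " << Mem->getAddressSpace()
               << ", align " << Mem->getAlign().value()
               << (Mem->isSimple() ? ", simple" : ", atomic/volatile")
               << '\n';
  return true;
}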
1440/// This is an SDNode representing atomic operations.
1441class AtomicSDNode : public MemSDNode {
1442public:
1443 AtomicSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, SDVTList VTL,
1444 EVT MemVT, MachineMemOperand *MMO)
1445 : MemSDNode(Opc, Order, dl, VTL, MemVT, MMO) {
1446 assert(((Opc != ISD::ATOMIC_LOAD && Opc != ISD::ATOMIC_STORE) ||
1447 MMO->isAtomic()) && "then why are we using an AtomicSDNode?");
1448 }
1449
1450 const SDValue &getBasePtr() const { return getOperand(1); }
1451 const SDValue &getVal() const { return getOperand(2); }
1452
1453 /// Returns true if this SDNode represents cmpxchg atomic operation, false
1454 /// otherwise.
1455 bool isCompareAndSwap() const {
1456 unsigned Op = getOpcode();
1457 return Op == ISD::ATOMIC_CMP_SWAP ||
1458 Op == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS;
1459 }
1460
1461 /// For cmpxchg atomic operations, return the atomic ordering requirements
1462 /// when store does not occur.
1463 AtomicOrdering getFailureOrdering() const {
1464 assert(isCompareAndSwap() && "Must be cmpxchg operation");
1465 return MMO->getFailureOrdering();
1466 }
1467
1468 // Methods to support isa and dyn_cast
1469 static bool classof(const SDNode *N) {
1470 return N->getOpcode() == ISD::ATOMIC_CMP_SWAP ||
1471 N->getOpcode() == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS ||
1472 N->getOpcode() == ISD::ATOMIC_SWAP ||
1473 N->getOpcode() == ISD::ATOMIC_LOAD_ADD ||
1474 N->getOpcode() == ISD::ATOMIC_LOAD_SUB ||
1475 N->getOpcode() == ISD::ATOMIC_LOAD_AND ||
1476 N->getOpcode() == ISD::ATOMIC_LOAD_CLR ||
1477 N->getOpcode() == ISD::ATOMIC_LOAD_OR ||
1478 N->getOpcode() == ISD::ATOMIC_LOAD_XOR ||
1479 N->getOpcode() == ISD::ATOMIC_LOAD_NAND ||
1480 N->getOpcode() == ISD::ATOMIC_LOAD_MIN ||
1481 N->getOpcode() == ISD::ATOMIC_LOAD_MAX ||
1482 N->getOpcode() == ISD::ATOMIC_LOAD_UMIN ||
1483 N->getOpcode() == ISD::ATOMIC_LOAD_UMAX ||
1484 N->getOpcode() == ISD::ATOMIC_LOAD_FADD ||
1485 N->getOpcode() == ISD::ATOMIC_LOAD_FSUB ||
1486 N->getOpcode() == ISD::ATOMIC_LOAD_FMAX ||
1487 N->getOpcode() == ISD::ATOMIC_LOAD_FMIN ||
1488 N->getOpcode() == ISD::ATOMIC_LOAD ||
1489 N->getOpcode() == ISD::ATOMIC_STORE;
1490 }
1491};
1492
1493/// This SDNode is used for target intrinsics that touch
1494/// memory and need an associated MachineMemOperand. Its opcode may be
1495/// INTRINSIC_VOID, INTRINSIC_W_CHAIN, PREFETCH, or a target-specific opcode
1496/// with a value not less than FIRST_TARGET_MEMORY_OPCODE.
1497class MemIntrinsicSDNode : public MemSDNode {
1498public:
1499 MemIntrinsicSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl,
1500 SDVTList VTs, EVT MemoryVT, MachineMemOperand *MMO)
1501 : MemSDNode(Opc, Order, dl, VTs, MemoryVT, MMO) {
1502 SDNodeBits.IsMemIntrinsic = true;
1503 }
1504
1505 // Methods to support isa and dyn_cast
1506 static bool classof(const SDNode *N) {
1507 // We lower some target intrinsics to their target opcode early,
1508 // so a node with a target opcode can be of this class.
1509 return N->isMemIntrinsic() ||
1510 N->getOpcode() == ISD::PREFETCH ||
1511 N->isTargetMemoryOpcode();
1512 }
1513};
1514
1515/// This SDNode is used to implement the code generator
1516/// support for the llvm IR shufflevector instruction. It combines elements
1517/// from two input vectors into a new input vector, with the selection and
1518/// ordering of elements determined by an array of integers, referred to as
1519/// the shuffle mask. For input vectors of width N, mask indices of 0..N-1
1520/// refer to elements from the LHS input, and indices from N to 2N-1 the RHS.
1521/// An index of -1 is treated as undef, such that the code generator may put
1522/// any value in the corresponding element of the result.
1523class ShuffleVectorSDNode : public SDNode {
1524 // The memory for Mask is owned by the SelectionDAG's OperandAllocator, and
1525 // is freed when the SelectionDAG object is destroyed.
1526 const int *Mask;
1527
1528protected:
1529 friend class SelectionDAG;
1530
1531 ShuffleVectorSDNode(EVT VT, unsigned Order, const DebugLoc &dl, const int *M)
1532 : SDNode(ISD::VECTOR_SHUFFLE, Order, dl, getSDVTList(VT)), Mask(M) {}
1533
1534public:
1535 ArrayRef<int> getMask() const {
1536 EVT VT = getValueType(0);
1537 return makeArrayRef(Mask, VT.getVectorNumElements());
1538 }
1539
1540 int getMaskElt(unsigned Idx) const {
1541 assert(Idx < getValueType(0).getVectorNumElements() && "Idx out of range!");
1542 return Mask[Idx];
1543 }
1544
1545 bool isSplat() const { return isSplatMask(Mask, getValueType(0)); }
1546
1547 int getSplatIndex() const {
1548 assert(isSplat() && "Cannot get splat index for non-splat!");
1549 EVT VT = getValueType(0);
1550 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
1551 if (Mask[i] >= 0)
1552 return Mask[i];
1553
1554 // We can choose any index value here and be correct because all elements
1555 // are undefined. Return 0 for better potential for callers to simplify.
1556 return 0;
1557 }
1558
1559 static bool isSplatMask(const int *Mask, EVT VT);
1560
1561 /// Change values in a shuffle permute mask assuming
1562 /// the two vector operands have swapped position.
1563 static void commuteMask(MutableArrayRef<int> Mask) {
1564 unsigned NumElems = Mask.size();
1565 for (unsigned i = 0; i != NumElems; ++i) {
1566 int idx = Mask[i];
1567 if (idx < 0)
1568 continue;
1569 else if (idx < (int)NumElems)
1570 Mask[i] = idx + NumElems;
1571 else
1572 Mask[i] = idx - NumElems;
1573 }
1574 }
1575
1576 static bool classof(const SDNode *N) {
1577 return N->getOpcode() == ISD::VECTOR_SHUFFLE;
1578 }
1579};
1580
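A worked example (illustrative) of commuteMask above: for a 4-wide shuffle of A and B, the mask {0, 5, 2, 7} selects A0, B1, A2, B3; after swapping A and B it must become {4, 1, 6, 3}, while -1 (undef) entries are left untouched:

#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"

// Return a copy of Mask remapped for swapped shuffle operands.
static llvm::SmallVector<int, 16> commutedMask(llvm::ArrayRef<int> Mask) {
  llvm::SmallVector<int, 16> M(Mask.begin(), Mask.end());
  llvm::ShuffleVectorSDNode::commuteMask(M); // in-place remap, keeps -1 as-is
  return M;
}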
1581class ConstantSDNode : public SDNode {
1582 friend class SelectionDAG;
1583
1584 const ConstantInt *Value;
1585
1586 ConstantSDNode(bool isTarget, bool isOpaque, const ConstantInt *val, EVT VT)
1587 : SDNode(isTarget ? ISD::TargetConstant : ISD::Constant, 0, DebugLoc(),
1588 getSDVTList(VT)),
1589 Value(val) {
1590 ConstantSDNodeBits.IsOpaque = isOpaque;
1591 }
1592
1593public:
1594 const ConstantInt *getConstantIntValue() const { return Value; }
1595 const APInt &getAPIntValue() const { return Value->getValue(); }
1596 uint64_t getZExtValue() const { return Value->getZExtValue(); }
1597 int64_t getSExtValue() const { return Value->getSExtValue(); }
1598 uint64_t getLimitedValue(uint64_t Limit = UINT64_MAX) {
1599 return Value->getLimitedValue(Limit);
1600 }
1601 MaybeAlign getMaybeAlignValue() const { return Value->getMaybeAlignValue(); }
1602 Align getAlignValue() const { return Value->getAlignValue(); }
1603
1604 bool isOne() const { return Value->isOne(); }
1605 bool isZero() const { return Value->isZero(); }
1606 // NOTE: This is soft-deprecated. Please use `isZero()` instead.
1607 bool isNullValue() const { return isZero(); }
1608 bool isAllOnes() const { return Value->isMinusOne(); }
1609 // NOTE: This is soft-deprecated. Please use `isAllOnes()` instead.
1610 bool isAllOnesValue() const { return isAllOnes(); }
1611 bool isMaxSignedValue() const { return Value->isMaxValue(true); }
1612 bool isMinSignedValue() const { return Value->isMinValue(true); }
1613
1614 bool isOpaque() const { return ConstantSDNodeBits.IsOpaque; }
1615
1616 static bool classof(const SDNode *N) {
1617 return N->getOpcode() == ISD::Constant ||
1618 N->getOpcode() == ISD::TargetConstant;
1619 }
1620};
1621
1622uint64_t SDNode::getConstantOperandVal(unsigned Num) const {
1623 return cast<ConstantSDNode>(getOperand(Num))->getZExtValue();
1624}
1625
1626const APInt &SDNode::getConstantOperandAPInt(unsigned Num) const {
1627 return cast<ConstantSDNode>(getOperand(Num))->getAPIntValue();
1628}
1629
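getConstantOperandVal above goes through cast<ConstantSDNode> and therefore asserts on non-constant operands; a tolerant sketch (illustrative; getImmOperand is not an LLVM helper) checks first:

#include "llvm/CodeGen/SelectionDAGNodes.h"

// Read operand OpNo of N as an immediate if, and only if, it is a
// ConstantSDNode. N is assumed non-null.
static bool getImmOperand(const llvm::SDNode *N, unsigned OpNo,
                          uint64_t &Imm) {
  if (OpNo >= N->getNumOperands())
    return false;
  if (const auto *C =
          llvm::dyn_cast<llvm::ConstantSDNode>(N->getOperand(OpNo))) {
    Imm = C->getZExtValue();
    return true;
  }
  return false;
}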
1630class ConstantFPSDNode : public SDNode {
1631 friend class SelectionDAG;
1632
1633 const ConstantFP *Value;
1634
1635 ConstantFPSDNode(bool isTarget, const ConstantFP *val, EVT VT)
1636 : SDNode(isTarget ? ISD::TargetConstantFP : ISD::ConstantFP, 0,
1637 DebugLoc(), getSDVTList(VT)),
1638 Value(val) {}
1639
1640public:
1641 const APFloat& getValueAPF() const { return Value->getValueAPF(); }
1642 const ConstantFP *getConstantFPValue() const { return Value; }
1643
1644 /// Return true if the value is positive or negative zero.
1645 bool isZero() const { return Value->isZero(); }
1646
1647 /// Return true if the value is a NaN.
1648 bool isNaN() const { return Value->isNaN(); }
1649
1650 /// Return true if the value is an infinity
1651 bool isInfinity() const { return Value->isInfinity(); }
1652
1653 /// Return true if the value is negative.
1654 bool isNegative() const { return Value->isNegative(); }
1655
1656 /// We don't rely on operator== working on double values, as
1657 /// it returns true for things that are clearly not equal, like -0.0 and 0.0.
1658 /// As such, this method can be used to do an exact bit-for-bit comparison of
1659 /// two floating point values.
1660
1661 /// We leave the version with the double argument here because it's just so
1662 /// convenient to write "2.0" and the like. Without this function we'd
1663 /// have to duplicate its logic everywhere it's called.
1664 bool isExactlyValue(double V) const {
1665 return Value->getValueAPF().isExactlyValue(V);
1666 }
1667 bool isExactlyValue(const APFloat& V) const;
1668
1669 static bool isValueValidForType(EVT VT, const APFloat& Val);
1670
1671 static bool classof(const SDNode *N) {
1672 return N->getOpcode() == ISD::ConstantFP ||
1673 N->getOpcode() == ISD::TargetConstantFP;
1674 }
1675};
1676
1677/// Returns true if \p V is a constant integer zero.
1678bool isNullConstant(SDValue V);
1679
1680/// Returns true if \p V is an FP constant with a value of positive zero.
1681bool isNullFPConstant(SDValue V);
1682
1683/// Returns true if \p V is an integer constant with all bits set.
1684bool isAllOnesConstant(SDValue V);
1685
1686/// Returns true if \p V is a constant integer one.
1687bool isOneConstant(SDValue V);
1688
1689/// Returns true if \p V is a constant min signed integer value.
1690bool isMinSignedConstant(SDValue V);
1691
1692/// Return the non-bitcasted source operand of \p V if it exists.
1693/// If \p V is not a bitcasted value, it is returned as-is.
1694SDValue peekThroughBitcasts(SDValue V);
1695
1696/// Return the non-bitcasted and one-use source operand of \p V if it exists.
1697/// If \p V is not a bitcasted one-use value, it is returned as-is.
1698SDValue peekThroughOneUseBitcasts(SDValue V);
1699
1700/// Return the non-extracted vector source operand of \p V if it exists.
1701/// If \p V is not an extracted subvector, it is returned as-is.
1702SDValue peekThroughExtractSubvectors(SDValue V);
1703
1704/// Returns true if \p V is a bitwise not operation. Assumes that an all ones
1705/// constant is canonicalized to be operand 1.
1706bool isBitwiseNot(SDValue V, bool AllowUndefs = false);
1707
1708/// If \p V is a bitwise not, returns the inverted operand. Otherwise returns
1709/// an empty SDValue. Only bits set in \p Mask are required to be inverted,
1710/// other bits may be arbitrary.
1711SDValue getBitwiseNotOperand(SDValue V, SDValue Mask, bool AllowUndefs);
1712
1713/// Returns the SDNode if it is a constant splat BuildVector or constant int.
1714ConstantSDNode *isConstOrConstSplat(SDValue N, bool AllowUndefs = false,
1715 bool AllowTruncation = false);
1716
1717/// Returns the SDNode if it is a demanded constant splat BuildVector or
1718/// constant int.
1719ConstantSDNode *isConstOrConstSplat(SDValue N, const APInt &DemandedElts,
1720 bool AllowUndefs = false,
1721 bool AllowTruncation = false);
1722
1723/// Returns the SDNode if it is a constant splat BuildVector or constant float.
1724ConstantFPSDNode *isConstOrConstSplatFP(SDValue N, bool AllowUndefs = false);
1725
1726/// Returns the SDNode if it is a demanded constant splat BuildVector or
1727/// constant float.
1728ConstantFPSDNode *isConstOrConstSplatFP(SDValue N, const APInt &DemandedElts,
1729 bool AllowUndefs = false);
1730
1731/// Return true if the value is a constant 0 integer or a splatted vector of
1732/// a constant 0 integer (with no undefs by default).
1733/// Build vector implicit truncation is not an issue for null values.
1734bool isNullOrNullSplat(SDValue V, bool AllowUndefs = false);
1735
1736/// Return true if the value is a constant 1 integer or a splatted vector of a
1737/// constant 1 integer (with no undefs).
1738/// Does not permit build vector implicit truncation.
1739bool isOneOrOneSplat(SDValue V, bool AllowUndefs = false);
1740
1741/// Return true if the value is a constant -1 integer or a splatted vector of a
1742/// constant -1 integer (with no undefs).
1743/// Does not permit build vector implicit truncation.
1744bool isAllOnesOrAllOnesSplat(SDValue V, bool AllowUndefs = false);
1745
1746/// Return true if \p V is either a integer or FP constant.
1747inline bool isIntOrFPConstant(SDValue V) {
1748 return isa<ConstantSDNode>(V) || isa<ConstantFPSDNode>(V);
1749}
1750
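These predicates make scalar and splat-vector constants look the same to combine code; a tiny sketch (illustrative) of composing them:

#include "llvm/CodeGen/SelectionDAGNodes.h"

// True if V is a constant 0 or all-ones integer, or a splat of either
// (with no undef lanes, per the helpers' defaults).
static bool isZeroOrAllOnes(llvm::SDValue V) {
  return llvm::isNullOrNullSplat(V) || llvm::isAllOnesOrAllOnesSplat(V);
}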
1751class GlobalAddressSDNode : public SDNode {
1752 friend class SelectionDAG;
1753
1754 const GlobalValue *TheGlobal;
1755 int64_t Offset;
1756 unsigned TargetFlags;
1757
1758 GlobalAddressSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL,
1759 const GlobalValue *GA, EVT VT, int64_t o,
1760 unsigned TF);
1761
1762public:
1763 const GlobalValue *getGlobal() const { return TheGlobal; }
1764 int64_t getOffset() const { return Offset; }
1765 unsigned getTargetFlags() const { return TargetFlags; }
1766 // Return the address space this GlobalAddress belongs to.
1767 unsigned getAddressSpace() const;
1768
1769 static bool classof(const SDNode *N) {
1770 return N->getOpcode() == ISD::GlobalAddress ||
1771 N->getOpcode() == ISD::TargetGlobalAddress ||
1772 N->getOpcode() == ISD::GlobalTLSAddress ||
1773 N->getOpcode() == ISD::TargetGlobalTLSAddress;
1774 }
1775};
1776
1777class FrameIndexSDNode : public SDNode {
1778 friend class SelectionDAG;
1779
1780 int FI;
1781
1782 FrameIndexSDNode(int fi, EVT VT, bool isTarg)
1783 : SDNode(isTarg ? ISD::TargetFrameIndex : ISD::FrameIndex,
1784 0, DebugLoc(), getSDVTList(VT)), FI(fi) {
1785 }
1786
1787public:
1788 int getIndex() const { return FI; }
1789
1790 static bool classof(const SDNode *N) {
1791 return N->getOpcode() == ISD::FrameIndex ||
1792 N->getOpcode() == ISD::TargetFrameIndex;
1793 }
1794};
1795
1796/// This SDNode is used for LIFETIME_START/LIFETIME_END values, which indicate
1797 /// the offset and size that are started/ended in the underlying FrameIndex.
1798class LifetimeSDNode : public SDNode {
1799 friend class SelectionDAG;
1800 int64_t Size;
1801 int64_t Offset; // -1 if offset is unknown.
1802
1803 LifetimeSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
1804 SDVTList VTs, int64_t Size, int64_t Offset)
1805 : SDNode(Opcode, Order, dl, VTs), Size(Size), Offset(Offset) {}
1806public:
1807 int64_t getFrameIndex() const {
1808 return cast<FrameIndexSDNode>(getOperand(1))->getIndex();
1809 }
1810
1811 bool hasOffset() const { return Offset >= 0; }
1812 int64_t getOffset() const {
1813 assert(hasOffset() && "offset is unknown");
1814 return Offset;
1815 }
1816 int64_t getSize() const {
1817 assert(hasOffset() && "offset is unknown");
1818 return Size;
1819 }
1820
1821 // Methods to support isa and dyn_cast
1822 static bool classof(const SDNode *N) {
1823 return N->getOpcode() == ISD::LIFETIME_START ||
1824 N->getOpcode() == ISD::LIFETIME_END;
1825 }
1826};
1827
1828/// This SDNode is used for PSEUDO_PROBE values, which are the function guid and
1829 /// the index of the basic block being probed. A pseudo probe serves as a
1830 /// placeholder and will be removed at the end of compilation. It has no
1831 /// operands because we do not want instruction selection to deal with them.
1832class PseudoProbeSDNode : public SDNode {
1833 friend class SelectionDAG;
1834 uint64_t Guid;
1835 uint64_t Index;
1836 uint32_t Attributes;
1837
1838 PseudoProbeSDNode(unsigned Opcode, unsigned Order, const DebugLoc &Dl,
1839 SDVTList VTs, uint64_t Guid, uint64_t Index, uint32_t Attr)
1840 : SDNode(Opcode, Order, Dl, VTs), Guid(Guid), Index(Index),
1841 Attributes(Attr) {}
1842
1843public:
1844 uint64_t getGuid() const { return Guid; }
1845 uint64_t getIndex() const { return Index; }
1846 uint32_t getAttributes() const { return Attributes; }
1847
1848 // Methods to support isa and dyn_cast
1849 static bool classof(const SDNode *N) {
1850 return N->getOpcode() == ISD::PSEUDO_PROBE;
1851 }
1852};
1853
1854class JumpTableSDNode : public SDNode {
1855 friend class SelectionDAG;
1856
1857 int JTI;
1858 unsigned TargetFlags;
1859
1860 JumpTableSDNode(int jti, EVT VT, bool isTarg, unsigned TF)
1861 : SDNode(isTarg ? ISD::TargetJumpTable : ISD::JumpTable,
1862 0, DebugLoc(), getSDVTList(VT)), JTI(jti), TargetFlags(TF) {
1863 }
1864
1865public:
1866 int getIndex() const { return JTI; }
1867 unsigned getTargetFlags() const { return TargetFlags; }
1868
1869 static bool classof(const SDNode *N) {
1870 return N->getOpcode() == ISD::JumpTable ||
1871 N->getOpcode() == ISD::TargetJumpTable;
1872 }
1873};
1874
1875class ConstantPoolSDNode : public SDNode {
1876 friend class SelectionDAG;
1877
1878 union {
1879 const Constant *ConstVal;
1880 MachineConstantPoolValue *MachineCPVal;
1881 } Val;
1882 int Offset; // It's a MachineConstantPoolValue if top bit is set.
1883 Align Alignment; // Minimum alignment requirement of CP.
1884 unsigned TargetFlags;
1885
1886 ConstantPoolSDNode(bool isTarget, const Constant *c, EVT VT, int o,
1887 Align Alignment, unsigned TF)
1888 : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, 0,
1889 DebugLoc(), getSDVTList(VT)),
1890 Offset(o), Alignment(Alignment), TargetFlags(TF) {
1891 assert(Offset >= 0 && "Offset is too large");
1892 Val.ConstVal = c;
1893 }
1894
1895 ConstantPoolSDNode(bool isTarget, MachineConstantPoolValue *v, EVT VT, int o,
1896 Align Alignment, unsigned TF)
1897 : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, 0,
1898 DebugLoc(), getSDVTList(VT)),
1899 Offset(o), Alignment(Alignment), TargetFlags(TF) {
1900 assert(Offset >= 0 && "Offset is too large");
1901 Val.MachineCPVal = v;
1902 Offset |= 1 << (sizeof(unsigned)*CHAR_BIT-1);
1903 }
1904
1905public:
1906 bool isMachineConstantPoolEntry() const {
1907 return Offset < 0;
1908 }
1909
1910 const Constant *getConstVal() const {
1911 assert(!isMachineConstantPoolEntry() && "Wrong constantpool type");
1912 return Val.ConstVal;
1913 }
1914
1915 MachineConstantPoolValue *getMachineCPVal() const {
1916 assert(isMachineConstantPoolEntry() && "Wrong constantpool type");
1917 return Val.MachineCPVal;
1918 }
1919
1920 int getOffset() const {
1921 return Offset & ~(1 << (sizeof(unsigned)*CHAR_BIT-1));
1922 }
1923
1924 // Return the alignment of this constant pool object, which is either 0 (for
1925 // default alignment) or the desired value.
1926 Align getAlign() const { return Alignment; }
1927 unsigned getTargetFlags() const { return TargetFlags; }
1928
1929 Type *getType() const;
1930
1931 static bool classof(const SDNode *N) {
1932 return N->getOpcode() == ISD::ConstantPool ||
1933 N->getOpcode() == ISD::TargetConstantPool;
1934 }
1935};
1936
1937/// Completely target-dependent object reference.
1938class TargetIndexSDNode : public SDNode {
1939 friend class SelectionDAG;
1940
1941 unsigned TargetFlags;
1942 int Index;
1943 int64_t Offset;
1944
1945public:
1946 TargetIndexSDNode(int Idx, EVT VT, int64_t Ofs, unsigned TF)
1947 : SDNode(ISD::TargetIndex, 0, DebugLoc(), getSDVTList(VT)),
1948 TargetFlags(TF), Index(Idx), Offset(Ofs) {}
1949
1950 unsigned getTargetFlags() const { return TargetFlags; }
1951 int getIndex() const { return Index; }
1952 int64_t getOffset() const { return Offset; }
1953
1954 static bool classof(const SDNode *N) {
1955 return N->getOpcode() == ISD::TargetIndex;
1956 }
1957};
1958
1959class BasicBlockSDNode : public SDNode {
1960 friend class SelectionDAG;
1961
1962 MachineBasicBlock *MBB;
1963
1964 /// Debug info is meaningful and potentially useful here, but we create
1965 /// blocks out of order when they're jumped to, which makes it a bit
1966 /// harder. Let's see if we need it first.
1967 explicit BasicBlockSDNode(MachineBasicBlock *mbb)
1968 : SDNode(ISD::BasicBlock, 0, DebugLoc(), getSDVTList(MVT::Other)), MBB(mbb)
1969 {}
1970
1971public:
1972 MachineBasicBlock *getBasicBlock() const { return MBB; }
1973
1974 static bool classof(const SDNode *N) {
1975 return N->getOpcode() == ISD::BasicBlock;
1976 }
1977};
1978
1979/// A "pseudo-class" with methods for operating on BUILD_VECTORs.
1980class BuildVectorSDNode : public SDNode {
1981public:
1982 // These are constructed as SDNodes and then cast to BuildVectorSDNodes.
1983 explicit BuildVectorSDNode() = delete;
1984
1985 /// Check if this is a constant splat, and if so, find the
1986 /// smallest element size that splats the vector. If MinSplatBits is
1987 /// nonzero, the element size must be at least that large. Note that the
1988 /// splat element may be the entire vector (i.e., a one element vector).
1989 /// Returns the splat element value in SplatValue. Any undefined bits in
1990 /// that value are zero, and the corresponding bits in the SplatUndef mask
1991 /// are set. The SplatBitSize value is set to the splat element size in
1992 /// bits. HasAnyUndefs is set to true if any bits in the vector are
1993 /// undefined. isBigEndian describes the endianness of the target.
1994 bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef,
1995 unsigned &SplatBitSize, bool &HasAnyUndefs,
1996 unsigned MinSplatBits = 0,
1997 bool isBigEndian = false) const;
1998
1999 /// Returns the demanded splatted value or a null value if this is not a
2000 /// splat.
2001 ///
2002 /// The DemandedElts mask indicates the elements that must be in the splat.
2003 /// If passed a non-null UndefElements bitvector, it will resize it to match
2004 /// the vector width and set the bits where elements are undef.
2005 SDValue getSplatValue(const APInt &DemandedElts,
2006 BitVector *UndefElements = nullptr) const;
2007
2008 /// Returns the splatted value or a null value if this is not a splat.
2009 ///
2010 /// If passed a non-null UndefElements bitvector, it will resize it to match
2011 /// the vector width and set the bits where elements are undef.
2012 SDValue getSplatValue(BitVector *UndefElements = nullptr) const;
2013
2014 /// Find the shortest repeating sequence of values in the build vector.
2015 ///
2016 /// e.g. { u, X, u, X, u, u, X, u } -> { X }
2017 /// { X, Y, u, Y, u, u, X, u } -> { X, Y }
2018 ///
2019 /// Currently this must be a power-of-2 build vector.
2020 /// The DemandedElts mask indicates the elements that must be present,
2021 /// undemanded elements in Sequence may be null (SDValue()). If passed a
2022 /// non-null UndefElements bitvector, it will resize it to match the original
2023 /// vector width and set the bits where elements are undef. If result is
2024 /// false, Sequence will be empty.
2025 bool getRepeatedSequence(const APInt &DemandedElts,
2026 SmallVectorImpl<SDValue> &Sequence,
2027 BitVector *UndefElements = nullptr) const;
2028
2029 /// Find the shortest repeating sequence of values in the build vector.
2030 ///
2031 /// e.g. { u, X, u, X, u, u, X, u } -> { X }
2032 /// { X, Y, u, Y, u, u, X, u } -> { X, Y }
2033 ///
2034 /// Currently this must be a power-of-2 build vector.
2035 /// If passed a non-null UndefElements bitvector, it will resize it to match
2036 /// the original vector width and set the bits where elements are undef.
2037 /// If result is false, Sequence will be empty.
2038 bool getRepeatedSequence(SmallVectorImpl<SDValue> &Sequence,
2039 BitVector *UndefElements = nullptr) const;
2040
2041 /// Returns the demanded splatted constant or null if this is not a constant
2042 /// splat.
2043 ///
2044 /// The DemandedElts mask indicates the elements that must be in the splat.
2045 /// If passed a non-null UndefElements bitvector, it will resize it to match
2046 /// the vector width and set the bits where elements are undef.
2047 ConstantSDNode *
2048 getConstantSplatNode(const APInt &DemandedElts,
2049 BitVector *UndefElements = nullptr) const;
2050
2051 /// Returns the splatted constant or null if this is not a constant
2052 /// splat.
2053 ///
2054 /// If passed a non-null UndefElements bitvector, it will resize it to match
2055 /// the vector width and set the bits where elements are undef.
2056 ConstantSDNode *
2057 getConstantSplatNode(BitVector *UndefElements = nullptr) const;
2058
2059 /// Returns the demanded splatted constant FP or null if this is not a
2060 /// constant FP splat.
2061 ///
2062 /// The DemandedElts mask indicates the elements that must be in the splat.
2063 /// If passed a non-null UndefElements bitvector, it will resize it to match
2064 /// the vector width and set the bits where elements are undef.
2065 ConstantFPSDNode *
2066 getConstantFPSplatNode(const APInt &DemandedElts,
2067 BitVector *UndefElements = nullptr) const;
2068
2069 /// Returns the splatted constant FP or null if this is not a constant
2070 /// FP splat.
2071 ///
2072 /// If passed a non-null UndefElements bitvector, it will resize it to match
2073 /// the vector width and set the bits where elements are undef.
2074 ConstantFPSDNode *
2075 getConstantFPSplatNode(BitVector *UndefElements = nullptr) const;
2076
2077 /// If this is a constant FP splat and the splatted constant FP is an
2078 /// exact power of 2, return the log base 2 integer value. Otherwise,
2079 /// return -1.
2080 ///
2081 /// The BitWidth specifies the necessary bit precision.
2082 int32_t getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements,
2083 uint32_t BitWidth) const;
2084
2085 /// Extract the raw bit data from a build vector of Undef, Constant or
2086 /// ConstantFP node elements. Each raw bit element will be \p
2087 /// DstEltSizeInBits wide, undef elements are treated as zero, and entirely
2088 /// undefined elements are flagged in \p UndefElements.
2089 bool getConstantRawBits(bool IsLittleEndian, unsigned DstEltSizeInBits,
2090 SmallVectorImpl<APInt> &RawBitElements,
2091 BitVector &UndefElements) const;
2092
2093 bool isConstant() const;
2094
2095 /// If this BuildVector is constant and represents the numerical series
2096 /// "<a, a+n, a+2n, a+3n, ...>" where a is integer and n is a non-zero integer,
2097 /// the value "<a,n>" is returned.
2098 Optional<std::pair<APInt, APInt>> isConstantSequence() const;
2099
2100 /// Recast bit data \p SrcBitElements to \p DstEltSizeInBits wide elements.
2101 /// Undef elements are treated as zero, and entirely undefined elements are
2102 /// flagged in \p DstUndefElements.
2103 static void recastRawBits(bool IsLittleEndian, unsigned DstEltSizeInBits,
2104 SmallVectorImpl<APInt> &DstBitElements,
2105 ArrayRef<APInt> SrcBitElements,
2106 BitVector &DstUndefElements,
2107 const BitVector &SrcUndefElements);
2108
2109 static bool classof(const SDNode *N) {
2110 return N->getOpcode() == ISD::BUILD_VECTOR;
2111 }
2112};
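// Editorial note (not part of the header): a hedged sketch of the splat
// queries declared above. `BV` is assumed to be a BUILD_VECTOR node already
// obtained via cast<BuildVectorSDNode>(N) in target code.
static bool illustrateSplatQueries(const BuildVectorSDNode *BV) {
  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  // Ask for a constant splat whose element is at least 8 bits wide.
  if (BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
                          /*MinSplatBits=*/8))
    return true;
  // Alternatively, fetch the splatted constant node (null if not a splat).
  return BV->getConstantSplatNode() != nullptr;
}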
2113
2114/// An SDNode that holds an arbitrary LLVM IR Value. This is
2115/// used when the SelectionDAG needs to make a simple reference to something
2116/// in the LLVM IR representation.
2117///
2118class SrcValueSDNode : public SDNode {
2119 friend class SelectionDAG;
2120
2121 const Value *V;
2122
2123 /// Create a SrcValue for a general value.
2124 explicit SrcValueSDNode(const Value *v)
2125 : SDNode(ISD::SRCVALUE, 0, DebugLoc(), getSDVTList(MVT::Other)), V(v) {}
2126
2127public:
2128 /// Return the contained Value.
2129 const Value *getValue() const { return V; }
2130
2131 static bool classof(const SDNode *N) {
2132 return N->getOpcode() == ISD::SRCVALUE;
2133 }
2134};
2135
2136class MDNodeSDNode : public SDNode {
2137 friend class SelectionDAG;
2138
2139 const MDNode *MD;
2140
2141 explicit MDNodeSDNode(const MDNode *md)
2142 : SDNode(ISD::MDNODE_SDNODE, 0, DebugLoc(), getSDVTList(MVT::Other)), MD(md)
2143 {}
2144
2145public:
2146 const MDNode *getMD() const { return MD; }
2147
2148 static bool classof(const SDNode *N) {
2149 return N->getOpcode() == ISD::MDNODE_SDNODE;
2150 }
2151};
2152
2153class RegisterSDNode : public SDNode {
2154 friend class SelectionDAG;
2155
2156 Register Reg;
2157
2158 RegisterSDNode(Register reg, EVT VT)
2159 : SDNode(ISD::Register, 0, DebugLoc(), getSDVTList(VT)), Reg(reg) {}
2160
2161public:
2162 Register getReg() const { return Reg; }
2163
2164 static bool classof(const SDNode *N) {
2165 return N->getOpcode() == ISD::Register;
2166 }
2167};
2168
2169class RegisterMaskSDNode : public SDNode {
2170 friend class SelectionDAG;
2171
2172 // The memory for RegMask is not owned by the node.
2173 const uint32_t *RegMask;
2174
2175 RegisterMaskSDNode(const uint32_t *mask)
2176 : SDNode(ISD::RegisterMask, 0, DebugLoc(), getSDVTList(MVT::Untyped)),
2177 RegMask(mask) {}
2178
2179public:
2180 const uint32_t *getRegMask() const { return RegMask; }
2181
2182 static bool classof(const SDNode *N) {
2183 return N->getOpcode() == ISD::RegisterMask;
2184 }
2185};
2186
2187class BlockAddressSDNode : public SDNode {
2188 friend class SelectionDAG;
2189
2190 const BlockAddress *BA;
2191 int64_t Offset;
2192 unsigned TargetFlags;
2193
2194 BlockAddressSDNode(unsigned NodeTy, EVT VT, const BlockAddress *ba,
2195 int64_t o, unsigned Flags)
2196 : SDNode(NodeTy, 0, DebugLoc(), getSDVTList(VT)),
2197 BA(ba), Offset(o), TargetFlags(Flags) {}
2198
2199public:
2200 const BlockAddress *getBlockAddress() const { return BA; }
2201 int64_t getOffset() const { return Offset; }
2202 unsigned getTargetFlags() const { return TargetFlags; }
2203
2204 static bool classof(const SDNode *N) {
2205 return N->getOpcode() == ISD::BlockAddress ||
2206 N->getOpcode() == ISD::TargetBlockAddress;
2207 }
2208};
2209
2210class LabelSDNode : public SDNode {
2211 friend class SelectionDAG;
2212
2213 MCSymbol *Label;
2214
2215 LabelSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl, MCSymbol *L)
2216 : SDNode(Opcode, Order, dl, getSDVTList(MVT::Other)), Label(L) {
2217 assert(LabelSDNode::classof(this) && "not a label opcode");
2218 }
2219
2220public:
2221 MCSymbol *getLabel() const { return Label; }
2222
2223 static bool classof(const SDNode *N) {
2224 return N->getOpcode() == ISD::EH_LABEL ||
2225 N->getOpcode() == ISD::ANNOTATION_LABEL;
2226 }
2227};
2228
2229class ExternalSymbolSDNode : public SDNode {
2230 friend class SelectionDAG;
2231
2232 const char *Symbol;
2233 unsigned TargetFlags;
2234
2235 ExternalSymbolSDNode(bool isTarget, const char *Sym, unsigned TF, EVT VT)
2236 : SDNode(isTarget ? ISD::TargetExternalSymbol : ISD::ExternalSymbol, 0,
2237 DebugLoc(), getSDVTList(VT)),
2238 Symbol(Sym), TargetFlags(TF) {}
2239
2240public:
2241 const char *getSymbol() const { return Symbol; }
2242 unsigned getTargetFlags() const { return TargetFlags; }
2243
2244 static bool classof(const SDNode *N) {
2245 return N->getOpcode() == ISD::ExternalSymbol ||
2246 N->getOpcode() == ISD::TargetExternalSymbol;
2247 }
2248};
2249
2250class MCSymbolSDNode : public SDNode {
2251 friend class SelectionDAG;
2252
2253 MCSymbol *Symbol;
2254
2255 MCSymbolSDNode(MCSymbol *Symbol, EVT VT)
2256 : SDNode(ISD::MCSymbol, 0, DebugLoc(), getSDVTList(VT)), Symbol(Symbol) {}
2257
2258public:
2259 MCSymbol *getMCSymbol() const { return Symbol; }
2260
2261 static bool classof(const SDNode *N) {
2262 return N->getOpcode() == ISD::MCSymbol;
2263 }
2264};
2265
2266class CondCodeSDNode : public SDNode {
2267 friend class SelectionDAG;
2268
2269 ISD::CondCode Condition;
2270
2271 explicit CondCodeSDNode(ISD::CondCode Cond)
2272 : SDNode(ISD::CONDCODE, 0, DebugLoc(), getSDVTList(MVT::Other)),
2273 Condition(Cond) {}
2274
2275public:
2276 ISD::CondCode get() const { return Condition; }
2277
2278 static bool classof(const SDNode *N) {
2279 return N->getOpcode() == ISD::CONDCODE;
2280 }
2281};
2282
2283/// This class is used to represent EVT's, which are used
2284/// to parameterize some operations.
2285class VTSDNode : public SDNode {
2286 friend class SelectionDAG;
2287
2288 EVT ValueType;
2289
2290 explicit VTSDNode(EVT VT)
2291 : SDNode(ISD::VALUETYPE, 0, DebugLoc(), getSDVTList(MVT::Other)),
2292 ValueType(VT) {}
2293
2294public:
2295 EVT getVT() const { return ValueType; }
2296
2297 static bool classof(const SDNode *N) {
2298 return N->getOpcode() == ISD::VALUETYPE;
2299 }
2300};
2301
2302/// Base class for LoadSDNode and StoreSDNode
2303class LSBaseSDNode : public MemSDNode {
2304public:
2305 LSBaseSDNode(ISD::NodeType NodeTy, unsigned Order, const DebugLoc &dl,
2306 SDVTList VTs, ISD::MemIndexedMode AM, EVT MemVT,
2307 MachineMemOperand *MMO)
2308 : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
2309 LSBaseSDNodeBits.AddressingMode = AM;
2310 assert(getAddressingMode() == AM && "Value truncated");
2311 }
2312
2313 const SDValue &getOffset() const {
2314 return getOperand(getOpcode() == ISD::LOAD ? 2 : 3);
2315 }
2316
2317 /// Return the addressing mode for this load or store:
2318 /// unindexed, pre-inc, pre-dec, post-inc, or post-dec.
2319 ISD::MemIndexedMode getAddressingMode() const {
2320 return static_cast<ISD::MemIndexedMode>(LSBaseSDNodeBits.AddressingMode);
2321 }
2322
2323 /// Return true if this is a pre/post inc/dec load/store.
2324 bool isIndexed() const { return getAddressingMode() != ISD::UNINDEXED; }
2325
2326 /// Return true if this is NOT a pre/post inc/dec load/store.
2327 bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; }
2328
2329 static bool classof(const SDNode *N) {
2330 return N->getOpcode() == ISD::LOAD ||
2331 N->getOpcode() == ISD::STORE;
2332 }
2333};
2334
2335/// This class is used to represent ISD::LOAD nodes.
2336class LoadSDNode : public LSBaseSDNode {
2337 friend class SelectionDAG;
2338
2339 LoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2340 ISD::MemIndexedMode AM, ISD::LoadExtType ETy, EVT MemVT,
2341 MachineMemOperand *MMO)
2342 : LSBaseSDNode(ISD::LOAD, Order, dl, VTs, AM, MemVT, MMO) {
2343 LoadSDNodeBits.ExtTy = ETy;
2344 assert(readMem() && "Load MachineMemOperand is not a load!");
2345 assert(!writeMem() && "Load MachineMemOperand is a store!");
2346 }
2347
2348public:
2349 /// Return whether this is a plain node,
2350 /// or one of the varieties of value-extending loads.
2351 ISD::LoadExtType getExtensionType() const {
2352 return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy);
2353 }
2354
2355 const SDValue &getBasePtr() const { return getOperand(1); }
2356 const SDValue &getOffset() const { return getOperand(2); }
2357
2358 static bool classof(const SDNode *N) {
2359 return N->getOpcode() == ISD::LOAD;
2360 }
2361};
2362
2363/// This class is used to represent ISD::STORE nodes.
2364class StoreSDNode : public LSBaseSDNode {
2365 friend class SelectionDAG;
2366
2367 StoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2368 ISD::MemIndexedMode AM, bool isTrunc, EVT MemVT,
2369 MachineMemOperand *MMO)
2370 : LSBaseSDNode(ISD::STORE, Order, dl, VTs, AM, MemVT, MMO) {
2371 StoreSDNodeBits.IsTruncating = isTrunc;
2372 assert(!readMem() && "Store MachineMemOperand is a load!");
2373 assert(writeMem() && "Store MachineMemOperand is not a store!");
2374 }
2375
2376public:
2377 /// Return true if the op does a truncation before store.
2378 /// For integers this is the same as doing a TRUNCATE and storing the result.
2379 /// For floats, it is the same as doing an FP_ROUND and storing the result.
2380 bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
2381 void setTruncatingStore(bool Truncating) {
2382 StoreSDNodeBits.IsTruncating = Truncating;
2383 }
2384
2385 const SDValue &getValue() const { return getOperand(1); }
2386 const SDValue &getBasePtr() const { return getOperand(2); }
2387 const SDValue &getOffset() const { return getOperand(3); }
2388
2389 static bool classof(const SDNode *N) {
2390 return N->getOpcode() == ISD::STORE;
2391 }
2392};
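// Editorial note (not part of the header): a small sketch of how load/store
// nodes are commonly inspected, assuming `N` is an arbitrary SDNode from a DAG.
static bool illustrateLoadStoreQueries(const SDNode *N) {
  if (const auto *Ld = dyn_cast<LoadSDNode>(N))
    // A "plain" load: no value extension and UNINDEXED addressing.
    return Ld->getExtensionType() == ISD::NON_EXTLOAD && Ld->isUnindexed();
  if (const auto *St = dyn_cast<StoreSDNode>(N))
    // A non-truncating, unindexed store.
    return !St->isTruncatingStore() && St->isUnindexed();
  return false;
}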
2393
2394/// This base class is used to represent VP_LOAD, VP_STORE,
2395/// EXPERIMENTAL_VP_STRIDED_LOAD and EXPERIMENTAL_VP_STRIDED_STORE nodes
2396class VPBaseLoadStoreSDNode : public MemSDNode {
2397public:
2398 friend class SelectionDAG;
2399
2400 VPBaseLoadStoreSDNode(ISD::NodeType NodeTy, unsigned Order,
2401 const DebugLoc &DL, SDVTList VTs,
2402 ISD::MemIndexedMode AM, EVT MemVT,
2403 MachineMemOperand *MMO)
2404 : MemSDNode(NodeTy, Order, DL, VTs, MemVT, MMO) {
2405 LSBaseSDNodeBits.AddressingMode = AM;
2406 assert(getAddressingMode() == AM && "Value truncated");
2407 }
2408
2409 // VPStridedStoreSDNode (Chain, Data, Ptr, Offset, Stride, Mask, EVL)
2410 // VPStoreSDNode (Chain, Data, Ptr, Offset, Mask, EVL)
2411 // VPStridedLoadSDNode (Chain, Ptr, Offset, Stride, Mask, EVL)
2412 // VPLoadSDNode (Chain, Ptr, Offset, Mask, EVL)
2413 // Mask is a vector of i1 elements;
2414 // the type of EVL is TLI.getVPExplicitVectorLengthTy().
2415 const SDValue &getOffset() const {
2416 return getOperand((getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD ||
2417 getOpcode() == ISD::VP_LOAD)
2418 ? 2
2419 : 3);
2420 }
2421 const SDValue &getBasePtr() const {
2422 return getOperand((getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD ||
2423 getOpcode() == ISD::VP_LOAD)
2424 ? 1
2425 : 2);
2426 }
2427 const SDValue &getMask() const {
2428 switch (getOpcode()) {
2429 default:
2430 llvm_unreachable("Invalid opcode");
2431 case ISD::VP_LOAD:
2432 return getOperand(3);
2433 case ISD::VP_STORE:
2434 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
2435 return getOperand(4);
2436 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
2437 return getOperand(5);
2438 }
2439 }
2440 const SDValue &getVectorLength() const {
2441 switch (getOpcode()) {
2442 default:
2443 llvm_unreachable("Invalid opcode");
2444 case ISD::VP_LOAD:
2445 return getOperand(4);
2446 case ISD::VP_STORE:
2447 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
2448 return getOperand(5);
2449 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
2450 return getOperand(6);
2451 }
2452 }
2453
2454 /// Return the addressing mode for this load or store:
2455 /// unindexed, pre-inc, pre-dec, post-inc, or post-dec.
2456 ISD::MemIndexedMode getAddressingMode() const {
2457 return static_cast<ISD::MemIndexedMode>(LSBaseSDNodeBits.AddressingMode);
2458 }
2459
2460 /// Return true if this is a pre/post inc/dec load/store.
2461 bool isIndexed() const { return getAddressingMode() != ISD::UNINDEXED; }
2462
2463 /// Return true if this is NOT a pre/post inc/dec load/store.
2464 bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; }
2465
2466 static bool classof(const SDNode *N) {
2467 return N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD ||
2468 N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE ||
2469 N->getOpcode() == ISD::VP_LOAD || N->getOpcode() == ISD::VP_STORE;
2470 }
2471};
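// Editorial note (not part of the header): a sketch of the operand layouts
// documented above. The per-opcode operand index differs (e.g. the mask is
// operand 3 for VP_LOAD but 4 for VP_STORE); the accessors hide that.
static bool illustrateVPOperands(const VPBaseLoadStoreSDNode *N) {
  // Each accessor switches on the opcode internally, so callers never
  // hard-code the operand position.
  SDValue Ptr = N->getBasePtr();
  SDValue Mask = N->getMask();
  SDValue EVL = N->getVectorLength();
  return Ptr.getNode() && Mask.getNode() && EVL.getNode();
}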
2472
2473/// This class is used to represent a VP_LOAD node
2474class VPLoadSDNode : public VPBaseLoadStoreSDNode {
2475public:
2476 friend class SelectionDAG;
2477
2478 VPLoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2479 ISD::MemIndexedMode AM, ISD::LoadExtType ETy, bool isExpanding,
2480 EVT MemVT, MachineMemOperand *MMO)
2481 : VPBaseLoadStoreSDNode(ISD::VP_LOAD, Order, dl, VTs, AM, MemVT, MMO) {
2482 LoadSDNodeBits.ExtTy = ETy;
2483 LoadSDNodeBits.IsExpanding = isExpanding;
2484 }
2485
2486 ISD::LoadExtType getExtensionType() const {
2487 return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy);
2488 }
2489
2490 const SDValue &getBasePtr() const { return getOperand(1); }
2491 const SDValue &getOffset() const { return getOperand(2); }
2492 const SDValue &getMask() const { return getOperand(3); }
2493 const SDValue &getVectorLength() const { return getOperand(4); }
2494
2495 static bool classof(const SDNode *N) {
2496 return N->getOpcode() == ISD::VP_LOAD;
2497 }
2498 bool isExpandingLoad() const { return LoadSDNodeBits.IsExpanding; }
2499};
2500
2501/// This class is used to represent an EXPERIMENTAL_VP_STRIDED_LOAD node.
2502class VPStridedLoadSDNode : public VPBaseLoadStoreSDNode {
2503public:
2504 friend class SelectionDAG;
2505
2506 VPStridedLoadSDNode(unsigned Order, const DebugLoc &DL, SDVTList VTs,
2507 ISD::MemIndexedMode AM, ISD::LoadExtType ETy,
2508 bool IsExpanding, EVT MemVT, MachineMemOperand *MMO)
2509 : VPBaseLoadStoreSDNode(ISD::EXPERIMENTAL_VP_STRIDED_LOAD, Order, DL, VTs,
2510 AM, MemVT, MMO) {
2511 LoadSDNodeBits.ExtTy = ETy;
2512 LoadSDNodeBits.IsExpanding = IsExpanding;
2513 }
2514
2515 ISD::LoadExtType getExtensionType() const {
2516 return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy);
2517 }
2518
2519 const SDValue &getBasePtr() const { return getOperand(1); }
2520 const SDValue &getOffset() const { return getOperand(2); }
2521 const SDValue &getStride() const { return getOperand(3); }
2522 const SDValue &getMask() const { return getOperand(4); }
2523 const SDValue &getVectorLength() const { return getOperand(5); }
2524
2525 static bool classof(const SDNode *N) {
2526 return N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD;
2527 }
2528 bool isExpandingLoad() const { return LoadSDNodeBits.IsExpanding; }
2529};
2530
2531/// This class is used to represent a VP_STORE node
2532class VPStoreSDNode : public VPBaseLoadStoreSDNode {
2533public:
2534 friend class SelectionDAG;
2535
2536 VPStoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2537 ISD::MemIndexedMode AM, bool isTrunc, bool isCompressing,
2538 EVT MemVT, MachineMemOperand *MMO)
2539 : VPBaseLoadStoreSDNode(ISD::VP_STORE, Order, dl, VTs, AM, MemVT, MMO) {
2540 StoreSDNodeBits.IsTruncating = isTrunc;
2541 StoreSDNodeBits.IsCompressing = isCompressing;
2542 }
2543
2544 /// Return true if this is a truncating store.
2545 /// For integers this is the same as doing a TRUNCATE and storing the result.
2546 /// For floats, it is the same as doing an FP_ROUND and storing the result.
2547 bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
2548
2549 /// Returns true if the op does a compression to the vector before storing.
2550 /// The node contiguously stores the active elements (integers or floats)
2551 /// in src (those with their respective bit set in writemask k) to unaligned
2552 /// memory at base_addr.
2553 bool isCompressingStore() const { return StoreSDNodeBits.IsCompressing; }
2554
2555 const SDValue &getValue() const { return getOperand(1); }
2556 const SDValue &getBasePtr() const { return getOperand(2); }
2557 const SDValue &getOffset() const { return getOperand(3); }
2558 const SDValue &getMask() const { return getOperand(4); }
2559 const SDValue &getVectorLength() const { return getOperand(5); }
2560
2561 static bool classof(const SDNode *N) {
2562 return N->getOpcode() == ISD::VP_STORE;
2563 }
2564};
2565
2566/// This class is used to represent an EXPERIMENTAL_VP_STRIDED_STORE node.
2567class VPStridedStoreSDNode : public VPBaseLoadStoreSDNode {
2568public:
2569 friend class SelectionDAG;
2570
2571 VPStridedStoreSDNode(unsigned Order, const DebugLoc &DL, SDVTList VTs,
2572 ISD::MemIndexedMode AM, bool IsTrunc, bool IsCompressing,
2573 EVT MemVT, MachineMemOperand *MMO)
2574 : VPBaseLoadStoreSDNode(ISD::EXPERIMENTAL_VP_STRIDED_STORE, Order, DL,
2575 VTs, AM, MemVT, MMO) {
2576 StoreSDNodeBits.IsTruncating = IsTrunc;
2577 StoreSDNodeBits.IsCompressing = IsCompressing;
2578 }
2579
2580 /// Return true if this is a truncating store.
2581 /// For integers this is the same as doing a TRUNCATE and storing the result.
2582 /// For floats, it is the same as doing an FP_ROUND and storing the result.
2583 bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
2584
2585 /// Returns true if the op does a compression to the vector before storing.
2586 /// The node contiguously stores the active elements (integers or floats)
2587 /// in src (those with their respective bit set in writemask k) to unaligned
2588 /// memory at base_addr.
2589 bool isCompressingStore() const { return StoreSDNodeBits.IsCompressing; }
2590
2591 const SDValue &getValue() const { return getOperand(1); }
2592 const SDValue &getBasePtr() const { return getOperand(2); }
2593 const SDValue &getOffset() const { return getOperand(3); }
2594 const SDValue &getStride() const { return getOperand(4); }
2595 const SDValue &getMask() const { return getOperand(5); }
2596 const SDValue &getVectorLength() const { return getOperand(6); }
2597
2598 static bool classof(const SDNode *N) {
2599 return N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE;
2600 }
2601};
2602
2603/// This base class is used to represent MLOAD and MSTORE nodes
2604class MaskedLoadStoreSDNode : public MemSDNode {
2605public:
2606 friend class SelectionDAG;
2607
2608 MaskedLoadStoreSDNode(ISD::NodeType NodeTy, unsigned Order,
2609 const DebugLoc &dl, SDVTList VTs,
2610 ISD::MemIndexedMode AM, EVT MemVT,
2611 MachineMemOperand *MMO)
2612 : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
2613 LSBaseSDNodeBits.AddressingMode = AM;
2614 assert(getAddressingMode() == AM && "Value truncated");
2615 }
2616
2617 // MaskedLoadSDNode (Chain, ptr, offset, mask, passthru)
2618 // MaskedStoreSDNode (Chain, data, ptr, offset, mask)
2619 // Mask is a vector of i1 elements
2620 const SDValue &getOffset() const {
2621 return getOperand(getOpcode() == ISD::MLOAD ? 2 : 3);
2622 }
2623 const SDValue &getMask() const {
2624 return getOperand(getOpcode() == ISD::MLOAD ? 3 : 4);
2625 }
2626
2627 /// Return the addressing mode for this load or store:
2628 /// unindexed, pre-inc, pre-dec, post-inc, or post-dec.
2629 ISD::MemIndexedMode getAddressingMode() const {
2630 return static_cast<ISD::MemIndexedMode>(LSBaseSDNodeBits.AddressingMode);
2631 }
2632
2633 /// Return true if this is a pre/post inc/dec load/store.
2634 bool isIndexed() const { return getAddressingMode() != ISD::UNINDEXED; }
2635
2636 /// Return true if this is NOT a pre/post inc/dec load/store.
2637 bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; }
2638
2639 static bool classof(const SDNode *N) {
2640 return N->getOpcode() == ISD::MLOAD ||
2641 N->getOpcode() == ISD::MSTORE;
2642 }
2643};
2644
2645/// This class is used to represent an MLOAD node
2646class MaskedLoadSDNode : public MaskedLoadStoreSDNode {
2647public:
2648 friend class SelectionDAG;
2649
2650 MaskedLoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2651 ISD::MemIndexedMode AM, ISD::LoadExtType ETy,
2652 bool IsExpanding, EVT MemVT, MachineMemOperand *MMO)
2653 : MaskedLoadStoreSDNode(ISD::MLOAD, Order, dl, VTs, AM, MemVT, MMO) {
2654 LoadSDNodeBits.ExtTy = ETy;
2655 LoadSDNodeBits.IsExpanding = IsExpanding;
2656 }
2657
2658 ISD::LoadExtType getExtensionType() const {
2659 return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy);
2660 }
2661
2662 const SDValue &getBasePtr() const { return getOperand(1); }
2663 const SDValue &getOffset() const { return getOperand(2); }
2664 const SDValue &getMask() const { return getOperand(3); }
2665 const SDValue &getPassThru() const { return getOperand(4); }
2666
2667 static bool classof(const SDNode *N) {
2668 return N->getOpcode() == ISD::MLOAD;
2669 }
2670
2671 bool isExpandingLoad() const { return LoadSDNodeBits.IsExpanding; }
2672};
2673
2674/// This class is used to represent an MSTORE node
2675class MaskedStoreSDNode : public MaskedLoadStoreSDNode {
2676public:
2677 friend class SelectionDAG;
2678
2679 MaskedStoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2680 ISD::MemIndexedMode AM, bool isTrunc, bool isCompressing,
2681 EVT MemVT, MachineMemOperand *MMO)
2682 : MaskedLoadStoreSDNode(ISD::MSTORE, Order, dl, VTs, AM, MemVT, MMO) {
2683 StoreSDNodeBits.IsTruncating = isTrunc;
2684 StoreSDNodeBits.IsCompressing = isCompressing;
2685 }
2686
2687 /// Return true if the op does a truncation before store.
2688 /// For integers this is the same as doing a TRUNCATE and storing the result.
2689 /// For floats, it is the same as doing an FP_ROUND and storing the result.
2690 bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
2691
2692 /// Returns true if the op does a compression to the vector before storing.
2693 /// The node contiguously stores the active elements (integers or floats)
2694 /// in src (those with their respective bit set in writemask k) to unaligned
2695 /// memory at base_addr.
2696 bool isCompressingStore() const { return StoreSDNodeBits.IsCompressing; }
2697
2698 const SDValue &getValue() const { return getOperand(1); }
2699 const SDValue &getBasePtr() const { return getOperand(2); }
2700 const SDValue &getOffset() const { return getOperand(3); }
2701 const SDValue &getMask() const { return getOperand(4); }
2702
2703 static bool classof(const SDNode *N) {
2704 return N->getOpcode() == ISD::MSTORE;
2705 }
2706};
2707
2708/// This is a base class used to represent
2709/// VP_GATHER and VP_SCATTER nodes
2710///
2711class VPGatherScatterSDNode : public MemSDNode {
2712public:
2713 friend class SelectionDAG;
2714
2715 VPGatherScatterSDNode(ISD::NodeType NodeTy, unsigned Order,
2716 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
2717 MachineMemOperand *MMO, ISD::MemIndexType IndexType)
2718 : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
2719 LSBaseSDNodeBits.AddressingMode = IndexType;
2720 assert(getIndexType() == IndexType && "Value truncated");
2721 }
2722
2723 /// How is Index applied to BasePtr when computing addresses.
2724 ISD::MemIndexType getIndexType() const {
2725 return static_cast<ISD::MemIndexType>(LSBaseSDNodeBits.AddressingMode);
2726 }
2727 bool isIndexScaled() const {
2728 return !cast<ConstantSDNode>(getScale())->isOne();
2729 }
2730 bool isIndexSigned() const { return isIndexTypeSigned(getIndexType()); }
2731
2732 // Operand order for the two node kinds:
2733 // VPGatherSDNode (Chain, base, index, scale, mask, vlen)
2734 // VPScatterSDNode (Chain, value, base, index, scale, mask, vlen)
2735 // Mask is a vector of i1 elements
2736 const SDValue &getBasePtr() const {
2737 return getOperand((getOpcode() == ISD::VP_GATHER) ? 1 : 2);
2738 }
2739 const SDValue &getIndex() const {
2740 return getOperand((getOpcode() == ISD::VP_GATHER) ? 2 : 3);
2741 }
2742 const SDValue &getScale() const {
2743 return getOperand((getOpcode() == ISD::VP_GATHER) ? 3 : 4);
2744 }
2745 const SDValue &getMask() const {
2746 return getOperand((getOpcode() == ISD::VP_GATHER) ? 4 : 5);
2747 }
2748 const SDValue &getVectorLength() const {
2749 return getOperand((getOpcode() == ISD::VP_GATHER) ? 5 : 6);
2750 }
2751
2752 static bool classof(const SDNode *N) {
2753 return N->getOpcode() == ISD::VP_GATHER ||
2754 N->getOpcode() == ISD::VP_SCATTER;
2755 }
2756};
2757
2758/// This class is used to represent an VP_GATHER node
2759///
2760class VPGatherSDNode : public VPGatherScatterSDNode {
2761public:
2762 friend class SelectionDAG;
2763
2764 VPGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, EVT MemVT,
2765 MachineMemOperand *MMO, ISD::MemIndexType IndexType)
2766 : VPGatherScatterSDNode(ISD::VP_GATHER, Order, dl, VTs, MemVT, MMO,
2767 IndexType) {}
2768
2769 static bool classof(const SDNode *N) {
2770 return N->getOpcode() == ISD::VP_GATHER;
2771 }
2772};
2773
2774/// This class is used to represent an VP_SCATTER node
2775///
2776class VPScatterSDNode : public VPGatherScatterSDNode {
2777public:
2778 friend class SelectionDAG;
2779
2780 VPScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, EVT MemVT,
2781 MachineMemOperand *MMO, ISD::MemIndexType IndexType)
2782 : VPGatherScatterSDNode(ISD::VP_SCATTER, Order, dl, VTs, MemVT, MMO,
2783 IndexType) {}
2784
2785 const SDValue &getValue() const { return getOperand(1); }
2786
2787 static bool classof(const SDNode *N) {
2788 return N->getOpcode() == ISD::VP_SCATTER;
2789 }
2790};
2791
2792/// This is a base class used to represent
2793/// MGATHER and MSCATTER nodes
2794///
2795class MaskedGatherScatterSDNode : public MemSDNode {
2796public:
2797 friend class SelectionDAG;
2798
2799 MaskedGatherScatterSDNode(ISD::NodeType NodeTy, unsigned Order,
2800 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
2801 MachineMemOperand *MMO, ISD::MemIndexType IndexType)
2802 : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
2803 LSBaseSDNodeBits.AddressingMode = IndexType;
2804 assert(getIndexType() == IndexType && "Value truncated");
2805 }
2806
2807 /// How is Index applied to BasePtr when computing addresses.
2808 ISD::MemIndexType getIndexType() const {
2809 return static_cast<ISD::MemIndexType>(LSBaseSDNodeBits.AddressingMode);
2810 }
2811 bool isIndexScaled() const {
2812 return !cast<ConstantSDNode>(getScale())->isOne();
2813 }
2814 bool isIndexSigned() const { return isIndexTypeSigned(getIndexType()); }
2815
2816 // In both nodes the mask is Op2 and the base address is Op3:
2817 // MaskedGatherSDNode (Chain, passthru, mask, base, index, scale)
2818 // MaskedScatterSDNode (Chain, value, mask, base, index, scale)
2819 // Mask is a vector of i1 elements
2820 const SDValue &getBasePtr() const { return getOperand(3); }
2821 const SDValue &getIndex() const { return getOperand(4); }
2822 const SDValue &getMask() const { return getOperand(2); }
2823 const SDValue &getScale() const { return getOperand(5); }
2824
2825 static bool classof(const SDNode *N) {
2826 return N->getOpcode() == ISD::MGATHER ||
2827 N->getOpcode() == ISD::MSCATTER;
2828 }
2829};
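// Editorial note (not part of the header): a sketch of the MGATHER/MSCATTER
// operand layout, assuming `N` comes from a target's DAG combine.
static bool illustrateGatherScatterOperands(const MaskedGatherScatterSDNode *N) {
  // Both node kinds share mask (operand 2), base (3), index (4) and scale (5).
  SDValue Base = N->getBasePtr();
  SDValue Index = N->getIndex();
  SDValue Mask = N->getMask();
  // A scale constant of 1 means the index is used unscaled.
  return !N->isIndexScaled() && Base.getNode() && Index.getNode() &&
         Mask.getNode();
}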
2830
2831/// This class is used to represent an MGATHER node
2832///
2833class MaskedGatherSDNode : public MaskedGatherScatterSDNode {
2834public:
2835 friend class SelectionDAG;
2836
2837 MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2838 EVT MemVT, MachineMemOperand *MMO,
2839 ISD::MemIndexType IndexType, ISD::LoadExtType ETy)
2840 : MaskedGatherScatterSDNode(ISD::MGATHER, Order, dl, VTs, MemVT, MMO,
2841 IndexType) {
2842 LoadSDNodeBits.ExtTy = ETy;
2843 }
2844
2845 const SDValue &getPassThru() const { return getOperand(1); }
2846
2847 ISD::LoadExtType getExtensionType() const {
2848 return ISD::LoadExtType(LoadSDNodeBits.ExtTy);
2849 }
2850
2851 static bool classof(const SDNode *N) {
2852 return N->getOpcode() == ISD::MGATHER;
2853 }
2854};
2855
2856/// This class is used to represent an MSCATTER node
2857///
2858class MaskedScatterSDNode : public MaskedGatherScatterSDNode {
2859public:
2860 friend class SelectionDAG;
2861
2862 MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
2863 EVT MemVT, MachineMemOperand *MMO,
2864 ISD::MemIndexType IndexType, bool IsTrunc)
2865 : MaskedGatherScatterSDNode(ISD::MSCATTER, Order, dl, VTs, MemVT, MMO,
2866 IndexType) {
2867 StoreSDNodeBits.IsTruncating = IsTrunc;
2868 }
2869
2870 /// Return true if the op does a truncation before store.
2871 /// For integers this is the same as doing a TRUNCATE and storing the result.
2872 /// For floats, it is the same as doing an FP_ROUND and storing the result.
2873 bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
2874
2875 const SDValue &getValue() const { return getOperand(1); }
2876
2877 static bool classof(const SDNode *N) {
2878 return N->getOpcode() == ISD::MSCATTER;
2879 }
2880};
2881
2882/// An SDNode that represents everything that will be needed
2883/// to construct a MachineInstr. These nodes are created during the
2884/// instruction selection proper phase.
2885///
2886/// Note that the only supported way to set the `memoperands` is by calling the
2887/// `SelectionDAG::setNodeMemRefs` function as the memory management happens
2888/// inside the DAG rather than in the node.
2889class MachineSDNode : public SDNode {
2890private:
2891 friend class SelectionDAG;
2892
2893 MachineSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL, SDVTList VTs)
2894 : SDNode(Opc, Order, DL, VTs) {}
2895
2896 // We use a pointer union between a single `MachineMemOperand` pointer and
2897 // a pointer to an array of `MachineMemOperand` pointers. This is null when
2898 // the number of these is zero, the single pointer variant used when the
2899 // number is one, and the array is used for larger numbers.
2900 //
2901 // The array is allocated via the `SelectionDAG`'s allocator and so will
2902 // always live until the DAG is cleaned up and doesn't require ownership here.
2903 //
2904 // We can't use something simpler like `TinyPtrVector` here because `SDNode`
2905 // subclasses aren't managed in a conforming C++ manner. See the comments on
2906 // `SelectionDAG::MorphNodeTo` which details what all goes on, but the
2907 // constraint here is that these don't manage memory with their constructor or
2908 // destructor and can be initialized to a good state even if they start off
2909 // uninitialized.
2910 PointerUnion<MachineMemOperand *, MachineMemOperand **> MemRefs = {};
2911
2912 // Note that this could be folded into the above `MemRefs` member if doing so
2913 // is advantageous at some point. We don't need to store this in most cases.
2914 // However, at the moment this doesn't appear to make the allocation any
2915 // smaller and makes the code somewhat simpler to read.
2916 int NumMemRefs = 0;
2917
2918public:
2919 using mmo_iterator = ArrayRef<MachineMemOperand *>::const_iterator;
2920
2921 ArrayRef<MachineMemOperand *> memoperands() const {
2922 // Special case the common cases.
2923 if (NumMemRefs == 0)
2924 return {};
2925 if (NumMemRefs == 1)
2926 return makeArrayRef(MemRefs.getAddrOfPtr1(), 1);
2927
2928 // Otherwise we have an actual array.
2929 return makeArrayRef(MemRefs.get<MachineMemOperand **>(), NumMemRefs);
2930 }
2931 mmo_iterator memoperands_begin() const { return memoperands().begin(); }
2932 mmo_iterator memoperands_end() const { return memoperands().end(); }
2933 bool memoperands_empty() const { return memoperands().empty(); }
2934
2935 /// Clear out the memory reference descriptor list.
2936 void clearMemRefs() {
2937 MemRefs = nullptr;
2938 NumMemRefs = 0;
2939 }
2940
2941 static bool classof(const SDNode *N) {
2942 return N->isMachineOpcode();
2943 }
2944};
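// Editorial note (not part of the header): a sketch of walking the memory
// references attached to a MachineSDNode, e.g. to look for volatile accesses.
static bool illustrateHasVolatileMemRef(const MachineSDNode *MN) {
  for (MachineMemOperand *MMO : MN->memoperands())
    if (MMO->isVolatile())
      return true;
  return false;
}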
2945
2946/// An SDNode that records if a register contains a value that is guaranteed to
2947/// be aligned accordingly.
2948class AssertAlignSDNode : public SDNode {
2949 Align Alignment;
2950
2951public:
2952 AssertAlignSDNode(unsigned Order, const DebugLoc &DL, EVT VT, Align A)
2953 : SDNode(ISD::AssertAlign, Order, DL, getSDVTList(VT)), Alignment(A) {}
2954
2955 Align getAlign() const { return Alignment; }
2956
2957 static bool classof(const SDNode *N) {
2958 return N->getOpcode() == ISD::AssertAlign;
2959 }
2960};
2961
2962class SDNodeIterator {
2963 const SDNode *Node;
2964 unsigned Operand;
2965
2966 SDNodeIterator(const SDNode *N, unsigned Op) : Node(N), Operand(Op) {}
2967
2968public:
2969 using iterator_category = std::forward_iterator_tag;
2970 using value_type = SDNode;
2971 using difference_type = std::ptrdiff_t;
2972 using pointer = value_type *;
2973 using reference = value_type &;
2974
2975 bool operator==(const SDNodeIterator& x) const {
2976 return Operand == x.Operand;
2977 }
2978 bool operator!=(const SDNodeIterator& x) const { return !operator==(x); }
2979
2980 pointer operator*() const {
2981 return Node->getOperand(Operand).getNode();
2982 }
2983 pointer operator->() const { return operator*(); }
2984
2985 SDNodeIterator& operator++() { // Preincrement
2986 ++Operand;
2987 return *this;
2988 }
2989 SDNodeIterator operator++(int) { // Postincrement
2990 SDNodeIterator tmp = *this; ++*this; return tmp;
2991 }
2992 size_t operator-(SDNodeIterator Other) const {
2993 assert(Node == Other.Node &&
2994 "Cannot compare iterators of two different nodes!");
2995 return Operand - Other.Operand;
2996 }
2997
2998 static SDNodeIterator begin(const SDNode *N) { return SDNodeIterator(N, 0); }
2999 static SDNodeIterator end (const SDNode *N) {
3000 return SDNodeIterator(N, N->getNumOperands());
3001 }
3002
3003 unsigned getOperand() const { return Operand; }
3004 const SDNode *getNode() const { return Node; }
3005};
3006
3007template <> struct GraphTraits<SDNode*> {
3008 using NodeRef = SDNode *;
3009 using ChildIteratorType = SDNodeIterator;
3010
3011 static NodeRef getEntryNode(SDNode *N) { return N; }
3012
3013 static ChildIteratorType child_begin(NodeRef N) {
3014 return SDNodeIterator::begin(N);
3015 }
3016
3017 static ChildIteratorType child_end(NodeRef N) {
3018 return SDNodeIterator::end(N);
3019 }
3020};
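// Editorial note (not part of the header): with the GraphTraits specialization
// above, the generic graph utilities (assumed here to come from
// llvm/ADT/DepthFirstIterator.h) can walk the operand graph rooted at a node.
// A minimal sketch:
static unsigned illustrateCountReachableNodes(SDNode *Root) {
  unsigned Count = 0;
  for (SDNode *N : depth_first(Root)) { // uses GraphTraits<SDNode*>
    (void)N;
    ++Count;
  }
  return Count;
}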
3021
3022/// A representation of the largest SDNode, for use in sizeof().
3023///
3024/// This needs to be a union because the largest node differs on 32 bit systems
3025/// with 4 and 8 byte pointer alignment, respectively.
3026using LargestSDNode = AlignedCharArrayUnion<AtomicSDNode, TargetIndexSDNode,
3027 BlockAddressSDNode,
3028 GlobalAddressSDNode,
3029 PseudoProbeSDNode>;
3030
3031/// The SDNode class with the greatest alignment requirement.
3032using MostAlignedSDNode = GlobalAddressSDNode;
3033
3034namespace ISD {
3035
3036 /// Returns true if the specified node is a non-extending and unindexed load.
3037 inline bool isNormalLoad(const SDNode *N) {
3038 const LoadSDNode *Ld = dyn_cast<LoadSDNode>(N);
3039 return Ld && Ld->getExtensionType() == ISD::NON_EXTLOAD &&
3040 Ld->getAddressingMode() == ISD::UNINDEXED;
3041 }
3042
3043 /// Returns true if the specified node is a non-extending load.
3044 inline bool isNON_EXTLoad(const SDNode *N) {
3045 return isa<LoadSDNode>(N) &&
3046 cast<LoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD;
3047 }
3048
3049 /// Returns true if the specified node is a EXTLOAD.
3050 inline bool isEXTLoad(const SDNode *N) {
3051 return isa<LoadSDNode>(N) &&
3052 cast<LoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD;
3053 }
3054
3055 /// Returns true if the specified node is a SEXTLOAD.
3056 inline bool isSEXTLoad(const SDNode *N) {
3057 return isa<LoadSDNode>(N) &&
3058 cast<LoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD;
3059 }
3060
3061 /// Returns true if the specified node is a ZEXTLOAD.
3062 inline bool isZEXTLoad(const SDNode *N) {
3063 return isa<LoadSDNode>(N) &&
3064 cast<LoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD;
3065 }
3066
3067 /// Returns true if the specified node is an unindexed load.
3068 inline bool isUNINDEXEDLoad(const SDNode *N) {
3069 return isa<LoadSDNode>(N) &&
3070 cast<LoadSDNode>(N)->getAddressingMode() == ISD::UNINDEXED;
3071 }
3072
3073 /// Returns true if the specified node is a non-truncating
3074 /// and unindexed store.
3075 inline bool isNormalStore(const SDNode *N) {
3076 const StoreSDNode *St = dyn_cast<StoreSDNode>(N);
3077 return St && !St->isTruncatingStore() &&
3078 St->getAddressingMode() == ISD::UNINDEXED;
3079 }
3080
3081 /// Returns true if the specified node is an unindexed store.
3082 inline bool isUNINDEXEDStore(const SDNode *N) {
3083 return isa<StoreSDNode>(N) &&
3084 cast<StoreSDNode>(N)->getAddressingMode() == ISD::UNINDEXED;
3085 }
3086
3087 /// Attempt to match a unary predicate against a scalar/splat constant or
3088 /// every element of a constant BUILD_VECTOR.
3089 /// If AllowUndefs is true, then UNDEF elements will pass nullptr to Match.
3090 bool matchUnaryPredicate(SDValue Op,
3091 std::function<bool(ConstantSDNode *)> Match,
3092 bool AllowUndefs = false);
3093
3094 /// Attempt to match a binary predicate against a pair of scalar/splat
3095 /// constants or every element of a pair of constant BUILD_VECTORs.
3096 /// If AllowUndefs is true, then UNDEF elements will pass nullptr to Match.
3097 /// If AllowTypeMismatch is true then RetType + ArgTypes don't need to match.
3098 bool matchBinaryPredicate(
3099 SDValue LHS, SDValue RHS,
3100 std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match,
3101 bool AllowUndefs = false, bool AllowTypeMismatch = false);
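// Editorial note (not part of the header): a hedged sketch of using
// matchUnaryPredicate to check that every constant element of `Op` is
// non-zero (UNDEF elements are rejected since AllowUndefs defaults to false).
static bool illustrateAllElementsNonZero(SDValue Op) {
  return ISD::matchUnaryPredicate(
      Op, [](ConstantSDNode *C) { return C && !C->isZero(); });
}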
3102
3103 /// Returns true if the specified value is the overflow result from one
3104 /// of the overflow intrinsic nodes.
3105 inline bool isOverflowIntrOpRes(SDValue Op) {
3106 unsigned Opc = Op.getOpcode();
3107 return (Op.getResNo() == 1 &&
3108 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
3109 Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO));
3110 }
3111
3112} // end namespace ISD
3113
3114} // end namespace llvm
3115
3116#endif // LLVM_CODEGEN_SELECTIONDAGNODES_H