Bug Summary

File: llvm/include/llvm/CodeGen/SelectionDAGNodes.h
Warning: line 1114, column 10
Called C++ object pointer is null
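The diagnostic reports a member-function call whose implicit object pointer is null on at least one path the analyzer explored. Below is a minimal, hypothetical C++ sketch of that pattern (the names Node, lookupNode and useNode are illustrative only, not taken from SelectionDAGNodes.h): a pointer that can be null on one branch is later used as the implicit 'this' of a call, and guarding the pointer before the call is what silences the report.

struct Node {
  int getValue() const { return 42; }
};

// Hypothetical lookup: may legitimately return nullptr.
Node *lookupNode(bool present) {
  static Node N;
  return present ? &N : nullptr;
}

int useNode(bool present) {
  Node *P = lookupNode(present);
  // Without this check, P->getValue() on the 'present == false' path is
  // exactly the kind of call flagged as "Called C++ object pointer is null".
  if (!P)
    return 0;
  return P->getValue();
}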

Annotated Source Code

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name ARMISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/build-llvm/lib/Target/ARM -resource-dir /usr/lib/llvm-13/lib/clang/13.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/build-llvm/lib/Target/ARM -I /build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/Target/ARM -I /build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/build-llvm/include -I /build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/lib/llvm-13/lib/clang/13.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/build-llvm/lib/Target/ARM -fdebug-prefix-map=/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f=. -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-04-14-063029-18377-1 -x c++ /build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/Target/ARM/ARMISelLowering.cpp

/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/Target/ARM/ARMISelLowering.cpp

1//===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that ARM uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "ARMISelLowering.h"
15#include "ARMBaseInstrInfo.h"
16#include "ARMBaseRegisterInfo.h"
17#include "ARMCallingConv.h"
18#include "ARMConstantPoolValue.h"
19#include "ARMMachineFunctionInfo.h"
20#include "ARMPerfectShuffle.h"
21#include "ARMRegisterInfo.h"
22#include "ARMSelectionDAGInfo.h"
23#include "ARMSubtarget.h"
24#include "ARMTargetTransformInfo.h"
25#include "MCTargetDesc/ARMAddressingModes.h"
26#include "MCTargetDesc/ARMBaseInfo.h"
27#include "Utils/ARMBaseInfo.h"
28#include "llvm/ADT/APFloat.h"
29#include "llvm/ADT/APInt.h"
30#include "llvm/ADT/ArrayRef.h"
31#include "llvm/ADT/BitVector.h"
32#include "llvm/ADT/DenseMap.h"
33#include "llvm/ADT/STLExtras.h"
34#include "llvm/ADT/SmallPtrSet.h"
35#include "llvm/ADT/SmallVector.h"
36#include "llvm/ADT/Statistic.h"
37#include "llvm/ADT/StringExtras.h"
38#include "llvm/ADT/StringRef.h"
39#include "llvm/ADT/StringSwitch.h"
40#include "llvm/ADT/Triple.h"
41#include "llvm/ADT/Twine.h"
42#include "llvm/Analysis/VectorUtils.h"
43#include "llvm/CodeGen/CallingConvLower.h"
44#include "llvm/CodeGen/ISDOpcodes.h"
45#include "llvm/CodeGen/IntrinsicLowering.h"
46#include "llvm/CodeGen/MachineBasicBlock.h"
47#include "llvm/CodeGen/MachineConstantPool.h"
48#include "llvm/CodeGen/MachineFrameInfo.h"
49#include "llvm/CodeGen/MachineFunction.h"
50#include "llvm/CodeGen/MachineInstr.h"
51#include "llvm/CodeGen/MachineInstrBuilder.h"
52#include "llvm/CodeGen/MachineJumpTableInfo.h"
53#include "llvm/CodeGen/MachineMemOperand.h"
54#include "llvm/CodeGen/MachineOperand.h"
55#include "llvm/CodeGen/MachineRegisterInfo.h"
56#include "llvm/CodeGen/RuntimeLibcalls.h"
57#include "llvm/CodeGen/SelectionDAG.h"
58#include "llvm/CodeGen/SelectionDAGNodes.h"
59#include "llvm/CodeGen/TargetInstrInfo.h"
60#include "llvm/CodeGen/TargetLowering.h"
61#include "llvm/CodeGen/TargetOpcodes.h"
62#include "llvm/CodeGen/TargetRegisterInfo.h"
63#include "llvm/CodeGen/TargetSubtargetInfo.h"
64#include "llvm/CodeGen/ValueTypes.h"
65#include "llvm/IR/Attributes.h"
66#include "llvm/IR/CallingConv.h"
67#include "llvm/IR/Constant.h"
68#include "llvm/IR/Constants.h"
69#include "llvm/IR/DataLayout.h"
70#include "llvm/IR/DebugLoc.h"
71#include "llvm/IR/DerivedTypes.h"
72#include "llvm/IR/Function.h"
73#include "llvm/IR/GlobalAlias.h"
74#include "llvm/IR/GlobalValue.h"
75#include "llvm/IR/GlobalVariable.h"
76#include "llvm/IR/IRBuilder.h"
77#include "llvm/IR/InlineAsm.h"
78#include "llvm/IR/Instruction.h"
79#include "llvm/IR/Instructions.h"
80#include "llvm/IR/IntrinsicInst.h"
81#include "llvm/IR/Intrinsics.h"
82#include "llvm/IR/IntrinsicsARM.h"
83#include "llvm/IR/Module.h"
84#include "llvm/IR/PatternMatch.h"
85#include "llvm/IR/Type.h"
86#include "llvm/IR/User.h"
87#include "llvm/IR/Value.h"
88#include "llvm/MC/MCInstrDesc.h"
89#include "llvm/MC/MCInstrItineraries.h"
90#include "llvm/MC/MCRegisterInfo.h"
91#include "llvm/MC/MCSchedule.h"
92#include "llvm/Support/AtomicOrdering.h"
93#include "llvm/Support/BranchProbability.h"
94#include "llvm/Support/Casting.h"
95#include "llvm/Support/CodeGen.h"
96#include "llvm/Support/CommandLine.h"
97#include "llvm/Support/Compiler.h"
98#include "llvm/Support/Debug.h"
99#include "llvm/Support/ErrorHandling.h"
100#include "llvm/Support/KnownBits.h"
101#include "llvm/Support/MachineValueType.h"
102#include "llvm/Support/MathExtras.h"
103#include "llvm/Support/raw_ostream.h"
104#include "llvm/Target/TargetMachine.h"
105#include "llvm/Target/TargetOptions.h"
106#include <algorithm>
107#include <cassert>
108#include <cstdint>
109#include <cstdlib>
110#include <iterator>
111#include <limits>
112#include <string>
113#include <tuple>
114#include <utility>
115#include <vector>
116
117using namespace llvm;
118using namespace llvm::PatternMatch;
119
120#define DEBUG_TYPE "arm-isel"
121
122STATISTIC(NumTailCalls, "Number of tail calls");
123STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
124STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
125STATISTIC(NumConstpoolPromoted,
126 "Number of constants with their storage promoted into constant pools");
127
128static cl::opt<bool>
129ARMInterworking("arm-interworking", cl::Hidden,
130 cl::desc("Enable / disable ARM interworking (for debugging only)"),
131 cl::init(true));
132
133static cl::opt<bool> EnableConstpoolPromotion(
134 "arm-promote-constant", cl::Hidden,
135 cl::desc("Enable / disable promotion of unnamed_addr constants into "
136 "constant pools"),
137 cl::init(false)); // FIXME: set to true by default once PR32780 is fixed
138static cl::opt<unsigned> ConstpoolPromotionMaxSize(
139 "arm-promote-constant-max-size", cl::Hidden,
140 cl::desc("Maximum size of constant to promote into a constant pool"),
141 cl::init(64));
142static cl::opt<unsigned> ConstpoolPromotionMaxTotal(
143 "arm-promote-constant-max-total", cl::Hidden,
144 cl::desc("Maximum size of ALL constants to promote into a constant pool"),
145 cl::init(128));
146
147cl::opt<unsigned>
148MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
149 cl::desc("Maximum interleave factor for MVE VLDn to generate."),
150 cl::init(2));
151
152// The APCS parameter registers.
153static const MCPhysReg GPRArgRegs[] = {
154 ARM::R0, ARM::R1, ARM::R2, ARM::R3
155};
156
157void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
158 MVT PromotedBitwiseVT) {
159 if (VT != PromotedLdStVT) {
160 setOperationAction(ISD::LOAD, VT, Promote);
161 AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
162
163 setOperationAction(ISD::STORE, VT, Promote);
164 AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
165 }
166
167 MVT ElemTy = VT.getVectorElementType();
168 if (ElemTy != MVT::f64)
169 setOperationAction(ISD::SETCC, VT, Custom);
170 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
171 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
172 if (ElemTy == MVT::i32) {
173 setOperationAction(ISD::SINT_TO_FP, VT, Custom);
174 setOperationAction(ISD::UINT_TO_FP, VT, Custom);
175 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
176 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
177 } else {
178 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
179 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
180 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
181 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
182 }
183 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
184 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
185 setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
186 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
187 setOperationAction(ISD::SELECT, VT, Expand);
188 setOperationAction(ISD::SELECT_CC, VT, Expand);
189 setOperationAction(ISD::VSELECT, VT, Expand);
190 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
191 if (VT.isInteger()) {
192 setOperationAction(ISD::SHL, VT, Custom);
193 setOperationAction(ISD::SRA, VT, Custom);
194 setOperationAction(ISD::SRL, VT, Custom);
195 }
196
197 // Promote all bit-wise operations.
198 if (VT.isInteger() && VT != PromotedBitwiseVT) {
199 setOperationAction(ISD::AND, VT, Promote);
200 AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
201 setOperationAction(ISD::OR, VT, Promote);
202 AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT);
203 setOperationAction(ISD::XOR, VT, Promote);
204 AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
205 }
206
207 // Neon does not support vector divide/remainder operations.
208 setOperationAction(ISD::SDIV, VT, Expand);
209 setOperationAction(ISD::UDIV, VT, Expand);
210 setOperationAction(ISD::FDIV, VT, Expand);
211 setOperationAction(ISD::SREM, VT, Expand);
212 setOperationAction(ISD::UREM, VT, Expand);
213 setOperationAction(ISD::FREM, VT, Expand);
214 setOperationAction(ISD::SDIVREM, VT, Expand);
215 setOperationAction(ISD::UDIVREM, VT, Expand);
216
217 if (!VT.isFloatingPoint() &&
218 VT != MVT::v2i64 && VT != MVT::v1i64)
219 for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
220 setOperationAction(Opcode, VT, Legal);
221 if (!VT.isFloatingPoint())
222 for (auto Opcode : {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT})
223 setOperationAction(Opcode, VT, Legal);
224}
225
226void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
227 addRegisterClass(VT, &ARM::DPRRegClass);
228 addTypeForNEON(VT, MVT::f64, MVT::v2i32);
229}
230
231void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
232 addRegisterClass(VT, &ARM::DPairRegClass);
233 addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
234}
235
236void ARMTargetLowering::setAllExpand(MVT VT) {
237 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
238 setOperationAction(Opc, VT, Expand);
239
240 // We support these really simple operations even on types where all
241 // the actual arithmetic has to be broken down into simpler
242 // operations or turned into library calls.
243 setOperationAction(ISD::BITCAST, VT, Legal);
244 setOperationAction(ISD::LOAD, VT, Legal);
245 setOperationAction(ISD::STORE, VT, Legal);
246 setOperationAction(ISD::UNDEF, VT, Legal);
247}
248
249void ARMTargetLowering::addAllExtLoads(const MVT From, const MVT To,
250 LegalizeAction Action) {
251 setLoadExtAction(ISD::EXTLOAD, From, To, Action);
252 setLoadExtAction(ISD::ZEXTLOAD, From, To, Action);
253 setLoadExtAction(ISD::SEXTLOAD, From, To, Action);
254}
255
256void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
257 const MVT IntTypes[] = { MVT::v16i8, MVT::v8i16, MVT::v4i32 };
258
259 for (auto VT : IntTypes) {
260 addRegisterClass(VT, &ARM::MQPRRegClass);
261 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
262 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
263 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
264 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
265 setOperationAction(ISD::SHL, VT, Custom);
266 setOperationAction(ISD::SRA, VT, Custom);
267 setOperationAction(ISD::SRL, VT, Custom);
268 setOperationAction(ISD::SMIN, VT, Legal);
269 setOperationAction(ISD::SMAX, VT, Legal);
270 setOperationAction(ISD::UMIN, VT, Legal);
271 setOperationAction(ISD::UMAX, VT, Legal);
272 setOperationAction(ISD::ABS, VT, Legal);
273 setOperationAction(ISD::SETCC, VT, Custom);
274 setOperationAction(ISD::MLOAD, VT, Custom);
275 setOperationAction(ISD::MSTORE, VT, Legal);
276 setOperationAction(ISD::CTLZ, VT, Legal);
277 setOperationAction(ISD::CTTZ, VT, Custom);
278 setOperationAction(ISD::BITREVERSE, VT, Legal);
279 setOperationAction(ISD::BSWAP, VT, Legal);
280 setOperationAction(ISD::SADDSAT, VT, Legal);
281 setOperationAction(ISD::UADDSAT, VT, Legal);
282 setOperationAction(ISD::SSUBSAT, VT, Legal);
283 setOperationAction(ISD::USUBSAT, VT, Legal);
284
285 // No native support for these.
286 setOperationAction(ISD::UDIV, VT, Expand);
287 setOperationAction(ISD::SDIV, VT, Expand);
288 setOperationAction(ISD::UREM, VT, Expand);
289 setOperationAction(ISD::SREM, VT, Expand);
290 setOperationAction(ISD::UDIVREM, VT, Expand);
291 setOperationAction(ISD::SDIVREM, VT, Expand);
292 setOperationAction(ISD::CTPOP, VT, Expand);
293 setOperationAction(ISD::SELECT, VT, Expand);
294 setOperationAction(ISD::SELECT_CC, VT, Expand);
295
296 // Vector reductions
297 setOperationAction(ISD::VECREDUCE_ADD, VT, Legal);
298 setOperationAction(ISD::VECREDUCE_SMAX, VT, Legal);
299 setOperationAction(ISD::VECREDUCE_UMAX, VT, Legal);
300 setOperationAction(ISD::VECREDUCE_SMIN, VT, Legal);
301 setOperationAction(ISD::VECREDUCE_UMIN, VT, Legal);
302 setOperationAction(ISD::VECREDUCE_MUL, VT, Custom);
303 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
304 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
305 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
306
307 if (!HasMVEFP) {
308 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
309 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
310 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
311 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
312 }
313
314 // Pre and Post inc are supported on loads and stores
315 for (unsigned im = (unsigned)ISD::PRE_INC;
316 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
317 setIndexedLoadAction(im, VT, Legal);
318 setIndexedStoreAction(im, VT, Legal);
319 setIndexedMaskedLoadAction(im, VT, Legal);
320 setIndexedMaskedStoreAction(im, VT, Legal);
321 }
322 }
323
324 const MVT FloatTypes[] = { MVT::v8f16, MVT::v4f32 };
325 for (auto VT : FloatTypes) {
326 addRegisterClass(VT, &ARM::MQPRRegClass);
327 if (!HasMVEFP)
328 setAllExpand(VT);
329
330 // These are legal or custom whether we have MVE.fp or not
331 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
332 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
333 setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getVectorElementType(), Custom);
334 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
335 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
336 setOperationAction(ISD::BUILD_VECTOR, VT.getVectorElementType(), Custom);
337 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
338 setOperationAction(ISD::SETCC, VT, Custom);
339 setOperationAction(ISD::MLOAD, VT, Custom);
340 setOperationAction(ISD::MSTORE, VT, Legal);
341 setOperationAction(ISD::SELECT, VT, Expand);
342 setOperationAction(ISD::SELECT_CC, VT, Expand);
343
344 // Pre and Post inc are supported on loads and stores
345 for (unsigned im = (unsigned)ISD::PRE_INC;
346 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
347 setIndexedLoadAction(im, VT, Legal);
348 setIndexedStoreAction(im, VT, Legal);
349 setIndexedMaskedLoadAction(im, VT, Legal);
350 setIndexedMaskedStoreAction(im, VT, Legal);
351 }
352
353 if (HasMVEFP) {
354 setOperationAction(ISD::FMINNUM, VT, Legal);
355 setOperationAction(ISD::FMAXNUM, VT, Legal);
356 setOperationAction(ISD::FROUND, VT, Legal);
357 setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
358 setOperationAction(ISD::VECREDUCE_FMUL, VT, Custom);
359 setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
360 setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
361
362 // No native support for these.
363 setOperationAction(ISD::FDIV, VT, Expand);
364 setOperationAction(ISD::FREM, VT, Expand);
365 setOperationAction(ISD::FSQRT, VT, Expand);
366 setOperationAction(ISD::FSIN, VT, Expand);
367 setOperationAction(ISD::FCOS, VT, Expand);
368 setOperationAction(ISD::FPOW, VT, Expand);
369 setOperationAction(ISD::FLOG, VT, Expand);
370 setOperationAction(ISD::FLOG2, VT, Expand);
371 setOperationAction(ISD::FLOG10, VT, Expand);
372 setOperationAction(ISD::FEXP, VT, Expand);
373 setOperationAction(ISD::FEXP2, VT, Expand);
374 setOperationAction(ISD::FNEARBYINT, VT, Expand);
375 }
376 }
377
378 // Custom Expand smaller than legal vector reductions to prevent false zero
379 // items being added.
380 setOperationAction(ISD::VECREDUCE_FADD, MVT::v4f16, Custom);
381 setOperationAction(ISD::VECREDUCE_FMUL, MVT::v4f16, Custom);
382 setOperationAction(ISD::VECREDUCE_FMIN, MVT::v4f16, Custom);
383 setOperationAction(ISD::VECREDUCE_FMAX, MVT::v4f16, Custom);
384 setOperationAction(ISD::VECREDUCE_FADD, MVT::v2f16, Custom);
385 setOperationAction(ISD::VECREDUCE_FMUL, MVT::v2f16, Custom);
386 setOperationAction(ISD::VECREDUCE_FMIN, MVT::v2f16, Custom);
387 setOperationAction(ISD::VECREDUCE_FMAX, MVT::v2f16, Custom);
388
389 // We 'support' these types up to bitcast/load/store level, regardless of
390 // MVE integer-only / float support. Only doing FP data processing on the FP
391 // vector types is inhibited at integer-only level.
392 const MVT LongTypes[] = { MVT::v2i64, MVT::v2f64 };
393 for (auto VT : LongTypes) {
394 addRegisterClass(VT, &ARM::MQPRRegClass);
395 setAllExpand(VT);
396 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
397 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
398 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
399 }
400 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
401
402 // We can do bitwise operations on v2i64 vectors
403 setOperationAction(ISD::AND, MVT::v2i64, Legal);
404 setOperationAction(ISD::OR, MVT::v2i64, Legal);
405 setOperationAction(ISD::XOR, MVT::v2i64, Legal);
406
407 // It is legal to extload from v4i8 to v4i16 or v4i32.
408 addAllExtLoads(MVT::v8i16, MVT::v8i8, Legal);
409 addAllExtLoads(MVT::v4i32, MVT::v4i16, Legal);
410 addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal);
411
412 // It is legal to sign extend from v4i8/v4i16 to v4i32 or v8i8 to v8i16.
413 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Legal);
414 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);
415 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);
416 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i8, Legal);
417 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i16, Legal);
418
419 // Some truncating stores are legal too.
420 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
421 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
422 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
423
424 // Pre and Post inc on these are legal, given the correct extends
425 for (unsigned im = (unsigned)ISD::PRE_INC;
426 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
427 for (auto VT : {MVT::v8i8, MVT::v4i8, MVT::v4i16}) {
428 setIndexedLoadAction(im, VT, Legal);
429 setIndexedStoreAction(im, VT, Legal);
430 setIndexedMaskedLoadAction(im, VT, Legal);
431 setIndexedMaskedStoreAction(im, VT, Legal);
432 }
433 }
434
435 // Predicate types
436 const MVT pTypes[] = {MVT::v16i1, MVT::v8i1, MVT::v4i1};
437 for (auto VT : pTypes) {
438 addRegisterClass(VT, &ARM::VCCRRegClass);
439 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
440 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
441 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
442 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
443 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
444 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
445 setOperationAction(ISD::SETCC, VT, Custom);
446 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
447 setOperationAction(ISD::LOAD, VT, Custom);
448 setOperationAction(ISD::STORE, VT, Custom);
449 setOperationAction(ISD::TRUNCATE, VT, Custom);
450 setOperationAction(ISD::VSELECT, VT, Expand);
451 setOperationAction(ISD::SELECT, VT, Expand);
452 }
453}
454
455ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
456 const ARMSubtarget &STI)
457 : TargetLowering(TM), Subtarget(&STI) {
458 RegInfo = Subtarget->getRegisterInfo();
459 Itins = Subtarget->getInstrItineraryData();
460
461 setBooleanContents(ZeroOrOneBooleanContent);
462 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
463
464 if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
465 !Subtarget->isTargetWatchOS()) {
466 bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard;
467 for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
468 setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
469 IsHFTarget ? CallingConv::ARM_AAPCS_VFP
470 : CallingConv::ARM_AAPCS);
471 }
472
473 if (Subtarget->isTargetMachO()) {
474 // Uses VFP for Thumb libfuncs if available.
475 if (Subtarget->isThumb() && Subtarget->hasVFP2Base() &&
476 Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
477 static const struct {
478 const RTLIB::Libcall Op;
479 const char * const Name;
480 const ISD::CondCode Cond;
481 } LibraryCalls[] = {
482 // Single-precision floating-point arithmetic.
483 { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
484 { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
485 { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
486 { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
487
488 // Double-precision floating-point arithmetic.
489 { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
490 { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
491 { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
492 { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
493
494 // Single-precision comparisons.
495 { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
496 { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
497 { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
498 { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
499 { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
500 { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
501 { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
502
503 // Double-precision comparisons.
504 { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
505 { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
506 { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
507 { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
508 { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
509 { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
510 { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
511
512 // Floating-point to integer conversions.
513 // i64 conversions are done via library routines even when generating VFP
514 // instructions, so use the same ones.
515 { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
516 { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
517 { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
518 { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
519
520 // Conversions between floating types.
521 { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
522 { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
523
524 // Integer to floating-point conversions.
525 // i64 conversions are done via library routines even when generating VFP
526 // instructions, so use the same ones.
527 // FIXME: There appears to be some naming inconsistency in ARM libgcc:
528 // e.g., __floatunsidf vs. __floatunssidfvfp.
529 { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
530 { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
531 { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
532 { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
533 };
534
535 for (const auto &LC : LibraryCalls) {
536 setLibcallName(LC.Op, LC.Name);
537 if (LC.Cond != ISD::SETCC_INVALID)
538 setCmpLibcallCC(LC.Op, LC.Cond);
539 }
540 }
541 }
542
543 // These libcalls are not available in 32-bit.
544 setLibcallName(RTLIB::SHL_I128, nullptr);
545 setLibcallName(RTLIB::SRL_I128, nullptr);
546 setLibcallName(RTLIB::SRA_I128, nullptr);
547
548 // RTLIB
549 if (Subtarget->isAAPCS_ABI() &&
550 (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
551 Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
552 static const struct {
553 const RTLIB::Libcall Op;
554 const char * const Name;
555 const CallingConv::ID CC;
556 const ISD::CondCode Cond;
557 } LibraryCalls[] = {
558 // Double-precision floating-point arithmetic helper functions
559 // RTABI chapter 4.1.2, Table 2
560 { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
561 { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
562 { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
563 { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
564
565 // Double-precision floating-point comparison helper functions
566 // RTABI chapter 4.1.2, Table 3
567 { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
568 { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
569 { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
570 { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
571 { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
572 { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
573 { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
574
575 // Single-precision floating-point arithmetic helper functions
576 // RTABI chapter 4.1.2, Table 4
577 { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
578 { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
579 { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
580 { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
581
582 // Single-precision floating-point comparison helper functions
583 // RTABI chapter 4.1.2, Table 5
584 { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
585 { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
586 { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
587 { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
588 { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
589 { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
590 { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
591
592 // Floating-point to integer conversions.
593 // RTABI chapter 4.1.2, Table 6
594 { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
595 { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
596 { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
597 { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
598 { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
599 { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
600 { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
601 { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
602
603 // Conversions between floating types.
604 // RTABI chapter 4.1.2, Table 7
605 { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
606 { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
607 { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
608
609 // Integer to floating-point conversions.
610 // RTABI chapter 4.1.2, Table 8
611 { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
612 { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
613 { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
614 { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
615 { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
616 { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
617 { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
618 { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
619
620 // Long long helper functions
621 // RTABI chapter 4.2, Table 9
622 { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
623 { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
624 { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
625 { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
626
627 // Integer division functions
628 // RTABI chapter 4.3.1
629 { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
630 { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
631 { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
632 { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
633 { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
634 { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
635 { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
636 { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
637 };
638
639 for (const auto &LC : LibraryCalls) {
640 setLibcallName(LC.Op, LC.Name);
641 setLibcallCallingConv(LC.Op, LC.CC);
642 if (LC.Cond != ISD::SETCC_INVALID)
643 setCmpLibcallCC(LC.Op, LC.Cond);
644 }
645
646 // EABI dependent RTLIB
647 if (TM.Options.EABIVersion == EABI::EABI4 ||
648 TM.Options.EABIVersion == EABI::EABI5) {
649 static const struct {
650 const RTLIB::Libcall Op;
651 const char *const Name;
652 const CallingConv::ID CC;
653 const ISD::CondCode Cond;
654 } MemOpsLibraryCalls[] = {
655 // Memory operations
656 // RTABI chapter 4.3.4
657 { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
658 { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
659 { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
660 };
661
662 for (const auto &LC : MemOpsLibraryCalls) {
663 setLibcallName(LC.Op, LC.Name);
664 setLibcallCallingConv(LC.Op, LC.CC);
665 if (LC.Cond != ISD::SETCC_INVALID)
666 setCmpLibcallCC(LC.Op, LC.Cond);
667 }
668 }
669 }
670
671 if (Subtarget->isTargetWindows()) {
672 static const struct {
673 const RTLIB::Libcall Op;
674 const char * const Name;
675 const CallingConv::ID CC;
676 } LibraryCalls[] = {
677 { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
678 { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
679 { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
680 { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
681 { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
682 { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
683 { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
684 { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
685 };
686
687 for (const auto &LC : LibraryCalls) {
688 setLibcallName(LC.Op, LC.Name);
689 setLibcallCallingConv(LC.Op, LC.CC);
690 }
691 }
692
693 // Use divmod compiler-rt calls for iOS 5.0 and later.
694 if (Subtarget->isTargetMachO() &&
695 !(Subtarget->isTargetIOS() &&
696 Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
697 setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
698 setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
699 }
700
701 // The half <-> float conversion functions are always soft-float on
702 // non-watchos platforms, but are needed for some targets which use a
703 // hard-float calling convention by default.
704 if (!Subtarget->isTargetWatchABI()) {
705 if (Subtarget->isAAPCS_ABI()) {
706 setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
707 setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
708 setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
709 } else {
710 setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
711 setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
712 setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
713 }
714 }
715
716 // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
717 // a __gnu_ prefix (which is the default).
718 if (Subtarget->isTargetAEABI()) {
719 static const struct {
720 const RTLIB::Libcall Op;
721 const char * const Name;
722 const CallingConv::ID CC;
723 } LibraryCalls[] = {
724 { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS },
725 { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS },
726 { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS },
727 };
728
729 for (const auto &LC : LibraryCalls) {
730 setLibcallName(LC.Op, LC.Name);
731 setLibcallCallingConv(LC.Op, LC.CC);
732 }
733 }
734
735 if (Subtarget->isThumb1Only())
736 addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
737 else
738 addRegisterClass(MVT::i32, &ARM::GPRRegClass);
739
740 if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only() &&
741 Subtarget->hasFPRegs()) {
742 addRegisterClass(MVT::f32, &ARM::SPRRegClass);
743 addRegisterClass(MVT::f64, &ARM::DPRRegClass);
744 if (!Subtarget->hasVFP2Base())
745 setAllExpand(MVT::f32);
746 if (!Subtarget->hasFP64())
747 setAllExpand(MVT::f64);
748 }
749
750 if (Subtarget->hasFullFP16()) {
751 addRegisterClass(MVT::f16, &ARM::HPRRegClass);
752 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
753 setOperationAction(ISD::BITCAST, MVT::f16, Custom);
754
755 setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
756 setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
757 }
758
759 if (Subtarget->hasBF16()) {
760 addRegisterClass(MVT::bf16, &ARM::HPRRegClass);
761 setAllExpand(MVT::bf16);
762 if (!Subtarget->hasFullFP16())
763 setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
764 }
765
766 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
767 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
768 setTruncStoreAction(VT, InnerVT, Expand);
769 addAllExtLoads(VT, InnerVT, Expand);
770 }
771
772 setOperationAction(ISD::MULHS, VT, Expand);
773 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
774 setOperationAction(ISD::MULHU, VT, Expand);
775 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
776
777 setOperationAction(ISD::BSWAP, VT, Expand);
778 }
779
780 setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
781 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
782
783 setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
784 setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
785
786 if (Subtarget->hasMVEIntegerOps())
787 addMVEVectorTypes(Subtarget->hasMVEFloatOps());
788
789 // Combine low-overhead loop intrinsics so that we can lower i1 types.
790 if (Subtarget->hasLOB()) {
791 setTargetDAGCombine(ISD::BRCOND);
792 setTargetDAGCombine(ISD::BR_CC);
793 }
794
795 if (Subtarget->hasNEON()) {
796 addDRTypeForNEON(MVT::v2f32);
797 addDRTypeForNEON(MVT::v8i8);
798 addDRTypeForNEON(MVT::v4i16);
799 addDRTypeForNEON(MVT::v2i32);
800 addDRTypeForNEON(MVT::v1i64);
801
802 addQRTypeForNEON(MVT::v4f32);
803 addQRTypeForNEON(MVT::v2f64);
804 addQRTypeForNEON(MVT::v16i8);
805 addQRTypeForNEON(MVT::v8i16);
806 addQRTypeForNEON(MVT::v4i32);
807 addQRTypeForNEON(MVT::v2i64);
808
809 if (Subtarget->hasFullFP16()) {
810 addQRTypeForNEON(MVT::v8f16);
811 addDRTypeForNEON(MVT::v4f16);
812 }
813
814 if (Subtarget->hasBF16()) {
815 addQRTypeForNEON(MVT::v8bf16);
816 addDRTypeForNEON(MVT::v4bf16);
817 }
818 }
819
820 if (Subtarget->hasMVEIntegerOps() || Subtarget->hasNEON()) {
821 // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
822 // none of Neon, MVE or VFP supports any arithmetic operations on it.
823 setOperationAction(ISD::FADD, MVT::v2f64, Expand);
824 setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
825 setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
826 // FIXME: Code duplication: FDIV and FREM are expanded always, see
827 // ARMTargetLowering::addTypeForNEON method for details.
828 setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
829 setOperationAction(ISD::FREM, MVT::v2f64, Expand);
830 // FIXME: Create unittest.
831 // In other words, find a way when "copysign" appears in DAG with vector
832 // operands.
833 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
834 // FIXME: Code duplication: SETCC has custom operation action, see
835 // ARMTargetLowering::addTypeForNEON method for details.
836 setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
837 // FIXME: Create unittest for FNEG and for FABS.
838 setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
839 setOperationAction(ISD::FABS, MVT::v2f64, Expand);
840 setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
841 setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
842 setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
843 setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
844 setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
845 setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
846 setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
847 setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
848 setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
849 // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
850 setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
851 setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
852 setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
853 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
854 setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
855 setOperationAction(ISD::FMA, MVT::v2f64, Expand);
856 }
857
858 if (Subtarget->hasNEON()) {
859 // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively
860 // supported for v4f32.
861 setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
862 setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
863 setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
864 setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
865 setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
866 setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
867 setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
868 setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
869 setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
870 setOperationAction(ISD::FCEIL, MVT::v4f32, Expand);
871 setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand);
872 setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
873 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
874 setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand);
875
876 // Mark v2f32 intrinsics.
877 setOperationAction(ISD::FSQRT, MVT::v2f32, Expand);
878 setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
879 setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
880 setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
881 setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
882 setOperationAction(ISD::FLOG2, MVT::v2f32, Expand);
883 setOperationAction(ISD::FLOG10, MVT::v2f32, Expand);
884 setOperationAction(ISD::FEXP, MVT::v2f32, Expand);
885 setOperationAction(ISD::FEXP2, MVT::v2f32, Expand);
886 setOperationAction(ISD::FCEIL, MVT::v2f32, Expand);
887 setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand);
888 setOperationAction(ISD::FRINT, MVT::v2f32, Expand);
889 setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand);
890 setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand);
891
892 // Neon does not support some operations on v1i64 and v2i64 types.
893 setOperationAction(ISD::MUL, MVT::v1i64, Expand);
894 // Custom handling for some quad-vector types to detect VMULL.
895 setOperationAction(ISD::MUL, MVT::v8i16, Custom);
896 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
897 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
898 // Custom handling for some vector types to avoid expensive expansions
899 setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
900 setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
901 setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
902 setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
903 // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
904 // a destination type that is wider than the source, nor does
905 // it have a FP_TO_[SU]INT instruction with a narrower destination than
906 // the source.
907 setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
908 setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom);
909 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
910 setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
911 setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
912 setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Custom);
913 setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
914 setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
915
916 setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand);
917 setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
918
919 // NEON does not have single instruction CTPOP for vectors with element
920 // types wider than 8-bits. However, custom lowering can leverage the
921 // v8i8/v16i8 vcnt instruction.
922 setOperationAction(ISD::CTPOP, MVT::v2i32, Custom);
923 setOperationAction(ISD::CTPOP, MVT::v4i32, Custom);
924 setOperationAction(ISD::CTPOP, MVT::v4i16, Custom);
925 setOperationAction(ISD::CTPOP, MVT::v8i16, Custom);
926 setOperationAction(ISD::CTPOP, MVT::v1i64, Custom);
927 setOperationAction(ISD::CTPOP, MVT::v2i64, Custom);
928
929 setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
930 setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
931
932 // NEON does not have single instruction CTTZ for vectors.
933 setOperationAction(ISD::CTTZ, MVT::v8i8, Custom);
934 setOperationAction(ISD::CTTZ, MVT::v4i16, Custom);
935 setOperationAction(ISD::CTTZ, MVT::v2i32, Custom);
936 setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
937
938 setOperationAction(ISD::CTTZ, MVT::v16i8, Custom);
939 setOperationAction(ISD::CTTZ, MVT::v8i16, Custom);
940 setOperationAction(ISD::CTTZ, MVT::v4i32, Custom);
941 setOperationAction(ISD::CTTZ, MVT::v2i64, Custom);
942
943 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i8, Custom);
944 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i16, Custom);
945 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i32, Custom);
946 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v1i64, Custom);
947
948 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i8, Custom);
949 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i16, Custom);
950 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom);
951 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom);
952
953 // NEON only has FMA instructions as of VFP4.
954 if (!Subtarget->hasVFP4Base()) {
955 setOperationAction(ISD::FMA, MVT::v2f32, Expand);
956 setOperationAction(ISD::FMA, MVT::v4f32, Expand);
957 }
958
959 setTargetDAGCombine(ISD::SHL);
960 setTargetDAGCombine(ISD::SRL);
961 setTargetDAGCombine(ISD::SRA);
962 setTargetDAGCombine(ISD::FP_TO_SINT);
963 setTargetDAGCombine(ISD::FP_TO_UINT);
964 setTargetDAGCombine(ISD::FDIV);
965 setTargetDAGCombine(ISD::LOAD);
966
967 // It is legal to extload from v4i8 to v4i16 or v4i32.
968 for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16,
969 MVT::v2i32}) {
970 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
971 setLoadExtAction(ISD::EXTLOAD, VT, Ty, Legal);
972 setLoadExtAction(ISD::ZEXTLOAD, VT, Ty, Legal);
973 setLoadExtAction(ISD::SEXTLOAD, VT, Ty, Legal);
974 }
975 }
976 }
977
978 if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
979 setTargetDAGCombine(ISD::BUILD_VECTOR);
980 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
981 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
982 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
983 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
984 setTargetDAGCombine(ISD::STORE);
985 setTargetDAGCombine(ISD::SIGN_EXTEND);
986 setTargetDAGCombine(ISD::ZERO_EXTEND);
987 setTargetDAGCombine(ISD::ANY_EXTEND);
988 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
989 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
990 setTargetDAGCombine(ISD::INTRINSIC_VOID);
991 setTargetDAGCombine(ISD::VECREDUCE_ADD);
992 setTargetDAGCombine(ISD::ADD);
993 setTargetDAGCombine(ISD::BITCAST);
994 }
995 if (Subtarget->hasMVEIntegerOps()) {
996 setTargetDAGCombine(ISD::SMIN);
997 setTargetDAGCombine(ISD::UMIN);
998 setTargetDAGCombine(ISD::SMAX);
999 setTargetDAGCombine(ISD::UMAX);
1000 setTargetDAGCombine(ISD::FP_EXTEND);
1001 setTargetDAGCombine(ISD::SELECT);
1002 setTargetDAGCombine(ISD::SELECT_CC);
1003 }
1004
1005 if (!Subtarget->hasFP64()) {
1006 // When targeting a floating-point unit with only single-precision
1007 // operations, f64 is legal for the few double-precision instructions which
1008 // are present. However, no double-precision operations other than moves,
1009 // loads and stores are provided by the hardware.
1010 setOperationAction(ISD::FADD, MVT::f64, Expand);
1011 setOperationAction(ISD::FSUB, MVT::f64, Expand);
1012 setOperationAction(ISD::FMUL, MVT::f64, Expand);
1013 setOperationAction(ISD::FMA, MVT::f64, Expand);
1014 setOperationAction(ISD::FDIV, MVT::f64, Expand);
1015 setOperationAction(ISD::FREM, MVT::f64, Expand);
1016 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
1017 setOperationAction(ISD::FGETSIGN, MVT::f64, Expand);
1018 setOperationAction(ISD::FNEG, MVT::f64, Expand);
1019 setOperationAction(ISD::FABS, MVT::f64, Expand);
1020 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
1021 setOperationAction(ISD::FSIN, MVT::f64, Expand);
1022 setOperationAction(ISD::FCOS, MVT::f64, Expand);
1023 setOperationAction(ISD::FPOW, MVT::f64, Expand);
1024 setOperationAction(ISD::FLOG, MVT::f64, Expand);
1025 setOperationAction(ISD::FLOG2, MVT::f64, Expand);
1026 setOperationAction(ISD::FLOG10, MVT::f64, Expand);
1027 setOperationAction(ISD::FEXP, MVT::f64, Expand);
1028 setOperationAction(ISD::FEXP2, MVT::f64, Expand);
1029 setOperationAction(ISD::FCEIL, MVT::f64, Expand);
1030 setOperationAction(ISD::FTRUNC, MVT::f64, Expand);
1031 setOperationAction(ISD::FRINT, MVT::f64, Expand);
1032 setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
1033 setOperationAction(ISD::FFLOOR, MVT::f64, Expand);
1034 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
1035 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
1036 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
1037 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
1038 setOperationAction(ISD::FP_TO_SINT, MVT::f64, Custom);
1039 setOperationAction(ISD::FP_TO_UINT, MVT::f64, Custom);
1040 setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
1041 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
1042 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
1043 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::f64, Custom);
1044 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::f64, Custom);
1045 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
1046 }
1047
1048 if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) {
1049 setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
1050 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Custom);
1051 if (Subtarget->hasFullFP16()) {
1052 setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
1053 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
1054 }
1055 }
1056
1057 if (!Subtarget->hasFP16()) {
1058 setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom);
1059 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Custom);
1060 }
1061
1062 computeRegisterProperties(Subtarget->getRegisterInfo());
1063
1064 // ARM does not have floating-point extending loads.
1065 for (MVT VT : MVT::fp_valuetypes()) {
1066 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
1067 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
1068 }
1069
1070 // ... or truncating stores
1071 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
1072 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
1073 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
1074
1075 // ARM does not have i1 sign extending load.
1076 for (MVT VT : MVT::integer_valuetypes())
1077 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
1078
1079 // ARM supports all 4 flavors of integer indexed load / store.
1080 if (!Subtarget->isThumb1Only()) {
1081 for (unsigned im = (unsigned)ISD::PRE_INC;
1082 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
1083 setIndexedLoadAction(im, MVT::i1, Legal);
1084 setIndexedLoadAction(im, MVT::i8, Legal);
1085 setIndexedLoadAction(im, MVT::i16, Legal);
1086 setIndexedLoadAction(im, MVT::i32, Legal);
1087 setIndexedStoreAction(im, MVT::i1, Legal);
1088 setIndexedStoreAction(im, MVT::i8, Legal);
1089 setIndexedStoreAction(im, MVT::i16, Legal);
1090 setIndexedStoreAction(im, MVT::i32, Legal);
1091 }
1092 } else {
1093 // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
1094 setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal);
1095 setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal);
1096 }
1097
1098 setOperationAction(ISD::SADDO, MVT::i32, Custom);
1099 setOperationAction(ISD::UADDO, MVT::i32, Custom);
1100 setOperationAction(ISD::SSUBO, MVT::i32, Custom);
1101 setOperationAction(ISD::USUBO, MVT::i32, Custom);
1102
1103 setOperationAction(ISD::ADDCARRY, MVT::i32, Custom);
1104 setOperationAction(ISD::SUBCARRY, MVT::i32, Custom);
1105 if (Subtarget->hasDSP()) {
1106 setOperationAction(ISD::SADDSAT, MVT::i8, Custom);
1107 setOperationAction(ISD::SSUBSAT, MVT::i8, Custom);
1108 setOperationAction(ISD::SADDSAT, MVT::i16, Custom);
1109 setOperationAction(ISD::SSUBSAT, MVT::i16, Custom);
1110 }
1111 if (Subtarget->hasBaseDSP()) {
1112 setOperationAction(ISD::SADDSAT, MVT::i32, Legal);
1113 setOperationAction(ISD::SSUBSAT, MVT::i32, Legal);
1114 }
1115
1116 // i64 operation support.
1117 setOperationAction(ISD::MUL, MVT::i64, Expand);
1118 setOperationAction(ISD::MULHU, MVT::i32, Expand);
1119 if (Subtarget->isThumb1Only()) {
1120 setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
1121 setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
1122 }
1123 if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
1124 || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
1125 setOperationAction(ISD::MULHS, MVT::i32, Expand);
1126
1127 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
1128 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
1129 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
1130 setOperationAction(ISD::SRL, MVT::i64, Custom);
1131 setOperationAction(ISD::SRA, MVT::i64, Custom);
1132 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
1133 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
1134 setOperationAction(ISD::LOAD, MVT::i64, Custom);
1135 setOperationAction(ISD::STORE, MVT::i64, Custom);
1136
1137 // MVE lowers 64 bit shifts to lsll and lsrl
1138 // assuming that ISD::SRL and SRA of i64 are already marked custom
1139 if (Subtarget->hasMVEIntegerOps())
1140 setOperationAction(ISD::SHL, MVT::i64, Custom);
1141
1142 // Expand to __aeabi_l{lsl,lsr,asr} calls for Thumb1.
1143 if (Subtarget->isThumb1Only()) {
1144 setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
1145 setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
1146 setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
1147 }
1148
1149 if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
1150 setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
1151
1152 // ARM does not have ROTL.
1153 setOperationAction(ISD::ROTL, MVT::i32, Expand);
1154 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
1155 setOperationAction(ISD::ROTL, VT, Expand);
1156 setOperationAction(ISD::ROTR, VT, Expand);
1157 }
1158 setOperationAction(ISD::CTTZ, MVT::i32, Custom);
1159 setOperationAction(ISD::CTPOP, MVT::i32, Expand);
1160 if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
1161 setOperationAction(ISD::CTLZ, MVT::i32, Expand);
1162 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, LibCall);
1163 }
1164
1165 // @llvm.readcyclecounter requires the Performance Monitors extension.
1166 // Default to the 0 expansion on unsupported platforms.
1167 // FIXME: Technically there are older ARM CPUs that have
1168 // implementation-specific ways of obtaining this information.
1169 if (Subtarget->hasPerfMon())
1170 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
1171
1172 // Only ARMv6 has BSWAP.
1173 if (!Subtarget->hasV6Ops())
1174 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
1175
1176 bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
1177 : Subtarget->hasDivideInARMMode();
1178 if (!hasDivide) {
1179 // These are expanded into libcalls if the cpu doesn't have HW divider.
1180 setOperationAction(ISD::SDIV, MVT::i32, LibCall);
1181 setOperationAction(ISD::UDIV, MVT::i32, LibCall);
1182 }
1183
1184 if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
1185 setOperationAction(ISD::SDIV, MVT::i32, Custom);
1186 setOperationAction(ISD::UDIV, MVT::i32, Custom);
1187
1188 setOperationAction(ISD::SDIV, MVT::i64, Custom);
1189 setOperationAction(ISD::UDIV, MVT::i64, Custom);
1190 }
1191
1192 setOperationAction(ISD::SREM, MVT::i32, Expand);
1193 setOperationAction(ISD::UREM, MVT::i32, Expand);
1194
1195 // Register based DivRem for AEABI (RTABI 4.2)
1196 if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
1197 Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
1198 Subtarget->isTargetWindows()) {
1199 setOperationAction(ISD::SREM, MVT::i64, Custom);
1200 setOperationAction(ISD::UREM, MVT::i64, Custom);
1201 HasStandaloneRem = false;
1202
1203 if (Subtarget->isTargetWindows()) {
1204 const struct {
1205 const RTLIB::Libcall Op;
1206 const char * const Name;
1207 const CallingConv::ID CC;
1208 } LibraryCalls[] = {
1209 { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS },
1210 { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS },
1211 { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS },
1212 { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS },
1213
1214 { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS },
1215 { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS },
1216 { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS },
1217 { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS },
1218 };
1219
1220 for (const auto &LC : LibraryCalls) {
1221 setLibcallName(LC.Op, LC.Name);
1222 setLibcallCallingConv(LC.Op, LC.CC);
1223 }
1224 } else {
1225 const struct {
1226 const RTLIB::Libcall Op;
1227 const char * const Name;
1228 const CallingConv::ID CC;
1229 } LibraryCalls[] = {
1230 { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1231 { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1232 { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1233 { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS },
1234
1235 { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1236 { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1237 { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1238 { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS },
1239 };
1240
1241 for (const auto &LC : LibraryCalls) {
1242 setLibcallName(LC.Op, LC.Name);
1243 setLibcallCallingConv(LC.Op, LC.CC);
1244 }
1245 }
1246
1247 setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
1248 setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
1249 setOperationAction(ISD::SDIVREM, MVT::i64, Custom);
1250 setOperationAction(ISD::UDIVREM, MVT::i64, Custom);
1251 } else {
1252 setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
1253 setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
1254 }
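
As background for the block above (an illustrative sketch, not code from ARMISelLowering.cpp): the RTABI divide helpers named in the tables hand back the quotient and the remainder of one division together, which is why ISD::SDIVREM and ISD::UDIVREM are marked Custom here instead of being split into separate SDIV/SREM libcalls. A small portable sketch of the same idea:

  #include <cstdlib>

  // One combined operation yields both results, mirroring what a single
  // __aeabi_idivmod-style helper provides on AEABI targets.
  std::div_t quotientAndRemainder(int a, int b) {
    return std::div(a, b); // .quot and .rem from one division
  }
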
1255
1256 if (Subtarget->getTargetTriple().isOSMSVCRT()) {
1257 // MSVCRT doesn't have powi; fall back to pow
1258 setLibcallName(RTLIB::POWI_F32, nullptr);
1259 setLibcallName(RTLIB::POWI_F64, nullptr);
1260 }
1261
1262 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
1263 setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
1264 setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
1265 setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
1266
1267 setOperationAction(ISD::TRAP, MVT::Other, Legal);
1268 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
1269
1270 // Use the default implementation.
1271 setOperationAction(ISD::VASTART, MVT::Other, Custom);
1272 setOperationAction(ISD::VAARG, MVT::Other, Expand);
1273 setOperationAction(ISD::VACOPY, MVT::Other, Expand);
1274 setOperationAction(ISD::VAEND, MVT::Other, Expand);
1275 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
1276 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
1277
1278 if (Subtarget->isTargetWindows())
1279 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
1280 else
1281 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
1282
1283 // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
1284 // the default expansion.
1285 InsertFencesForAtomic = false;
1286 if (Subtarget->hasAnyDataBarrier() &&
1287 (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
1288 // ATOMIC_FENCE needs custom lowering; the others should have been expanded
1289 // to ldrex/strex loops already.
1290 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
1291 if (!Subtarget->isThumb() || !Subtarget->isMClass())
1292 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
1293
1294 // On v8, we have particularly efficient implementations of atomic fences
1295 // if they can be combined with nearby atomic loads and stores.
1296 if (!Subtarget->hasAcquireRelease() ||
1297 getTargetMachine().getOptLevel() == 0) {
1298 // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
1299 InsertFencesForAtomic = true;
1300 }
1301 } else {
1302 // If there's anything we can use as a barrier, go through custom lowering
1303 // for ATOMIC_FENCE.
1304 // If target has DMB in thumb, Fences can be inserted.
1305 if (Subtarget->hasDataBarrier())
1306 InsertFencesForAtomic = true;
1307
1308 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other,
1309 Subtarget->hasAnyDataBarrier() ? Custom : Expand);
1310
1311 // Set them all for expansion, which will force libcalls.
1312 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
1313 setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
1314 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
1315 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
1316 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
1317 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
1318 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
1319 setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
1320 setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
1321 setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
1322 setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
1323 setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
1324 // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
1325 // Unordered/Monotonic case.
1326 if (!InsertFencesForAtomic) {
1327 setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
1328 setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
1329 }
1330 }
1331
1332 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
1333
1334 // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
1335 if (!Subtarget->hasV6Ops()) {
1336 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
1337 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
1338 }
1339 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
1340
1341 if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() &&
1342 !Subtarget->isThumb1Only()) {
1343 // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
1344 // iff target supports vfp2.
1345 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
1346 setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
1347 setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
1348 }
1349
1350 // We want to custom lower some of our intrinsics.
1351 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
1352 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
1353 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
1354 setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
1355 if (Subtarget->useSjLjEH())
1356 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
1357
1358 setOperationAction(ISD::SETCC, MVT::i32, Expand);
1359 setOperationAction(ISD::SETCC, MVT::f32, Expand);
1360 setOperationAction(ISD::SETCC, MVT::f64, Expand);
1361 setOperationAction(ISD::SELECT, MVT::i32, Custom);
1362 setOperationAction(ISD::SELECT, MVT::f32, Custom);
1363 setOperationAction(ISD::SELECT, MVT::f64, Custom);
1364 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
1365 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
1366 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
1367 if (Subtarget->hasFullFP16()) {
1368 setOperationAction(ISD::SETCC, MVT::f16, Expand);
1369 setOperationAction(ISD::SELECT, MVT::f16, Custom);
1370 setOperationAction(ISD::SELECT_CC, MVT::f16, Custom);
1371 }
1372
1373 setOperationAction(ISD::SETCCCARRY, MVT::i32, Custom);
1374
1375 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
1376 setOperationAction(ISD::BR_CC, MVT::i32, Custom);
1377 if (Subtarget->hasFullFP16())
1378 setOperationAction(ISD::BR_CC, MVT::f16, Custom);
1379 setOperationAction(ISD::BR_CC, MVT::f32, Custom);
1380 setOperationAction(ISD::BR_CC, MVT::f64, Custom);
1381 setOperationAction(ISD::BR_JT, MVT::Other, Custom);
1382
1383 // We don't support sin/cos/fmod/copysign/pow
1384 setOperationAction(ISD::FSIN, MVT::f64, Expand);
1385 setOperationAction(ISD::FSIN, MVT::f32, Expand);
1386 setOperationAction(ISD::FCOS, MVT::f32, Expand);
1387 setOperationAction(ISD::FCOS, MVT::f64, Expand);
1388 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
1389 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
1390 setOperationAction(ISD::FREM, MVT::f64, Expand);
1391 setOperationAction(ISD::FREM, MVT::f32, Expand);
1392 if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() &&
1393 !Subtarget->isThumb1Only()) {
1394 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
1395 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
1396 }
1397 setOperationAction(ISD::FPOW, MVT::f64, Expand);
1398 setOperationAction(ISD::FPOW, MVT::f32, Expand);
1399
1400 if (!Subtarget->hasVFP4Base()) {
1401 setOperationAction(ISD::FMA, MVT::f64, Expand);
1402 setOperationAction(ISD::FMA, MVT::f32, Expand);
1403 }
1404
1405 // Various VFP goodness
1406 if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
1407 // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
1408 if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) {
1409 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
1410 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
1411 }
1412
1413 // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
1414 if (!Subtarget->hasFP16()) {
1415 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
1416 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
1417 }
1418
1419 // Strict floating-point comparisons need custom lowering.
1420 setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Custom);
1421 setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom);
1422 setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Custom);
1423 setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Custom);
1424 setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Custom);
1425 setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Custom);
1426 }
1427
1428 // Use __sincos_stret if available.
1429 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1430 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1431 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
1432 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
1433 }
1434
1435 // FP-ARMv8 implements a lot of rounding-like FP operations.
1436 if (Subtarget->hasFPARMv8Base()) {
1437 setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
1438 setOperationAction(ISD::FCEIL, MVT::f32, Legal);
1439 setOperationAction(ISD::FROUND, MVT::f32, Legal);
1440 setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
1441 setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
1442 setOperationAction(ISD::FRINT, MVT::f32, Legal);
1443 setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
1444 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
1445 if (Subtarget->hasNEON()) {
1446 setOperationAction(ISD::FMINNUM, MVT::v2f32, Legal);
1447 setOperationAction(ISD::FMAXNUM, MVT::v2f32, Legal);
1448 setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
1449 setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
1450 }
1451
1452 if (Subtarget->hasFP64()) {
1453 setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
1454 setOperationAction(ISD::FCEIL, MVT::f64, Legal);
1455 setOperationAction(ISD::FROUND, MVT::f64, Legal);
1456 setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
1457 setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
1458 setOperationAction(ISD::FRINT, MVT::f64, Legal);
1459 setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
1460 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
1461 }
1462 }
1463
1464 // FP16 operations often need to be promoted to call lib functions
1465 if (Subtarget->hasFullFP16()) {
1466 setOperationAction(ISD::FREM, MVT::f16, Promote);
1467 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
1468 setOperationAction(ISD::FSIN, MVT::f16, Promote);
1469 setOperationAction(ISD::FCOS, MVT::f16, Promote);
1470 setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
1471 setOperationAction(ISD::FPOWI, MVT::f16, Promote);
1472 setOperationAction(ISD::FPOW, MVT::f16, Promote);
1473 setOperationAction(ISD::FEXP, MVT::f16, Promote);
1474 setOperationAction(ISD::FEXP2, MVT::f16, Promote);
1475 setOperationAction(ISD::FLOG, MVT::f16, Promote);
1476 setOperationAction(ISD::FLOG10, MVT::f16, Promote);
1477 setOperationAction(ISD::FLOG2, MVT::f16, Promote);
1478
1479 setOperationAction(ISD::FROUND, MVT::f16, Legal);
1480 }
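
As background for the Promote entries above (an illustrative sketch only, assuming a host toolchain that provides the _Float16 type): promoting an f16 operation means widening to f32, using the f32 library routine, and narrowing the result back.

  #include <cmath>

  // Mirrors what Promote amounts to for ISD::FREM on f16.
  _Float16 fremHalf(_Float16 a, _Float16 b) {
    return static_cast<_Float16>(
        std::fmod(static_cast<float>(a), static_cast<float>(b)));
  }
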
1481
1482 if (Subtarget->hasNEON()) {
1483 // vmin and vmax aren't available in a scalar form, so we can use
1484 // a NEON instruction with an undef lane instead. This has a performance
1485 // penalty on some cores, so we don't do this unless we have been
1486 // asked to by the core tuning model.
1487 if (Subtarget->useNEONForSinglePrecisionFP()) {
1488 setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);
1489 setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
1490 setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
1491 setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal);
1492 }
1493 setOperationAction(ISD::FMINIMUM, MVT::v2f32, Legal);
1494 setOperationAction(ISD::FMAXIMUM, MVT::v2f32, Legal);
1495 setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal);
1496 setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal);
1497
1498 if (Subtarget->hasFullFP16()) {
1499 setOperationAction(ISD::FMINNUM, MVT::v4f16, Legal);
1500 setOperationAction(ISD::FMAXNUM, MVT::v4f16, Legal);
1501 setOperationAction(ISD::FMINNUM, MVT::v8f16, Legal);
1502 setOperationAction(ISD::FMAXNUM, MVT::v8f16, Legal);
1503
1504 setOperationAction(ISD::FMINIMUM, MVT::v4f16, Legal);
1505 setOperationAction(ISD::FMAXIMUM, MVT::v4f16, Legal);
1506 setOperationAction(ISD::FMINIMUM, MVT::v8f16, Legal);
1507 setOperationAction(ISD::FMAXIMUM, MVT::v8f16, Legal);
1508 }
1509 }
1510
1511 // We have target-specific dag combine patterns for the following nodes:
1512 // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
1513 setTargetDAGCombine(ISD::ADD);
1514 setTargetDAGCombine(ISD::SUB);
1515 setTargetDAGCombine(ISD::MUL);
1516 setTargetDAGCombine(ISD::AND);
1517 setTargetDAGCombine(ISD::OR);
1518 setTargetDAGCombine(ISD::XOR);
1519
1520 if (Subtarget->hasMVEIntegerOps())
1521 setTargetDAGCombine(ISD::VSELECT);
1522
1523 if (Subtarget->hasV6Ops())
1524 setTargetDAGCombine(ISD::SRL);
1525 if (Subtarget->isThumb1Only())
1526 setTargetDAGCombine(ISD::SHL);
1527
1528 setStackPointerRegisterToSaveRestore(ARM::SP);
1529
1530 if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
1531 !Subtarget->hasVFP2Base() || Subtarget->hasMinSize())
1532 setSchedulingPreference(Sched::RegPressure);
1533 else
1534 setSchedulingPreference(Sched::Hybrid);
1535
1536 //// temporary - rewrite interface to use type
1537 MaxStoresPerMemset = 8;
1538 MaxStoresPerMemsetOptSize = 4;
1539 MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
1540 MaxStoresPerMemcpyOptSize = 2;
1541 MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
1542 MaxStoresPerMemmoveOptSize = 2;
1543
1544 // On ARM arguments smaller than 4 bytes are extended, so all arguments
1545 // are at least 4 bytes aligned.
1546 setMinStackArgumentAlignment(Align(4));
1547
1548 // Prefer likely predicted branches to selects on out-of-order cores.
1549 PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
1550
1551 setPrefLoopAlignment(Align(1ULL << Subtarget->getPrefLoopLogAlignment()));
1552
1553 setMinFunctionAlignment(Subtarget->isThumb() ? Align(2) : Align(4));
1554
1555 if (Subtarget->isThumb() || Subtarget->isThumb2())
1556 setTargetDAGCombine(ISD::ABS);
1557}
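
The constructor that ends here is essentially one large legalization table. For readers of the report, this stand-in sketch (a made-up enum, not the real TargetLowering API) summarizes what the action kinds used above mean:

  enum class Action { Legal, Expand, LibCall, Promote, Custom };

  const char *describe(Action A) {
    switch (A) {
    case Action::Legal:   return "instruction exists, select it as-is";
    case Action::Expand:  return "rewrite using other legal nodes (or a libcall as a last resort)";
    case Action::LibCall: return "always call a runtime library routine";
    case Action::Promote: return "widen the type first, then retry";
    case Action::Custom:  return "handled by the target's LowerOperation() hook";
    }
    return "";
  }
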
1558
1559bool ARMTargetLowering::useSoftFloat() const {
1560 return Subtarget->useSoftFloat();
1561}
1562
1563// FIXME: It might make sense to define the representative register class as the
1564// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
1565 // a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
1566// SPR's representative would be DPR_VFP2. This should work well if register
1567// pressure tracking were modified such that a register use would increment the
1568 // pressure of the register class's representative and all of its super
1569 // classes' representatives transitively. We have not implemented this because
1570// of the difficulty prior to coalescing of modeling operand register classes
1571// due to the common occurrence of cross class copies and subregister insertions
1572// and extractions.
1573std::pair<const TargetRegisterClass *, uint8_t>
1574ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
1575 MVT VT) const {
1576 const TargetRegisterClass *RRC = nullptr;
1577 uint8_t Cost = 1;
1578 switch (VT.SimpleTy) {
1579 default:
1580 return TargetLowering::findRepresentativeClass(TRI, VT);
1581 // Use DPR as representative register class for all floating point
1582 // and vector types. Since there are 32 SPR registers and 32 DPR registers,
1583 // the cost is 1 for both f32 and f64.
1584 case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
1585 case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
1586 RRC = &ARM::DPRRegClass;
1587 // When NEON is used for SP, only half of the register file is available
1588 // because operations that define both SP and DP results will be constrained
1589 // to the VFP2 class (D0-D15). We currently model this constraint prior to
1590 // coalescing by double-counting the SP regs. See the FIXME above.
1591 if (Subtarget->useNEONForSinglePrecisionFP())
1592 Cost = 2;
1593 break;
1594 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
1595 case MVT::v4f32: case MVT::v2f64:
1596 RRC = &ARM::DPRRegClass;
1597 Cost = 2;
1598 break;
1599 case MVT::v4i64:
1600 RRC = &ARM::DPRRegClass;
1601 Cost = 4;
1602 break;
1603 case MVT::v8i64:
1604 RRC = &ARM::DPRRegClass;
1605 Cost = 8;
1606 break;
1607 }
1608 return std::make_pair(RRC, Cost);
1609}
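
The Cost values returned above track how many 64-bit D registers a value of each type occupies, doubled for single-precision values when NEON handles them and only half the D file is usable. A tiny sketch of that relationship, with a made-up helper name (illustrative only):

  // 64-bit and smaller -> 1, 128-bit (Q) -> 2, 256-bit (QQ) -> 4, 512-bit (QQQQ) -> 8.
  unsigned dRegPressure(unsigned TypeSizeInBits) {
    return TypeSizeInBits <= 64 ? 1u : TypeSizeInBits / 64;
  }
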
1610
1611const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
1612 switch ((ARMISD::NodeType)Opcode) {
1613 case ARMISD::FIRST_NUMBER: break;
1614 case ARMISD::Wrapper: return "ARMISD::Wrapper";
1615 case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC";
1616 case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
1617 case ARMISD::COPY_STRUCT_BYVAL: return "ARMISD::COPY_STRUCT_BYVAL";
1618 case ARMISD::CALL: return "ARMISD::CALL";
1619 case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
1620 case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
1621 case ARMISD::tSECALL: return "ARMISD::tSECALL";
1622 case ARMISD::BRCOND: return "ARMISD::BRCOND";
1623 case ARMISD::BR_JT: return "ARMISD::BR_JT";
1624 case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
1625 case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
1626 case ARMISD::SERET_FLAG: return "ARMISD::SERET_FLAG";
1627 case ARMISD::INTRET_FLAG: return "ARMISD::INTRET_FLAG";
1628 case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
1629 case ARMISD::CMP: return "ARMISD::CMP";
1630 case ARMISD::CMN: return "ARMISD::CMN";
1631 case ARMISD::CMPZ: return "ARMISD::CMPZ";
1632 case ARMISD::CMPFP: return "ARMISD::CMPFP";
1633 case ARMISD::CMPFPE: return "ARMISD::CMPFPE";
1634 case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
1635 case ARMISD::CMPFPEw0: return "ARMISD::CMPFPEw0";
1636 case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
1637 case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
1638
1639 case ARMISD::CMOV: return "ARMISD::CMOV";
1640 case ARMISD::SUBS: return "ARMISD::SUBS";
1641
1642 case ARMISD::SSAT: return "ARMISD::SSAT";
1643 case ARMISD::USAT: return "ARMISD::USAT";
1644
1645 case ARMISD::ASRL: return "ARMISD::ASRL";
1646 case ARMISD::LSRL: return "ARMISD::LSRL";
1647 case ARMISD::LSLL: return "ARMISD::LSLL";
1648
1649 case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
1650 case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
1651 case ARMISD::RRX: return "ARMISD::RRX";
1652
1653 case ARMISD::ADDC: return "ARMISD::ADDC";
1654 case ARMISD::ADDE: return "ARMISD::ADDE";
1655 case ARMISD::SUBC: return "ARMISD::SUBC";
1656 case ARMISD::SUBE: return "ARMISD::SUBE";
1657 case ARMISD::LSLS: return "ARMISD::LSLS";
1658
1659 case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
1660 case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";
1661 case ARMISD::VMOVhr: return "ARMISD::VMOVhr";
1662 case ARMISD::VMOVrh: return "ARMISD::VMOVrh";
1663 case ARMISD::VMOVSR: return "ARMISD::VMOVSR";
1664
1665 case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
1666 case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP";
1667 case ARMISD::EH_SJLJ_SETUP_DISPATCH: return "ARMISD::EH_SJLJ_SETUP_DISPATCH";
1668
1669 case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";
1670
1671 case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
1672
1673 case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";
1674
1675 case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
1676
1677 case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
1678
1679 case ARMISD::LDRD: return "ARMISD::LDRD";
1680 case ARMISD::STRD: return "ARMISD::STRD";
1681
1682 case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK";
1683 case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK";
1684
1685 case ARMISD::PREDICATE_CAST: return "ARMISD::PREDICATE_CAST";
1686 case ARMISD::VECTOR_REG_CAST: return "ARMISD::VECTOR_REG_CAST";
1687 case ARMISD::VCMP: return "ARMISD::VCMP";
1688 case ARMISD::VCMPZ: return "ARMISD::VCMPZ";
1689 case ARMISD::VTST: return "ARMISD::VTST";
1690
1691 case ARMISD::VSHLs: return "ARMISD::VSHLs";
1692 case ARMISD::VSHLu: return "ARMISD::VSHLu";
1693 case ARMISD::VSHLIMM: return "ARMISD::VSHLIMM";
1694 case ARMISD::VSHRsIMM: return "ARMISD::VSHRsIMM";
1695 case ARMISD::VSHRuIMM: return "ARMISD::VSHRuIMM";
1696 case ARMISD::VRSHRsIMM: return "ARMISD::VRSHRsIMM";
1697 case ARMISD::VRSHRuIMM: return "ARMISD::VRSHRuIMM";
1698 case ARMISD::VRSHRNIMM: return "ARMISD::VRSHRNIMM";
1699 case ARMISD::VQSHLsIMM: return "ARMISD::VQSHLsIMM";
1700 case ARMISD::VQSHLuIMM: return "ARMISD::VQSHLuIMM";
1701 case ARMISD::VQSHLsuIMM: return "ARMISD::VQSHLsuIMM";
1702 case ARMISD::VQSHRNsIMM: return "ARMISD::VQSHRNsIMM";
1703 case ARMISD::VQSHRNuIMM: return "ARMISD::VQSHRNuIMM";
1704 case ARMISD::VQSHRNsuIMM: return "ARMISD::VQSHRNsuIMM";
1705 case ARMISD::VQRSHRNsIMM: return "ARMISD::VQRSHRNsIMM";
1706 case ARMISD::VQRSHRNuIMM: return "ARMISD::VQRSHRNuIMM";
1707 case ARMISD::VQRSHRNsuIMM: return "ARMISD::VQRSHRNsuIMM";
1708 case ARMISD::VSLIIMM: return "ARMISD::VSLIIMM";
1709 case ARMISD::VSRIIMM: return "ARMISD::VSRIIMM";
1710 case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";
1711 case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
1712 case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM";
1713 case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM";
1714 case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM";
1715 case ARMISD::VDUP: return "ARMISD::VDUP";
1716 case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE";
1717 case ARMISD::VEXT: return "ARMISD::VEXT";
1718 case ARMISD::VREV64: return "ARMISD::VREV64";
1719 case ARMISD::VREV32: return "ARMISD::VREV32";
1720 case ARMISD::VREV16: return "ARMISD::VREV16";
1721 case ARMISD::VZIP: return "ARMISD::VZIP";
1722 case ARMISD::VUZP: return "ARMISD::VUZP";
1723 case ARMISD::VTRN: return "ARMISD::VTRN";
1724 case ARMISD::VTBL1: return "ARMISD::VTBL1";
1725 case ARMISD::VTBL2: return "ARMISD::VTBL2";
1726 case ARMISD::VMOVN: return "ARMISD::VMOVN";
1727 case ARMISD::VQMOVNs: return "ARMISD::VQMOVNs";
1728 case ARMISD::VQMOVNu: return "ARMISD::VQMOVNu";
1729 case ARMISD::VCVTN: return "ARMISD::VCVTN";
1730 case ARMISD::VCVTL: return "ARMISD::VCVTL";
1731 case ARMISD::VMULLs: return "ARMISD::VMULLs";
1732 case ARMISD::VMULLu: return "ARMISD::VMULLu";
1733 case ARMISD::VQDMULH: return "ARMISD::VQDMULH";
1734 case ARMISD::VADDVs: return "ARMISD::VADDVs";
1735 case ARMISD::VADDVu: return "ARMISD::VADDVu";
1736 case ARMISD::VADDVps: return "ARMISD::VADDVps";
1737 case ARMISD::VADDVpu: return "ARMISD::VADDVpu";
1738 case ARMISD::VADDLVs: return "ARMISD::VADDLVs";
1739 case ARMISD::VADDLVu: return "ARMISD::VADDLVu";
1740 case ARMISD::VADDLVAs: return "ARMISD::VADDLVAs";
1741 case ARMISD::VADDLVAu: return "ARMISD::VADDLVAu";
1742 case ARMISD::VADDLVps: return "ARMISD::VADDLVps";
1743 case ARMISD::VADDLVpu: return "ARMISD::VADDLVpu";
1744 case ARMISD::VADDLVAps: return "ARMISD::VADDLVAps";
1745 case ARMISD::VADDLVApu: return "ARMISD::VADDLVApu";
1746 case ARMISD::VMLAVs: return "ARMISD::VMLAVs";
1747 case ARMISD::VMLAVu: return "ARMISD::VMLAVu";
1748 case ARMISD::VMLAVps: return "ARMISD::VMLAVps";
1749 case ARMISD::VMLAVpu: return "ARMISD::VMLAVpu";
1750 case ARMISD::VMLALVs: return "ARMISD::VMLALVs";
1751 case ARMISD::VMLALVu: return "ARMISD::VMLALVu";
1752 case ARMISD::VMLALVps: return "ARMISD::VMLALVps";
1753 case ARMISD::VMLALVpu: return "ARMISD::VMLALVpu";
1754 case ARMISD::VMLALVAs: return "ARMISD::VMLALVAs";
1755 case ARMISD::VMLALVAu: return "ARMISD::VMLALVAu";
1756 case ARMISD::VMLALVAps: return "ARMISD::VMLALVAps";
1757 case ARMISD::VMLALVApu: return "ARMISD::VMLALVApu";
1758 case ARMISD::VMINVu: return "ARMISD::VMINVu";
1759 case ARMISD::VMINVs: return "ARMISD::VMINVs";
1760 case ARMISD::VMAXVu: return "ARMISD::VMAXVu";
1761 case ARMISD::VMAXVs: return "ARMISD::VMAXVs";
1762 case ARMISD::UMAAL: return "ARMISD::UMAAL";
1763 case ARMISD::UMLAL: return "ARMISD::UMLAL";
1764 case ARMISD::SMLAL: return "ARMISD::SMLAL";
1765 case ARMISD::SMLALBB: return "ARMISD::SMLALBB";
1766 case ARMISD::SMLALBT: return "ARMISD::SMLALBT";
1767 case ARMISD::SMLALTB: return "ARMISD::SMLALTB";
1768 case ARMISD::SMLALTT: return "ARMISD::SMLALTT";
1769 case ARMISD::SMULWB: return "ARMISD::SMULWB";
1770 case ARMISD::SMULWT: return "ARMISD::SMULWT";
1771 case ARMISD::SMLALD: return "ARMISD::SMLALD";
1772 case ARMISD::SMLALDX: return "ARMISD::SMLALDX";
1773 case ARMISD::SMLSLD: return "ARMISD::SMLSLD";
1774 case ARMISD::SMLSLDX: return "ARMISD::SMLSLDX";
1775 case ARMISD::SMMLAR: return "ARMISD::SMMLAR";
1776 case ARMISD::SMMLSR: return "ARMISD::SMMLSR";
1777 case ARMISD::QADD16b: return "ARMISD::QADD16b";
1778 case ARMISD::QSUB16b: return "ARMISD::QSUB16b";
1779 case ARMISD::QADD8b: return "ARMISD::QADD8b";
1780 case ARMISD::QSUB8b: return "ARMISD::QSUB8b";
1781 case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
1782 case ARMISD::BFI: return "ARMISD::BFI";
1783 case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
1784 case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
1785 case ARMISD::VBSP: return "ARMISD::VBSP";
1786 case ARMISD::MEMCPY: return "ARMISD::MEMCPY";
1787 case ARMISD::VLD1DUP: return "ARMISD::VLD1DUP";
1788 case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
1789 case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP";
1790 case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP";
1791 case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD";
1792 case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD";
1793 case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD";
1794 case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD";
1795 case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD";
1796 case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD";
1797 case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD";
1798 case ARMISD::VLD1DUP_UPD: return "ARMISD::VLD1DUP_UPD";
1799 case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD";
1800 case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD";
1801 case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD";
1802 case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD";
1803 case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD";
1804 case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD";
1805 case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD";
1806 case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD";
1807 case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD";
1808 case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD";
1809 case ARMISD::WLS: return "ARMISD::WLS";
1810 case ARMISD::WLSSETUP: return "ARMISD::WLSSETUP";
1811 case ARMISD::LE: return "ARMISD::LE";
1812 case ARMISD::LOOP_DEC: return "ARMISD::LOOP_DEC";
1813 case ARMISD::CSINV: return "ARMISD::CSINV";
1814 case ARMISD::CSNEG: return "ARMISD::CSNEG";
1815 case ARMISD::CSINC: return "ARMISD::CSINC";
1816 }
1817 return nullptr;
1818}
1819
1820EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
1821 EVT VT) const {
1822 if (!VT.isVector())
1823 return getPointerTy(DL);
1824
1825 // MVE has a predicate register.
1826 if (Subtarget->hasMVEIntegerOps() &&
1827 (VT == MVT::v4i32 || VT == MVT::v8i16 || VT == MVT::v16i8))
1828 return MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
1829 return VT.changeVectorElementTypeToInteger();
1830}
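
In effect, getSetCCResultType gives a pointer-sized i32 for scalar compares, an i1-per-lane predicate type for the MVE vector types, and otherwise a vector whose integer elements match the input lane width. A few representative pairs (illustrative data only):

  struct SetCCCase { const char *Input; const char *Result; };
  const SetCCCase SetCCExamples[] = {
      {"i32 (scalar)",      "i32 (pointer-sized)"},
      {"v4i32 with MVE",    "v4i1 predicate"},
      {"v4f32 without MVE", "v4i32"},
  };
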
1831
1832/// getRegClassFor - Return the register class that should be used for the
1833/// specified value type.
1834const TargetRegisterClass *
1835ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
1836 (void)isDivergent;
1837 // Map v4i64 to QQ registers but do not make the type legal. Similarly map
1838 // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
1839 // load / store 4 to 8 consecutive NEON D registers, or 2 to 4 consecutive
1840 // MVE Q registers.
1841 if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
1842 if (VT == MVT::v4i64)
1843 return &ARM::QQPRRegClass;
1844 if (VT == MVT::v8i64)
1845 return &ARM::QQQQPRRegClass;
1846 }
1847 return TargetLowering::getRegClassFor(VT);
1848}
1849
1850 // memcpy, and other memory intrinsics, typically try to use LDM/STM if the
1851// source/dest is aligned and the copy size is large enough. We therefore want
1852// to align such objects passed to memory intrinsics.
1853bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
1854 unsigned &PrefAlign) const {
1855 if (!isa<MemIntrinsic>(CI))
1856 return false;
1857 MinSize = 8;
1858 // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
1859 // cycle faster than 4-byte aligned LDM.
1860 PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4);
1861 return true;
1862}
1863
1864// Create a fast isel object.
1865FastISel *
1866ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
1867 const TargetLibraryInfo *libInfo) const {
1868 return ARM::createFastISel(funcInfo, libInfo);
1869}
1870
1871Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
1872 unsigned NumVals = N->getNumValues();
1873 if (!NumVals)
1874 return Sched::RegPressure;
1875
1876 for (unsigned i = 0; i != NumVals; ++i) {
1877 EVT VT = N->getValueType(i);
1878 if (VT == MVT::Glue || VT == MVT::Other)
1879 continue;
1880 if (VT.isFloatingPoint() || VT.isVector())
1881 return Sched::ILP;
1882 }
1883
1884 if (!N->isMachineOpcode())
1885 return Sched::RegPressure;
1886
1887 // Loads are scheduled for latency even if the instruction itinerary
1888 // is not available.
1889 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1890 const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1891
1892 if (MCID.getNumDefs() == 0)
1893 return Sched::RegPressure;
1894 if (!Itins->isEmpty() &&
1895 Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
1896 return Sched::ILP;
1897
1898 return Sched::RegPressure;
1899}
1900
1901//===----------------------------------------------------------------------===//
1902// Lowering Code
1903//===----------------------------------------------------------------------===//
1904
1905static bool isSRL16(const SDValue &Op) {
1906 if (Op.getOpcode() != ISD::SRL)
1907 return false;
1908 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1909 return Const->getZExtValue() == 16;
1910 return false;
1911}
1912
1913static bool isSRA16(const SDValue &Op) {
1914 if (Op.getOpcode() != ISD::SRA)
1915 return false;
1916 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1917 return Const->getZExtValue() == 16;
1918 return false;
1919}
1920
1921static bool isSHL16(const SDValue &Op) {
1922 if (Op.getOpcode() != ISD::SHL)
1923 return false;
1924 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1925 return Const->getZExtValue() == 16;
1926 return false;
1927}
1928
1929 // Check for a signed 16-bit value. We special case SRA because it makes
1930 // things simpler when also looking for SRAs that aren't sign extending a
1931// smaller value. Without the check, we'd need to take extra care with
1932// checking order for some operations.
1933static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
1934 if (isSRA16(Op))
1935 return isSHL16(Op.getOperand(0));
1936 return DAG.ComputeNumSignBits(Op) == 17;
1937}
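
For readers of these helpers: isSRA16(isSHL16(x)) is the classic shift-pair idiom for sign-extending the low 16 bits, and ComputeNumSignBits(Op) == 17 on an i32 means the top 17 bits are all copies of the sign bit, i.e. the value already fits in a signed 16-bit range. A scalar sketch of the same idiom (illustrative, assumes the usual two's-complement narrowing):

  #include <cstdint>

  // Same effect as (sra (shl x, 16), 16) at the DAG level: keep only a
  // sign-extended 16-bit value.
  int32_t signExtendLow16(uint32_t x) {
    return static_cast<int16_t>(x & 0xFFFFu);
  }
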
1938
1939/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
1940static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
1941 switch (CC) {
1942 default: llvm_unreachable("Unknown condition code!");
1943 case ISD::SETNE: return ARMCC::NE;
1944 case ISD::SETEQ: return ARMCC::EQ;
1945 case ISD::SETGT: return ARMCC::GT;
1946 case ISD::SETGE: return ARMCC::GE;
1947 case ISD::SETLT: return ARMCC::LT;
1948 case ISD::SETLE: return ARMCC::LE;
1949 case ISD::SETUGT: return ARMCC::HI;
1950 case ISD::SETUGE: return ARMCC::HS;
1951 case ISD::SETULT: return ARMCC::LO;
1952 case ISD::SETULE: return ARMCC::LS;
1953 }
1954}
1955
1956/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
1957static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
1958 ARMCC::CondCodes &CondCode2) {
1959 CondCode2 = ARMCC::AL;
1960 switch (CC) {
1961 default: llvm_unreachable("Unknown FP condition!");
1962 case ISD::SETEQ:
1963 case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
1964 case ISD::SETGT:
1965 case ISD::SETOGT: CondCode = ARMCC::GT; break;
1966 case ISD::SETGE:
1967 case ISD::SETOGE: CondCode = ARMCC::GE; break;
1968 case ISD::SETOLT: CondCode = ARMCC::MI; break;
1969 case ISD::SETOLE: CondCode = ARMCC::LS; break;
1970 case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
1971 case ISD::SETO: CondCode = ARMCC::VC; break;
1972 case ISD::SETUO: CondCode = ARMCC::VS; break;
1973 case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
1974 case ISD::SETUGT: CondCode = ARMCC::HI; break;
1975 case ISD::SETUGE: CondCode = ARMCC::PL; break;
1976 case ISD::SETLT:
1977 case ISD::SETULT: CondCode = ARMCC::LT; break;
1978 case ISD::SETLE:
1979 case ISD::SETULE: CondCode = ARMCC::LE; break;
1980 case ISD::SETNE:
1981 case ISD::SETUNE: CondCode = ARMCC::NE; break;
1982 }
1983}
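
Some IEEE predicates need two ARM condition codes after a VFP compare (SETONE becomes MI or GT, SETUEQ becomes EQ or VS), and ARMCC::AL is used above to mean "no second code needed". A minimal stand-in showing how a caller would test for that (illustrative enum, not the real ARMCC namespace):

  enum class CondCode { EQ, GT, MI, VS, AL };

  // True when the lowered compare needs a second predicated instruction.
  bool needsTwoConditions(CondCode CondCode2) { return CondCode2 != CondCode::AL; }
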
1984
1985//===----------------------------------------------------------------------===//
1986// Calling Convention Implementation
1987//===----------------------------------------------------------------------===//
1988
1989/// getEffectiveCallingConv - Get the effective calling convention, taking into
1990/// account presence of floating point hardware and calling convention
1991/// limitations, such as support for variadic functions.
1992CallingConv::ID
1993ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
1994 bool isVarArg) const {
1995 switch (CC) {
1996 default:
1997 report_fatal_error("Unsupported calling convention");
1998 case CallingConv::ARM_AAPCS:
1999 case CallingConv::ARM_APCS:
2000 case CallingConv::GHC:
2001 case CallingConv::CFGuard_Check:
2002 return CC;
2003 case CallingConv::PreserveMost:
2004 return CallingConv::PreserveMost;
2005 case CallingConv::ARM_AAPCS_VFP:
2006 case CallingConv::Swift:
2007 return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP;
2008 case CallingConv::C:
2009 if (!Subtarget->isAAPCS_ABI())
2010 return CallingConv::ARM_APCS;
2011 else if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() &&
2012 getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
2013 !isVarArg)
2014 return CallingConv::ARM_AAPCS_VFP;
2015 else
2016 return CallingConv::ARM_AAPCS;
2017 case CallingConv::Fast:
2018 case CallingConv::CXX_FAST_TLS:
2019 if (!Subtarget->isAAPCS_ABI()) {
2020 if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && !isVarArg)
2021 return CallingConv::Fast;
2022 return CallingConv::ARM_APCS;
2023 } else if (Subtarget->hasVFP2Base() &&
2024 !Subtarget->isThumb1Only() && !isVarArg)
2025 return CallingConv::ARM_AAPCS_VFP;
2026 else
2027 return CallingConv::ARM_AAPCS;
2028 }
2029}
2030
2031CCAssignFn *ARMTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
2032 bool isVarArg) const {
2033 return CCAssignFnForNode(CC, false, isVarArg);
2034}
2035
2036CCAssignFn *ARMTargetLowering::CCAssignFnForReturn(CallingConv::ID CC,
2037 bool isVarArg) const {
2038 return CCAssignFnForNode(CC, true, isVarArg);
2039}
2040
2041/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
2042/// CallingConvention.
2043CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
2044 bool Return,
2045 bool isVarArg) const {
2046 switch (getEffectiveCallingConv(CC, isVarArg)) {
2047 default:
2048 report_fatal_error("Unsupported calling convention");
2049 case CallingConv::ARM_APCS:
2050 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
2051 case CallingConv::ARM_AAPCS:
2052 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2053 case CallingConv::ARM_AAPCS_VFP:
2054 return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
2055 case CallingConv::Fast:
2056 return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
2057 case CallingConv::GHC:
2058 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
2059 case CallingConv::PreserveMost:
2060 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2061 case CallingConv::CFGuard_Check:
2062 return (Return ? RetCC_ARM_AAPCS : CC_ARM_Win32_CFGuard_Check);
2063 }
2064}
2065
2066SDValue ARMTargetLowering::MoveToHPR(const SDLoc &dl, SelectionDAG &DAG,
2067 MVT LocVT, MVT ValVT, SDValue Val) const {
2068 Val = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocVT.getSizeInBits()),
2069 Val);
2070 if (Subtarget->hasFullFP16()) {
2071 Val = DAG.getNode(ARMISD::VMOVhr, dl, ValVT, Val);
2072 } else {
2073 Val = DAG.getNode(ISD::TRUNCATE, dl,
2074 MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
2075 Val = DAG.getNode(ISD::BITCAST, dl, ValVT, Val);
2076 }
2077 return Val;
2078}
2079
2080SDValue ARMTargetLowering::MoveFromHPR(const SDLoc &dl, SelectionDAG &DAG,
2081 MVT LocVT, MVT ValVT,
2082 SDValue Val) const {
2083 if (Subtarget->hasFullFP16()) {
2084 Val = DAG.getNode(ARMISD::VMOVrh, dl,
2085 MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
2086 } else {
2087 Val = DAG.getNode(ISD::BITCAST, dl,
2088 MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
2089 Val = DAG.getNode(ISD::ZERO_EXTEND, dl,
2090 MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
2091 }
2092 return DAG.getNode(ISD::BITCAST, dl, LocVT, Val);
2093}
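
MoveToHPR and MoveFromHPR implement the convention noted further down in this file: a half-precision value travels in the low 16 bits of a 32-bit location, via VMOVhr/VMOVrh when full FP16 is available, or via truncate/zero-extend plus bitcasts otherwise. A bit-level sketch of the fallback path (illustrative only):

  #include <cstdint>

  // The low half of the 32-bit location carries the f16 bits; placing
  // zero-extends, extracting truncates.
  uint32_t placeHalfInWord(uint16_t HalfBits) { return HalfBits; }
  uint16_t extractHalfFromWord(uint32_t Word) { return static_cast<uint16_t>(Word); }
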
2094
2095/// LowerCallResult - Lower the result values of a call into the
2096/// appropriate copies out of appropriate physical registers.
2097SDValue ARMTargetLowering::LowerCallResult(
2098 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
2099 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2100 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
2101 SDValue ThisVal) const {
2102 // Assign locations to each value returned by this call.
2103 SmallVector<CCValAssign, 16> RVLocs;
2104 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2105 *DAG.getContext());
2106 CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));
2107
2108 // Copy all of the result registers out of their specified physreg.
2109 for (unsigned i = 0; i != RVLocs.size(); ++i) {
2110 CCValAssign VA = RVLocs[i];
2111
2112 // Pass 'this' value directly from the argument to return value, to avoid
2113 // reg unit interference
2114 if (i == 0 && isThisReturn) {
2115 assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
2116        "unexpected return calling convention register assignment");
2117 InVals.push_back(ThisVal);
2118 continue;
2119 }
2120
2121 SDValue Val;
2122 if (VA.needsCustom() &&
2123 (VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2f64)) {
2124 // Handle f64 or half of a v2f64.
2125 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
2126 InFlag);
2127 Chain = Lo.getValue(1);
2128 InFlag = Lo.getValue(2);
2129 VA = RVLocs[++i]; // skip ahead to next loc
2130 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
2131 InFlag);
2132 Chain = Hi.getValue(1);
2133 InFlag = Hi.getValue(2);
2134 if (!Subtarget->isLittle())
2135 std::swap (Lo, Hi);
2136 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
2137
2138 if (VA.getLocVT() == MVT::v2f64) {
2139 SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
2140 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
2141 DAG.getConstant(0, dl, MVT::i32));
2142
2143 VA = RVLocs[++i]; // skip ahead to next loc
2144 Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
2145 Chain = Lo.getValue(1);
2146 InFlag = Lo.getValue(2);
2147 VA = RVLocs[++i]; // skip ahead to next loc
2148 Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
2149 Chain = Hi.getValue(1);
2150 InFlag = Hi.getValue(2);
2151 if (!Subtarget->isLittle())
2152 std::swap (Lo, Hi);
2153 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
2154 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
2155 DAG.getConstant(1, dl, MVT::i32));
2156 }
2157 } else {
2158 Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
2159 InFlag);
2160 Chain = Val.getValue(1);
2161 InFlag = Val.getValue(2);
2162 }
2163
2164 switch (VA.getLocInfo()) {
2165 default: llvm_unreachable("Unknown loc info!");
2166 case CCValAssign::Full: break;
2167 case CCValAssign::BCvt:
2168 Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
2169 break;
2170 }
2171
2172 // f16 arguments have their size extended to 4 bytes and passed as if they
2173 // had been copied to the LSBs of a 32-bit register.
2174 // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
2175 if (VA.needsCustom() &&
2176 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
2177 Val = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Val);
2178
2179 InVals.push_back(Val);
2180 }
2181
2182 return Chain;
2183}
2184
2185/// LowerMemOpCallTo - Store the argument to the stack.
2186SDValue ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
2187 SDValue Arg, const SDLoc &dl,
2188 SelectionDAG &DAG,
2189 const CCValAssign &VA,
2190 ISD::ArgFlagsTy Flags) const {
2191 unsigned LocMemOffset = VA.getLocMemOffset();
2192 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
2193 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
2194 StackPtr, PtrOff);
2195 return DAG.getStore(
Step 32: Calling 'SelectionDAG::getStore'
2196 Chain, dl, Arg, PtrOff,
Step 31: Null pointer value stored to 'Val.Node'
2197 MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
2198}
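
This call is where the report's path terminates: the value passed into SelectionDAG::getStore is tracked as carrying a null Node (step 31 above), and SDValue::getValueType() in SelectionDAGNodes.h forwards to that node without a null check, which is the "Called C++ object pointer is null" warning at line 1114. A stripped-down stand-in showing the shape of the defect (simplified types, not the real LLVM classes):

  struct ToyNode {
    int getValueType(unsigned ResNo) const { return static_cast<int>(ResNo); }
  };
  struct ToySDValue {
    ToyNode *Node = nullptr; // a default-constructed value carries no node
    unsigned ResNo = 0;
    int getValueType() const {
      return Node->getValueType(ResNo); // dereferences Node; null here is the bug
    }
  };
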
2199
2200void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
2201 SDValue Chain, SDValue &Arg,
2202 RegsToPassVector &RegsToPass,
2203 CCValAssign &VA, CCValAssign &NextVA,
2204 SDValue &StackPtr,
2205 SmallVectorImpl<SDValue> &MemOpChains,
2206 ISD::ArgFlagsTy Flags) const {
2207 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2208 DAG.getVTList(MVT::i32, MVT::i32), Arg);
2209 unsigned id = Subtarget->isLittle() ? 0 : 1;
Step 24: Assuming the condition is false
Step 25: '?' condition is false
2210 RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
2211
2212 if (NextVA.isRegLoc())
Step 26: Taking false branch
2213 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
2214 else {
2215 assert(NextVA.isMemLoc());
Step 27: '?' condition is true
2216 if (!StackPtr.getNode())
Step 28: Assuming the condition is false
Step 29: Taking false branch
2217 StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
2218 getPointerTy(DAG.getDataLayout()));
2219
2220 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id),
Step 30: Calling 'ARMTargetLowering::LowerMemOpCallTo'
2221 dl, DAG, NextVA,
2222 Flags));
2223 }
2224}
2225
2226 /// LowerCall - Lower a call into a callseq_start <-
2227 /// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
2228/// nodes.
2229SDValue
2230ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
2231 SmallVectorImpl<SDValue> &InVals) const {
2232 SelectionDAG &DAG = CLI.DAG;
2233 SDLoc &dl = CLI.DL;
2234 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2235 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2236 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2237 SDValue Chain = CLI.Chain;
2238 SDValue Callee = CLI.Callee;
2239 bool &isTailCall = CLI.IsTailCall;
2240 CallingConv::ID CallConv = CLI.CallConv;
2241 bool doesNotRet = CLI.DoesNotReturn;
2242 bool isVarArg = CLI.IsVarArg;
2243
2244 MachineFunction &MF = DAG.getMachineFunction();
2245 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2246 MachineFunction::CallSiteInfo CSInfo;
2247 bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
Step 1: '?' condition is false
2248 bool isThisReturn = false;
2249 bool isCmseNSCall = false;
2250 bool PreferIndirect = false;
2251
2252 // Determine whether this is a non-secure function call.
2253 if (CLI.CB && CLI.CB->getAttributes().hasFnAttribute("cmse_nonsecure_call"))
Step 2: Assuming field 'CB' is null
Step 3: Taking false branch
2254 isCmseNSCall = true;
2255
2256 // Disable tail calls if they're not supported.
2257 if (!Subtarget->supportsTailCall())
Step 4: Assuming the condition is false
Step 5: Taking false branch
2258 isTailCall = false;
2259
2260 // For both the non-secure calls and the returns from a CMSE entry function,
2261 // the function needs to do some extra work after the call, or before the
2262 // return, respectively; thus it cannot end with a tail call.
2263 if (isCmseNSCall
Step 5.1: 'isCmseNSCall' is false
|| AFI->isCmseNSEntryFunction())
Step 6: Assuming the condition is false
Step 7: Taking false branch
2264 isTailCall = false;
2265
2266 if (isa<GlobalAddressSDNode>(Callee)) {
Step 8: Assuming 'Callee' is not a 'GlobalAddressSDNode'
Step 9: Taking false branch
2267 // If we're optimizing for minimum size and the function is called three or
2268 // more times in this block, we can improve codesize by calling indirectly
2269 // as BLXr has a 16-bit encoding.
2270 auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
2271 if (CLI.CB) {
2272 auto *BB = CLI.CB->getParent();
2273 PreferIndirect = Subtarget->isThumb() && Subtarget->hasMinSize() &&
2274 count_if(GV->users(), [&BB](const User *U) {
2275 return isa<Instruction>(U) &&
2276 cast<Instruction>(U)->getParent() == BB;
2277 }) > 2;
2278 }
2279 }
2280 if (isTailCall) {
Step 10: Assuming 'isTailCall' is false
Step 11: Taking false branch
2281 // Check if it's really possible to do a tail call.
2282 isTailCall = IsEligibleForTailCallOptimization(
2283 Callee, CallConv, isVarArg, isStructRet,
2284 MF.getFunction().hasStructRetAttr(), Outs, OutVals, Ins, DAG,
2285 PreferIndirect);
2286 if (!isTailCall && CLI.CB && CLI.CB->isMustTailCall())
2287 report_fatal_error("failed to perform tail call elimination on a call "
2288 "site marked musttail");
2289 // We don't support GuaranteedTailCallOpt for ARM, only automatically
2290 // detected sibcalls.
2291 if (isTailCall)
2292 ++NumTailCalls;
2293 }
2294
2295 // Analyze operands of the call, assigning locations to each operand.
2296 SmallVector<CCValAssign, 16> ArgLocs;
2297 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
2298 *DAG.getContext());
2299 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
2300
2301 // Get a count of how many bytes are to be pushed on the stack.
2302 unsigned NumBytes = CCInfo.getNextStackOffset();
2303
2304 if (isTailCall) {
Step 12: Assuming 'isTailCall' is false
Step 13: Taking false branch
2305 // For tail calls, memory operands are available in our caller's stack.
2306 NumBytes = 0;
2307 } else {
2308 // Adjust the stack pointer for the new arguments...
2309 // These operations are automatically eliminated by the prolog/epilog pass
2310 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
2311 }
2312
2313 SDValue StackPtr =
2314 DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
2315
2316 RegsToPassVector RegsToPass;
2317 SmallVector<SDValue, 8> MemOpChains;
2318
2319 // Walk the register/memloc assignments, inserting copies/loads. In the case
2320 // of tail call optimization, arguments are handled later.
2321 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
Step 15: Loop condition is true. Entering loop body
2322 i != e;
Step 14: Assuming 'i' is not equal to 'e'
2323 ++i, ++realArgIdx) {
2324 CCValAssign &VA = ArgLocs[i];
2325 SDValue Arg = OutVals[realArgIdx];
2326 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2327 bool isByVal = Flags.isByVal();
2328
2329 // Promote the value if needed.
2330 switch (VA.getLocInfo()) {
Step 16: Control jumps to 'case BCvt:' at line 2342
2331 default: llvm_unreachable("Unknown loc info!");
2332 case CCValAssign::Full: break;
2333 case CCValAssign::SExt:
2334 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
2335 break;
2336 case CCValAssign::ZExt:
2337 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
2338 break;
2339 case CCValAssign::AExt:
2340 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
2341 break;
2342 case CCValAssign::BCvt:
2343 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2344 break;
Step 17: Execution continues on line 2350
2345 }
2346
2347 // f16 arguments have their size extended to 4 bytes and passed as if they
2348 // had been copied to the LSBs of a 32-bit register.
2349 // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
2350 if (VA.needsCustom() &&
Step 18: Assuming the condition is true
Step 19: Taking false branch
2351 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
2352 Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
2353 } else {
2354 // f16 arguments could have been extended prior to argument lowering.
2355 // Mask these arguments if this is a CMSE nonsecure call.
2356 auto ArgVT = Outs[realArgIdx].ArgVT;
2357 if (isCmseNSCall
Step 19.1: 'isCmseNSCall' is false
&& (ArgVT == MVT::f16)) {
Step 20: Taking false branch
2358 auto LocBits = VA.getLocVT().getSizeInBits();
2359 auto MaskValue = APInt::getLowBitsSet(LocBits, ArgVT.getSizeInBits());
2360 SDValue Mask =
2361 DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
2362 Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
2363 Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
2364 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2365 }
2366 }
2367
2368 // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
2369 if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {
Step 21: Taking false branch
2370 SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2371 DAG.getConstant(0, dl, MVT::i32));
2372 SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2373 DAG.getConstant(1, dl, MVT::i32));
2374
2375 PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass, VA, ArgLocs[++i],
2376 StackPtr, MemOpChains, Flags);
2377
2378 VA = ArgLocs[++i]; // skip ahead to next loc
2379 if (VA.isRegLoc()) {
2380 PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass, VA, ArgLocs[++i],
2381 StackPtr, MemOpChains, Flags);
2382 } else {
2383 assert(VA.isMemLoc());
2384
2385 MemOpChains.push_back(
2386 LowerMemOpCallTo(Chain, StackPtr, Op1, dl, DAG, VA, Flags));
2387 }
2388 } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) {
Step 22: Taking true branch
2389 PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
Step 23: Calling 'ARMTargetLowering::PassF64ArgInRegs'
2390 StackPtr, MemOpChains, Flags);
2391 } else if (VA.isRegLoc()) {
2392 if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
2393 Outs[0].VT == MVT::i32) {
2394 assert(VA.getLocVT() == MVT::i32 &&
2395        "unexpected calling convention register assignment");
2396 assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
2397        "unexpected use of 'returned'");
2398 isThisReturn = true;
2399 }
2400 const TargetOptions &Options = DAG.getTarget().Options;
2401 if (Options.EmitCallSiteInfo)
2402 CSInfo.emplace_back(VA.getLocReg(), i);
2403 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
2404 } else if (isByVal) {
2405 assert(VA.isMemLoc());
2406 unsigned offset = 0;
2407
2408 // True if this byval aggregate will be split between registers
2409 // and memory.
2410 unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
2411 unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
2412
2413 if (CurByValIdx < ByValArgsCount) {
2414
2415 unsigned RegBegin, RegEnd;
2416 CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
2417
2418 EVT PtrVT =
2419 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
2420 unsigned int i, j;
2421 for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
2422 SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
2423 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
2424 SDValue Load =
2425 DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo(),
2426 DAG.InferPtrAlign(AddArg));
2427 MemOpChains.push_back(Load.getValue(1));
2428 RegsToPass.push_back(std::make_pair(j, Load));
2429 }
2430
2431 // If the parameter size exceeds the register area, the "offset" value
2432 // helps us calculate the stack slot for the remaining part properly.
2433 offset = RegEnd - RegBegin;
2434
2435 CCInfo.nextInRegsParam();
2436 }
2437
2438 if (Flags.getByValSize() > 4*offset) {
2439 auto PtrVT = getPointerTy(DAG.getDataLayout());
2440 unsigned LocMemOffset = VA.getLocMemOffset();
2441 SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
2442 SDValue Dst = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, StkPtrOff);
2443 SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
2444 SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
2445 SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
2446 MVT::i32);
2447 SDValue AlignNode =
2448 DAG.getConstant(Flags.getNonZeroByValAlign().value(), dl, MVT::i32);
2449
2450 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
2451 SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
2452 MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
2453 Ops));
2454 }
2455 } else if (!isTailCall) {
2456 assert(VA.isMemLoc());
2457
2458 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
2459 dl, DAG, VA, Flags));
2460 }
2461 }
2462
2463 if (!MemOpChains.empty())
2464 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2465
2466 // Build a sequence of copy-to-reg nodes chained together with token chain
2467 // and flag operands which copy the outgoing args into the appropriate regs.
2468 SDValue InFlag;
2469 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
2470 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
2471 RegsToPass[i].second, InFlag);
2472 InFlag = Chain.getValue(1);
2473 }
2474
2475 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
2476 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
2477 // node so that legalize doesn't hack it.
2478 bool isDirect = false;
2479
2480 const TargetMachine &TM = getTargetMachine();
2481 const Module *Mod = MF.getFunction().getParent();
2482 const GlobalValue *GV = nullptr;
2483 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
2484 GV = G->getGlobal();
2485 bool isStub =
2486 !TM.shouldAssumeDSOLocal(*Mod, GV) && Subtarget->isTargetMachO();
2487
2488 bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
2489 bool isLocalARMFunc = false;
2490 auto PtrVt = getPointerTy(DAG.getDataLayout());
2491
2492 if (Subtarget->genLongCalls()) {
2493 assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
2494 "long-calls codegen is not position independent!");
2495 // Handle a global address or an external symbol. If it's not one of
2496 // those, the target's already in a register, so we don't need to do
2497 // anything extra.
2498 if (isa<GlobalAddressSDNode>(Callee)) {
2499 // Create a constant pool entry for the callee address
2500 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2501 ARMConstantPoolValue *CPV =
2502 ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
2503
2504 // Get the address of the callee into a register
2505 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2506 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2507 Callee = DAG.getLoad(
2508 PtrVt, dl, DAG.getEntryNode(), CPAddr,
2509 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2510 } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
2511 const char *Sym = S->getSymbol();
2512
2513 // Create a constant pool entry for the callee address
2514 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2515 ARMConstantPoolValue *CPV =
2516 ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2517 ARMPCLabelIndex, 0);
2518 // Get the address of the callee into a register
2519 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2520 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2521 Callee = DAG.getLoad(
2522 PtrVt, dl, DAG.getEntryNode(), CPAddr,
2523 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2524 }
2525 } else if (isa<GlobalAddressSDNode>(Callee)) {
2526 if (!PreferIndirect) {
2527 isDirect = true;
2528 bool isDef = GV->isStrongDefinitionForLinker();
2529
2530 // ARM call to a local ARM function is predicable.
2531 isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
2532 // tBX takes a register source operand.
2533 if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2534 assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
2535 Callee = DAG.getNode(
2536 ARMISD::WrapperPIC, dl, PtrVt,
2537 DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
2538 Callee = DAG.getLoad(
2539 PtrVt, dl, DAG.getEntryNode(), Callee,
2540 MachinePointerInfo::getGOT(DAG.getMachineFunction()), MaybeAlign(),
2541 MachineMemOperand::MODereferenceable |
2542 MachineMemOperand::MOInvariant);
2543 } else if (Subtarget->isTargetCOFF()) {
2544 assert(Subtarget->isTargetWindows() &&
2545 "Windows is the only supported COFF target");
2546 unsigned TargetFlags = ARMII::MO_NO_FLAG;
2547 if (GV->hasDLLImportStorageClass())
2548 TargetFlags = ARMII::MO_DLLIMPORT;
2549 else if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
2550 TargetFlags = ARMII::MO_COFFSTUB;
2551 Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*offset=*/0,
2552 TargetFlags);
2553 if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
2554 Callee =
2555 DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
2556 DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
2557 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
2558 } else {
2559 Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, 0);
2560 }
2561 }
2562 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2563 isDirect = true;
2564 // tBX takes a register source operand.
2565 const char *Sym = S->getSymbol();
2566 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2567 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2568 ARMConstantPoolValue *CPV =
2569 ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2570 ARMPCLabelIndex, 4);
2571 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2572 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2573 Callee = DAG.getLoad(
2574 PtrVt, dl, DAG.getEntryNode(), CPAddr,
2575 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2576 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2577 Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
2578 } else {
2579 Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
2580 }
2581 }
2582
2583 if (isCmseNSCall) {
2584 assert(!isARMFunc && !isDirect &&
2585 "Cannot handle call to ARM function or direct call");
2586 if (NumBytes > 0) {
2587 DiagnosticInfoUnsupported Diag(DAG.getMachineFunction().getFunction(),
2588 "call to non-secure function would "
2589 "require passing arguments on stack",
2590 dl.getDebugLoc());
2591 DAG.getContext()->diagnose(Diag);
2592 }
2593 if (isStructRet) {
2594 DiagnosticInfoUnsupported Diag(
2595 DAG.getMachineFunction().getFunction(),
2596 "call to non-secure function would return value through pointer",
2597 dl.getDebugLoc());
2598 DAG.getContext()->diagnose(Diag);
2599 }
2600 }
2601
2602 // FIXME: handle tail calls differently.
2603 unsigned CallOpc;
2604 if (Subtarget->isThumb()) {
2605 if (isCmseNSCall)
2606 CallOpc = ARMISD::tSECALL;
2607 else if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2608 CallOpc = ARMISD::CALL_NOLINK;
2609 else
2610 CallOpc = ARMISD::CALL;
2611 } else {
2612 if (!isDirect && !Subtarget->hasV5TOps())
2613 CallOpc = ARMISD::CALL_NOLINK;
2614 else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2615 // Emit regular call when code size is the priority
2616 !Subtarget->hasMinSize())
2617 // "mov lr, pc; b _foo" to avoid confusing the RSP
2618 CallOpc = ARMISD::CALL_NOLINK;
2619 else
2620 CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
2621 }
2622
2623 std::vector<SDValue> Ops;
2624 Ops.push_back(Chain);
2625 Ops.push_back(Callee);
2626
2627 // Add argument registers to the end of the list so that they are known live
2628 // into the call.
2629 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2630 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2631 RegsToPass[i].second.getValueType()));
2632
2633 // Add a register mask operand representing the call-preserved registers.
2634 if (!isTailCall) {
2635 const uint32_t *Mask;
2636 const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
2637 if (isThisReturn) {
2638 // For 'this' returns, use the R0-preserving mask if applicable
2639 Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
2640 if (!Mask) {
2641 // Set isThisReturn to false if the calling convention is not one that
2642 // allows 'returned' to be modeled in this way, so LowerCallResult does
2643 // not try to pass 'this' straight through
2644 isThisReturn = false;
2645 Mask = ARI->getCallPreservedMask(MF, CallConv);
2646 }
2647 } else
2648 Mask = ARI->getCallPreservedMask(MF, CallConv);
2649
2650 assert(Mask && "Missing call preserved mask for calling convention");
2651 Ops.push_back(DAG.getRegisterMask(Mask));
2652 }
2653
2654 if (InFlag.getNode())
2655 Ops.push_back(InFlag);
2656
2657 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2658 if (isTailCall) {
2659 MF.getFrameInfo().setHasTailCall();
2660 SDValue Ret = DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
2661 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
2662 return Ret;
2663 }
2664
2665 // Returns a chain and a flag for retval copy to use.
2666 Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
2667 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2668 InFlag = Chain.getValue(1);
2669 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
2670
2671 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
2672 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
2673 if (!Ins.empty())
2674 InFlag = Chain.getValue(1);
2675
2676 // Handle result values, copying them out of physregs into vregs that we
2677 // return.
2678 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
2679 InVals, isThisReturn,
2680 isThisReturn ? OutVals[0] : SDValue());
2681}
2682
2683/// HandleByVal - Every parameter *after* a byval parameter is passed
2684/// on the stack. Remember the next parameter register to allocate,
2685/// and then confiscate the rest of the parameter registers to insure
2686/// this.
2687void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
2688 Align Alignment) const {
2689 // Byval (as with any stack) slots are always at least 4 byte aligned.
2690 Alignment = std::max(Alignment, Align(4));
2691
2692 unsigned Reg = State->AllocateReg(GPRArgRegs);
2693 if (!Reg)
2694 return;
2695
2696 unsigned AlignInRegs = Alignment.value() / 4;
2697 unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
2698 for (unsigned i = 0; i < Waste; ++i)
2699 Reg = State->AllocateReg(GPRArgRegs);
2700
2701 if (!Reg)
2702 return;
2703
2704 unsigned Excess = 4 * (ARM::R4 - Reg);
2705
2706 // Special case when NSAA != SP and the parameter size is greater than the
2707 // size of all remaining GPR regs. In that case we can't split the parameter;
2708 // we must send it to the stack. We also must set NCRN to R4, so waste all
2709 // remaining registers.
2710 const unsigned NSAAOffset = State->getNextStackOffset();
2711 if (NSAAOffset != 0 && Size > Excess) {
2712 while (State->AllocateReg(GPRArgRegs))
2713 ;
2714 return;
2715 }
2716
2717 // The first register for the byval parameter is the first register that
2718 // wasn't allocated before this method call, so it would be "reg".
2719 // If the parameter is small enough to be saved in the range [reg, r4), then
2720 // the end (first after last) register would be reg + param-size-in-regs;
2721 // otherwise the parameter would be split between registers and stack, and
2722 // the end register would be r4 in this case.
2723 unsigned ByValRegBegin = Reg;
2724 unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
2725 State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
2726 // Note, the first register was already allocated at the beginning of the
2727 // function; allocate the remaining registers we need.
2728 for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
2729 State->AllocateReg(GPRArgRegs);
2730 // A byval parameter that is split between registers and memory needs its
2731 // size truncated here.
2732 // In the case where the entire structure fits in registers, we set the
2733 // size in memory to zero.
2734 Size = std::max<int>(Size - Excess, 0);
2735}
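        A minimal worked example of the split HandleByVal performs (hypothetical IR,
        assuming AAPCS, r0 already taken by a preceding i32 argument, NSAA == SP):

          %struct.S = type { [5 x i32] }            ; 20 bytes
          declare void @callee(i32, %struct.S* byval(%struct.S) align 4)

        Here Reg = r1 and Excess = 4 * (r4 - r1) = 12, so [r1, r4) is recorded via
        addInRegsParamInfo and Size is truncated to 20 - 12 = 8: the first 12 bytes
        travel in r1-r3 and the remaining 8 bytes stay on the stack.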
2736
2737/// MatchingStackOffset - Return true if the given stack call argument is
2738/// already available in the same position (relatively) of the caller's
2739/// incoming argument stack.
2740static
2741bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
2742 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
2743 const TargetInstrInfo *TII) {
2744 unsigned Bytes = Arg.getValueSizeInBits() / 8;
2745 int FI = std::numeric_limits<int>::max();
2746 if (Arg.getOpcode() == ISD::CopyFromReg) {
2747 unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2748 if (!Register::isVirtualRegister(VR))
2749 return false;
2750 MachineInstr *Def = MRI->getVRegDef(VR);
2751 if (!Def)
2752 return false;
2753 if (!Flags.isByVal()) {
2754 if (!TII->isLoadFromStackSlot(*Def, FI))
2755 return false;
2756 } else {
2757 return false;
2758 }
2759 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2760 if (Flags.isByVal())
2761 // ByVal argument is passed in as a pointer but it's now being
2762 // dereferenced. e.g.
2763 // define @foo(%struct.X* %A) {
2764 // tail call @bar(%struct.X* byval %A)
2765 // }
2766 return false;
2767 SDValue Ptr = Ld->getBasePtr();
2768 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
2769 if (!FINode)
2770 return false;
2771 FI = FINode->getIndex();
2772 } else
2773 return false;
2774
2775 assert(FI != std::numeric_limits<int>::max());
2776 if (!MFI.isFixedObjectIndex(FI))
2777 return false;
2778 return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
2779}
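        Sketch of the case this accepts (hypothetical IR, assuming AAPCS so the fifth
        i32 argument lives on the stack):

          define i32 @caller(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
            %r = tail call i32 @callee(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e)
            ret i32 %r
          }

        %e is re-passed at the same fixed stack offset it arrived at, so the reload
        from the caller's fixed stack object matches the callee's expected slot and no
        stack adjustment is needed for the sibcall.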
2780
2781/// IsEligibleForTailCallOptimization - Check whether the call is eligible
2782/// for tail call optimization. Targets which want to do tail call
2783/// optimization should implement this function.
2784bool ARMTargetLowering::IsEligibleForTailCallOptimization(
2785 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
2786 bool isCalleeStructRet, bool isCallerStructRet,
2787 const SmallVectorImpl<ISD::OutputArg> &Outs,
2788 const SmallVectorImpl<SDValue> &OutVals,
2789 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG,
2790 const bool isIndirect) const {
2791 MachineFunction &MF = DAG.getMachineFunction();
2792 const Function &CallerF = MF.getFunction();
2793 CallingConv::ID CallerCC = CallerF.getCallingConv();
2794
2795 assert(Subtarget->supportsTailCall());
2796
2797 // Indirect tail calls cannot be optimized for Thumb1 if the args
2798 // to the call take up r0-r3. The reason is that there are no legal registers
2799 // left to hold the pointer to the function to be called.
2800 if (Subtarget->isThumb1Only() && Outs.size() >= 4 &&
2801 (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect))
2802 return false;
2803
2804 // Look for obvious safe cases to perform tail call optimization that do not
2805 // require ABI changes. This is what gcc calls sibcall.
2806
2807 // Exception-handling functions need a special set of instructions to indicate
2808 // a return to the hardware. Tail-calling another function would probably
2809 // break this.
2810 if (CallerF.hasFnAttribute("interrupt"))
2811 return false;
2812
2813 // Also avoid sibcall optimization if either caller or callee uses struct
2814 // return semantics.
2815 if (isCalleeStructRet || isCallerStructRet)
2816 return false;
2817
2818 // Externally-defined functions with weak linkage should not be
2819 // tail-called on ARM when the OS does not support dynamic
2820 // pre-emption of symbols, as the AAELF spec requires normal calls
2821 // to undefined weak functions to be replaced with a NOP or jump to the
2822 // next instruction. The behaviour of branch instructions in this
2823 // situation (as used for tail calls) is implementation-defined, so we
2824 // cannot rely on the linker replacing the tail call with a return.
2825 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2826 const GlobalValue *GV = G->getGlobal();
2827 const Triple &TT = getTargetMachine().getTargetTriple();
2828 if (GV->hasExternalWeakLinkage() &&
2829 (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
2830 return false;
2831 }
2832
2833 // Check that the call results are passed in the same way.
2834 LLVMContext &C = *DAG.getContext();
2835 if (!CCState::resultsCompatible(
2836 getEffectiveCallingConv(CalleeCC, isVarArg),
2837 getEffectiveCallingConv(CallerCC, CallerF.isVarArg()), MF, C, Ins,
2838 CCAssignFnForReturn(CalleeCC, isVarArg),
2839 CCAssignFnForReturn(CallerCC, CallerF.isVarArg())))
2840 return false;
2841 // The callee has to preserve all registers the caller needs to preserve.
2842 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2843 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
2844 if (CalleeCC != CallerCC) {
2845 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
2846 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
2847 return false;
2848 }
2849
2850 // If Caller's vararg or byval argument has been split between registers and
2851 // stack, do not perform tail call, since part of the argument is in caller's
2852 // local frame.
2853 const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
2854 if (AFI_Caller->getArgRegsSaveSize())
2855 return false;
2856
2857 // If the callee takes no arguments then go on to check the results of the
2858 // call.
2859 if (!Outs.empty()) {
2860 // Check if stack adjustment is needed. For now, do not do this if any
2861 // argument is passed on the stack.
2862 SmallVector<CCValAssign, 16> ArgLocs;
2863 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
2864 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
2865 if (CCInfo.getNextStackOffset()) {
2866 // Check if the arguments are already laid out in the right way as
2867 // the caller's fixed stack objects.
2868 MachineFrameInfo &MFI = MF.getFrameInfo();
2869 const MachineRegisterInfo *MRI = &MF.getRegInfo();
2870 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2871 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2872 i != e;
2873 ++i, ++realArgIdx) {
2874 CCValAssign &VA = ArgLocs[i];
2875 EVT RegVT = VA.getLocVT();
2876 SDValue Arg = OutVals[realArgIdx];
2877 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2878 if (VA.getLocInfo() == CCValAssign::Indirect)
2879 return false;
2880 if (VA.needsCustom() && (RegVT == MVT::f64 || RegVT == MVT::v2f64)) {
2881 // f64 and vector types are split into multiple registers or
2882 // register/stack-slot combinations. The types will not match
2883 // the registers; give up on memory f64 refs until we figure
2884 // out what to do about this.
2885 if (!VA.isRegLoc())
2886 return false;
2887 if (!ArgLocs[++i].isRegLoc())
2888 return false;
2889 if (RegVT == MVT::v2f64) {
2890 if (!ArgLocs[++i].isRegLoc())
2891 return false;
2892 if (!ArgLocs[++i].isRegLoc())
2893 return false;
2894 }
2895 } else if (!VA.isRegLoc()) {
2896 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
2897 MFI, MRI, TII))
2898 return false;
2899 }
2900 }
2901 }
2902
2903 const MachineRegisterInfo &MRI = MF.getRegInfo();
2904 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
2905 return false;
2906 }
2907
2908 return true;
2909}
2910
2911bool
2912ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
2913 MachineFunction &MF, bool isVarArg,
2914 const SmallVectorImpl<ISD::OutputArg> &Outs,
2915 LLVMContext &Context) const {
2916 SmallVector<CCValAssign, 16> RVLocs;
2917 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2918 return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2919}
2920
2921static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
2922 const SDLoc &DL, SelectionDAG &DAG) {
2923 const MachineFunction &MF = DAG.getMachineFunction();
2924 const Function &F = MF.getFunction();
2925
2926 StringRef IntKind = F.getFnAttribute("interrupt").getValueAsString();
2927
2928 // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
2929 // version of the "preferred return address". These offsets affect the return
2930 // instruction if this is a return from PL1 without hypervisor extensions.
2931 // IRQ/FIQ: +4 "subs pc, lr, #4"
2932 // SWI: 0 "subs pc, lr, #0"
2933 // ABORT: +4 "subs pc, lr, #4"
2934 // UNDEF: +4/+2 "subs pc, lr, #0"
2935 // UNDEF varies depending on whether the exception came from ARM or Thumb
2936 // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
2937
2938 int64_t LROffset;
2939 if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
2940 IntKind == "ABORT")
2941 LROffset = 4;
2942 else if (IntKind == "SWI" || IntKind == "UNDEF")
2943 LROffset = 0;
2944 else
2945 report_fatal_error("Unsupported interrupt attribute. If present, value "
2946 "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
2947
2948 RetOps.insert(RetOps.begin() + 1,
2949 DAG.getConstant(LROffset, DL, MVT::i32, false));
2950
2951 return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps);
2952}
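        For example, assuming an IRQ handler written as

          define void @isr() "interrupt"="IRQ" { ret void }

        LROffset is 4, so the return is emitted as "subs pc, lr, #4", undoing the
        offset the core applied to LR on exception entry.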
2953
2954SDValue
2955ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2956 bool isVarArg,
2957 const SmallVectorImpl<ISD::OutputArg> &Outs,
2958 const SmallVectorImpl<SDValue> &OutVals,
2959 const SDLoc &dl, SelectionDAG &DAG) const {
2960 // CCValAssign - represent the assignment of the return value to a location.
2961 SmallVector<CCValAssign, 16> RVLocs;
2962
2963 // CCState - Info about the registers and stack slots.
2964 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2965 *DAG.getContext());
2966
2967 // Analyze outgoing return values.
2968 CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2969
2970 SDValue Flag;
2971 SmallVector<SDValue, 4> RetOps;
2972 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2973 bool isLittleEndian = Subtarget->isLittle();
2974
2975 MachineFunction &MF = DAG.getMachineFunction();
2976 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2977 AFI->setReturnRegsCount(RVLocs.size());
2978
2979 // Report error if cmse entry function returns structure through first ptr arg.
2980 if (AFI->isCmseNSEntryFunction() && MF.getFunction().hasStructRetAttr()) {
2981 // Note: using an empty SDLoc(), as the first line of the function is a
2982 // better place to report than the last line.
2983 DiagnosticInfoUnsupported Diag(
2984 DAG.getMachineFunction().getFunction(),
2985 "secure entry function would return value through pointer",
2986 SDLoc().getDebugLoc());
2987 DAG.getContext()->diagnose(Diag);
2988 }
2989
2990 // Copy the result values into the output registers.
2991 for (unsigned i = 0, realRVLocIdx = 0;
2992 i != RVLocs.size();
2993 ++i, ++realRVLocIdx) {
2994 CCValAssign &VA = RVLocs[i];
2995 assert(VA.isRegLoc() && "Can only return in registers!");
2996
2997 SDValue Arg = OutVals[realRVLocIdx];
2998 bool ReturnF16 = false;
2999
3000 if (Subtarget->hasFullFP16() && Subtarget->isTargetHardFloat()) {
3001 // Half-precision return values can be returned like this:
3002 //
3003 // t11 f16 = fadd ...
3004 // t12: i16 = bitcast t11
3005 // t13: i32 = zero_extend t12
3006 // t14: f32 = bitcast t13 <~~~~~~~ Arg
3007 //
3008 // to avoid code generation for bitcasts, we simply set Arg to the node
3009 // that produces the f16 value, t11 in this case.
3010 //
3011 if (Arg.getValueType() == MVT::f32 && Arg.getOpcode() == ISD::BITCAST) {
3012 SDValue ZE = Arg.getOperand(0);
3013 if (ZE.getOpcode() == ISD::ZERO_EXTEND && ZE.getValueType() == MVT::i32) {
3014 SDValue BC = ZE.getOperand(0);
3015 if (BC.getOpcode() == ISD::BITCAST && BC.getValueType() == MVT::i16) {
3016 Arg = BC.getOperand(0);
3017 ReturnF16 = true;
3018 }
3019 }
3020 }
3021 }
3022
3023 switch (VA.getLocInfo()) {
3024 default: llvm_unreachable("Unknown loc info!");
3025 case CCValAssign::Full: break;
3026 case CCValAssign::BCvt:
3027 if (!ReturnF16)
3028 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
3029 break;
3030 }
3031
3032 // Mask f16 arguments if this is a CMSE nonsecure entry.
3033 auto RetVT = Outs[realRVLocIdx].ArgVT;
3034 if (AFI->isCmseNSEntryFunction() && (RetVT == MVT::f16)) {
3035 if (VA.needsCustom() && VA.getValVT() == MVT::f16) {
3036 Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
3037 } else {
3038 auto LocBits = VA.getLocVT().getSizeInBits();
3039 auto MaskValue = APInt::getLowBitsSet(LocBits, RetVT.getSizeInBits());
3040 SDValue Mask =
3041 DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
3042 Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
3043 Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
3044 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
3045 }
3046 }
3047
3048 if (VA.needsCustom() &&
3049 (VA.getLocVT() == MVT::v2f64 || VA.getLocVT() == MVT::f64)) {
3050 if (VA.getLocVT() == MVT::v2f64) {
3051 // Extract the first half and return it in two registers.
3052 SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
3053 DAG.getConstant(0, dl, MVT::i32));
3054 SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
3055 DAG.getVTList(MVT::i32, MVT::i32), Half);
3056
3057 Chain =
3058 DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3059 HalfGPRs.getValue(isLittleEndian ? 0 : 1), Flag);
3060 Flag = Chain.getValue(1);
3061 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3062 VA = RVLocs[++i]; // skip ahead to next loc
3063 Chain =
3064 DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3065 HalfGPRs.getValue(isLittleEndian ? 1 : 0), Flag);
3066 Flag = Chain.getValue(1);
3067 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3068 VA = RVLocs[++i]; // skip ahead to next loc
3069
3070 // Extract the 2nd half and fall through to handle it as an f64 value.
3071 Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
3072 DAG.getConstant(1, dl, MVT::i32));
3073 }
3074 // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
3075 // available.
3076 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
3077 DAG.getVTList(MVT::i32, MVT::i32), Arg);
3078 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3079 fmrrd.getValue(isLittleEndian ? 0 : 1), Flag);
3080 Flag = Chain.getValue(1);
3081 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3082 VA = RVLocs[++i]; // skip ahead to next loc
3083 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3084 fmrrd.getValue(isLittleEndian ? 1 : 0), Flag);
3085 } else
3086 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
3087
3088 // Guarantee that all emitted copies are
3089 // stuck together, avoiding something bad.
3090 Flag = Chain.getValue(1);
3091 RetOps.push_back(DAG.getRegister(
3092 VA.getLocReg(), ReturnF16 ? Arg.getValueType() : VA.getLocVT()));
3093 }
3094 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
3095 const MCPhysReg *I =
3096 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
3097 if (I) {
3098 for (; *I; ++I) {
3099 if (ARM::GPRRegClass.contains(*I))
3100 RetOps.push_back(DAG.getRegister(*I, MVT::i32));
3101 else if (ARM::DPRRegClass.contains(*I))
3102 RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
3103 else
3104 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
3105 }
3106 }
3107
3108 // Update chain and glue.
3109 RetOps[0] = Chain;
3110 if (Flag.getNode())
3111 RetOps.push_back(Flag);
3112
3113 // CPUs which aren't M-class use a special sequence to return from
3114 // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
3115 // though we use "subs pc, lr, #N").
3116 //
3117 // M-class CPUs actually use a normal return sequence with a special
3118 // (hardware-provided) value in LR, so the normal code path works.
3119 if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt") &&
3120 !Subtarget->isMClass()) {
3121 if (Subtarget->isThumb1Only())
3122 report_fatal_error("interrupt attribute is not supported in Thumb1");
3123 return LowerInterruptReturn(RetOps, dl, DAG);
3124 }
3125
3126 ARMISD::NodeType RetNode = AFI->isCmseNSEntryFunction() ? ARMISD::SERET_FLAG :
3127 ARMISD::RET_FLAG;
3128 return DAG.getNode(RetNode, dl, MVT::Other, RetOps);
3129}
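        For instance, with the AAPCS (soft-float) return convention a function such as
        "define double @f(double %x)" returning its f64 result hits the VMOVRRD path
        above: the value is moved into two i32 halves copied into r0 and r1 (order
        chosen by isLittleEndian), which is the "ret f64 -> ret 2 x i32" legalization
        described in the comment.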
3130
3131bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
3132 if (N->getNumValues() != 1)
3133 return false;
3134 if (!N->hasNUsesOfValue(1, 0))
3135 return false;
3136
3137 SDValue TCChain = Chain;
3138 SDNode *Copy = *N->use_begin();
3139 if (Copy->getOpcode() == ISD::CopyToReg) {
3140 // If the copy has a glue operand, we conservatively assume it isn't safe to
3141 // perform a tail call.
3142 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3143 return false;
3144 TCChain = Copy->getOperand(0);
3145 } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
3146 SDNode *VMov = Copy;
3147 // f64 returned in a pair of GPRs.
3148 SmallPtrSet<SDNode*, 2> Copies;
3149 for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
3150 UI != UE; ++UI) {
3151 if (UI->getOpcode() != ISD::CopyToReg)
3152 return false;
3153 Copies.insert(*UI);
3154 }
3155 if (Copies.size() > 2)
3156 return false;
3157
3158 for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
3159 UI != UE; ++UI) {
3160 SDValue UseChain = UI->getOperand(0);
3161 if (Copies.count(UseChain.getNode()))
3162 // Second CopyToReg
3163 Copy = *UI;
3164 else {
3165 // We are at the top of this chain.
3166 // If the copy has a glue operand, we conservatively assume it
3167 // isn't safe to perform a tail call.
3168 if (UI->getOperand(UI->getNumOperands()-1).getValueType() == MVT::Glue)
3169 return false;
3170 // First CopyToReg
3171 TCChain = UseChain;
3172 }
3173 }
3174 } else if (Copy->getOpcode() == ISD::BITCAST) {
3175 // f32 returned in a single GPR.
3176 if (!Copy->hasOneUse())
3177 return false;
3178 Copy = *Copy->use_begin();
3179 if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
3180 return false;
3181 // If the copy has a glue operand, we conservatively assume it isn't safe to
3182 // perform a tail call.
3183 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3184 return false;
3185 TCChain = Copy->getOperand(0);
3186 } else {
3187 return false;
3188 }
3189
3190 bool HasRet = false;
3191 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
3192 UI != UE; ++UI) {
3193 if (UI->getOpcode() != ARMISD::RET_FLAG &&
3194 UI->getOpcode() != ARMISD::INTRET_FLAG)
3195 return false;
3196 HasRet = true;
3197 }
3198
3199 if (!HasRet)
3200 return false;
3201
3202 Chain = TCChain;
3203 return true;
3204}
3205
3206bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
3207 if (!Subtarget->supportsTailCall())
3208 return false;
3209
3210 if (!CI->isTailCall())
3211 return false;
3212
3213 return true;
3214}
3215
3216// Trying to write a 64-bit value, so we need to split it into two 32-bit values
3217// first, and pass the low and high parts through.
3218static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) {
3219 SDLoc DL(Op);
3220 SDValue WriteValue = Op->getOperand(2);
3221
3222 // This function is only supposed to be called for i64 type argument.
3223 assert(WriteValue.getValueType() == MVT::i64
3224 && "LowerWRITE_REGISTER called for non-i64 type argument.");
3225
3226 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
3227 DAG.getConstant(0, DL, MVT::i32));
3228 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
3229 DAG.getConstant(1, DL, MVT::i32));
3230 SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
3231 return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
3232}
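        Sketch (hypothetical register name in !0): for

          call void @llvm.write_register.i64(metadata !0, i64 %v)   ; !0 = !{!"reg"}

        the i64 operand is decomposed with two EXTRACT_ELEMENT nodes into Lo = bits
        [31:0] and Hi = bits [63:32] of %v, and the WRITE_REGISTER node is rebuilt
        with operands { chain, register id, Lo, Hi }.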
3233
3234// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
3235// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
3236// one of the above mentioned nodes. It has to be wrapped because otherwise
3237// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
3238// be used to form addressing mode. These wrapped nodes will be selected
3239// into MOVi.
3240SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
3241 SelectionDAG &DAG) const {
3242 EVT PtrVT = Op.getValueType();
3243 // FIXME there is no actual debug info here
3244 SDLoc dl(Op);
3245 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3246 SDValue Res;
3247
3248 // When generating execute-only code Constant Pools must be promoted to the
3249 // global data section. It's a bit ugly that we can't share them across basic
3250 // blocks, but this way we guarantee that execute-only behaves correctly with
3251 // position-independent addressing modes.
3252 if (Subtarget->genExecuteOnly()) {
3253 auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
3254 auto T = const_cast<Type*>(CP->getType());
3255 auto C = const_cast<Constant*>(CP->getConstVal());
3256 auto M = const_cast<Module*>(DAG.getMachineFunction().
3257 getFunction().getParent());
3258 auto GV = new GlobalVariable(
3259 *M, T, /*isConstant=*/true, GlobalVariable::InternalLinkage, C,
3260 Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" +
3261 Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" +
3262 Twine(AFI->createPICLabelUId())
3263 );
3264 SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV),
3265 dl, PtrVT);
3266 return LowerGlobalAddress(GA, DAG);
3267 }
3268
3269 if (CP->isMachineConstantPoolEntry())
3270 Res =
3271 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign());
3272 else
3273 Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign());
3274 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
3275}
3276
3277unsigned ARMTargetLowering::getJumpTableEncoding() const {
3278 return MachineJumpTableInfo::EK_Inline;
3279}
3280
3281SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
3282 SelectionDAG &DAG) const {
3283 MachineFunction &MF = DAG.getMachineFunction();
3284 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3285 unsigned ARMPCLabelIndex = 0;
3286 SDLoc DL(Op);
3287 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3288 const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
3289 SDValue CPAddr;
3290 bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
3291 if (!IsPositionIndependent) {
3292 CPAddr = DAG.getTargetConstantPool(BA, PtrVT, Align(4));
3293 } else {
3294 unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
3295 ARMPCLabelIndex = AFI->createPICLabelUId();
3296 ARMConstantPoolValue *CPV =
3297 ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
3298 ARMCP::CPBlockAddress, PCAdj);
3299 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3300 }
3301 CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
3302 SDValue Result = DAG.getLoad(
3303 PtrVT, DL, DAG.getEntryNode(), CPAddr,
3304 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3305 if (!IsPositionIndependent)
3306 return Result;
3307 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
3308 return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
3309}
3310
3311/// Convert a TLS address reference into the correct sequence of loads
3312/// and calls to compute the variable's address for Darwin, and return an
3313/// SDValue containing the final node.
3314
3315/// Darwin only has one TLS scheme which must be capable of dealing with the
3316/// fully general situation, in the worst case. This means:
3317/// + "extern __thread" declaration.
3318/// + Defined in a possibly unknown dynamic library.
3319///
3320/// The general system is that each __thread variable has a [3 x i32] descriptor
3321/// which contains information used by the runtime to calculate the address. The
3322/// only part of this the compiler needs to know about is the first word, which
3323/// contains a function pointer that must be called with the address of the
3324/// entire descriptor in "r0".
3325///
3326/// Since this descriptor may be in a different unit, in general access must
3327/// proceed along the usual ARM rules. A common sequence to produce is:
3328///
3329/// movw rT1, :lower16:_var$non_lazy_ptr
3330/// movt rT1, :upper16:_var$non_lazy_ptr
3331/// ldr r0, [rT1]
3332/// ldr rT2, [r0]
3333/// blx rT2
3334/// [...address now in r0...]
3335SDValue
3336ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
3337 SelectionDAG &DAG) const {
3338 assert(Subtarget->isTargetDarwin() &&
3339 "This function expects a Darwin target");
3340 SDLoc DL(Op);
3341
3342 // First step is to get the address of the actual global symbol. This is where
3343 // the TLS descriptor lives.
3344 SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
3345
3346 // The first entry in the descriptor is a function pointer that we must call
3347 // to obtain the address of the variable.
3348 SDValue Chain = DAG.getEntryNode();
3349 SDValue FuncTLVGet = DAG.getLoad(
3350 MVT::i32, DL, Chain, DescAddr,
3351 MachinePointerInfo::getGOT(DAG.getMachineFunction()), Align(4),
3352 MachineMemOperand::MONonTemporal | MachineMemOperand::MODereferenceable |
3353 MachineMemOperand::MOInvariant);
3354 Chain = FuncTLVGet.getValue(1);
3355
3356 MachineFunction &F = DAG.getMachineFunction();
3357 MachineFrameInfo &MFI = F.getFrameInfo();
3358 MFI.setAdjustsStack(true);
3359
3360 // TLS calls preserve all registers except those that absolutely must be
3361 // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
3362 // silly).
3363 auto TRI =
3364 getTargetMachine().getSubtargetImpl(F.getFunction())->getRegisterInfo();
3365 auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
3366 const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
3367
3368 // Finally, we can make the call. This is just a degenerate version of a
3369 // normal ARM call node: r0 takes the address of the descriptor, and
3370 // returns the address of the variable in this thread.
3371 Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
3372 Chain =
3373 DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
3374 Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
3375 DAG.getRegisterMask(Mask), Chain.getValue(1));
3376 return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
3377}
3378
3379SDValue
3380ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
3381 SelectionDAG &DAG) const {
3382 assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
3383
3384 SDValue Chain = DAG.getEntryNode();
3385 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3386 SDLoc DL(Op);
3387
3388 // Load the current TEB (thread environment block)
3389 SDValue Ops[] = {Chain,
3390 DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),
3391 DAG.getTargetConstant(15, DL, MVT::i32),
3392 DAG.getTargetConstant(0, DL, MVT::i32),
3393 DAG.getTargetConstant(13, DL, MVT::i32),
3394 DAG.getTargetConstant(0, DL, MVT::i32),
3395 DAG.getTargetConstant(2, DL, MVT::i32)};
3396 SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
3397 DAG.getVTList(MVT::i32, MVT::Other), Ops);
3398
3399 SDValue TEB = CurrentTEB.getValue(0);
3400 Chain = CurrentTEB.getValue(1);
3401
3402 // Load the ThreadLocalStoragePointer from the TEB
3403 // A pointer to the TLS array is located at offset 0x2c from the TEB.
3404 SDValue TLSArray =
3405 DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));
3406 TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
3407
3408 // The pointer to the thread's TLS data area is at the TLS Index scaled by 4
3409 // offset into the TLSArray.
3410
3411 // Load the TLS index from the C runtime
3412 SDValue TLSIndex =
3413 DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);
3414 TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
3415 TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());
3416
3417 SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
3418 DAG.getConstant(2, DL, MVT::i32));
3419 SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
3420 DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
3421 MachinePointerInfo());
3422
3423 // Get the offset of the start of the .tls section (section base)
3424 const auto *GA = cast<GlobalAddressSDNode>(Op);
3425 auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
3426 SDValue Offset = DAG.getLoad(
3427 PtrVT, DL, Chain,
3428 DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,
3429 DAG.getTargetConstantPool(CPV, PtrVT, Align(4))),
3430 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3431
3432 return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
3433}
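        Roughly the access sequence this builds, as a sketch (exact instruction
        selection and register choice vary):

          mrc   p15, #0, r0, c13, c0, #2    @ current TEB
          ldr   r1, [r0, #0x2c]             @ ThreadLocalStoragePointer (TLS array)
          ldr   r2, =_tls_index
          ldr   r2, [r2]                    @ this module's TLS index
          ldr   r1, [r1, r2, lsl #2]        @ base of this module's TLS data
          ldr   r3, =<SECREL offset of the variable>
          add   r0, r1, r3                  @ address of the variable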
3434
3435// Lower ISD::GlobalTLSAddress using the "general dynamic" model
3436SDValue
3437ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
3438 SelectionDAG &DAG) const {
3439 SDLoc dl(GA);
3440 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3441 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3442 MachineFunction &MF = DAG.getMachineFunction();
3443 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3444 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3445 ARMConstantPoolValue *CPV =
3446 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3447 ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
3448 SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3449 Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
3450 Argument = DAG.getLoad(
3451 PtrVT, dl, DAG.getEntryNode(), Argument,
3452 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3453 SDValue Chain = Argument.getValue(1);
3454
3455 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3456 Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
3457
3458 // call __tls_get_addr.
3459 ArgListTy Args;
3460 ArgListEntry Entry;
3461 Entry.Node = Argument;
3462 Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
3463 Args.push_back(Entry);
3464
3465 // FIXME: is there useful debug info available here?
3466 TargetLowering::CallLoweringInfo CLI(DAG);
3467 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3468 CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
3469 DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
3470
3471 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3472 return CallResult.first;
3473}
3474
3475// Lower ISD::GlobalTLSAddress using the "initial exec" or
3476// "local exec" model.
3477SDValue
3478ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
3479 SelectionDAG &DAG,
3480 TLSModel::Model model) const {
3481 const GlobalValue *GV = GA->getGlobal();
3482 SDLoc dl(GA);
3483 SDValue Offset;
3484 SDValue Chain = DAG.getEntryNode();
3485 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3486 // Get the Thread Pointer
3487 SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
3488
3489 if (model == TLSModel::InitialExec) {
3490 MachineFunction &MF = DAG.getMachineFunction();
3491 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3492 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3493 // Initial exec model.
3494 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3495 ARMConstantPoolValue *CPV =
3496 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3497 ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
3498 true);
3499 Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3500 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3501 Offset = DAG.getLoad(
3502 PtrVT, dl, Chain, Offset,
3503 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3504 Chain = Offset.getValue(1);
3505
3506 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3507 Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
3508
3509 Offset = DAG.getLoad(
3510 PtrVT, dl, Chain, Offset,
3511 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3512 } else {
3513 // local exec model
3514 assert(model == TLSModel::LocalExec);
3515 ARMConstantPoolValue *CPV =
3516 ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
3517 Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3518 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3519 Offset = DAG.getLoad(
3520 PtrVT, dl, Chain, Offset,
3521 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3522 }
3523
3524 // The address of the thread local variable is the add of the thread
3525 // pointer with the offset of the variable.
3526 return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
3527}
3528
3529SDValue
3530ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
3531 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3532 if (DAG.getTarget().useEmulatedTLS())
3533 return LowerToTLSEmulatedModel(GA, DAG);
3534
3535 if (Subtarget->isTargetDarwin())
3536 return LowerGlobalTLSAddressDarwin(Op, DAG);
3537
3538 if (Subtarget->isTargetWindows())
3539 return LowerGlobalTLSAddressWindows(Op, DAG);
3540
3541 // TODO: implement the "local dynamic" model
3542 assert(Subtarget->isTargetELF() && "Only ELF implemented here");
3543 TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
3544
3545 switch (model) {
3546 case TLSModel::GeneralDynamic:
3547 case TLSModel::LocalDynamic:
3548 return LowerToTLSGeneralDynamicModel(GA, DAG);
3549 case TLSModel::InitialExec:
3550 case TLSModel::LocalExec:
3551 return LowerToTLSExecModels(GA, DAG, model);
3552 }
3553 llvm_unreachable("bogus TLS model");
3554}
3555
3556/// Return true if all users of V are within function F, looking through
3557/// ConstantExprs.
3558static bool allUsersAreInFunction(const Value *V, const Function *F) {
3559 SmallVector<const User*,4> Worklist(V->users());
3560 while (!Worklist.empty()) {
3561 auto *U = Worklist.pop_back_val();
3562 if (isa<ConstantExpr>(U)) {
3563 append_range(Worklist, U->users());
3564 continue;
3565 }
3566
3567 auto *I = dyn_cast<Instruction>(U);
3568 if (!I || I->getParent()->getParent() != F)
3569 return false;
3570 }
3571 return true;
3572}
3573
3574static SDValue promoteToConstantPool(const ARMTargetLowering *TLI,
3575 const GlobalValue *GV, SelectionDAG &DAG,
3576 EVT PtrVT, const SDLoc &dl) {
3577 // If we're creating a pool entry for a constant global with unnamed address,
3578 // and the global is small enough, we can emit it inline into the constant pool
3579 // to save ourselves an indirection.
3580 //
3581 // This is a win if the constant is only used in one function (so it doesn't
3582 // need to be duplicated) or duplicating the constant wouldn't increase code
3583 // size (implying the constant is no larger than 4 bytes).
3584 const Function &F = DAG.getMachineFunction().getFunction();
3585
3586 // We rely on this decision to inline being idempotent and unrelated to the
3587 // use-site. We know that if we inline a variable at one use site, we'll
3588 // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
3589 // doesn't know about this optimization, so bail out if it's enabled else
3590 // we could decide to inline here (and thus never emit the GV) but require
3591 // the GV from fast-isel generated code.
3592 if (!EnableConstpoolPromotion ||
3593 DAG.getMachineFunction().getTarget().Options.EnableFastISel)
3594 return SDValue();
3595
3596 auto *GVar = dyn_cast<GlobalVariable>(GV);
3597 if (!GVar || !GVar->hasInitializer() ||
3598 !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
3599 !GVar->hasLocalLinkage())
3600 return SDValue();
3601
3602 // If we inline a value that contains relocations, we move the relocations
3603 // from .data to .text. This is not allowed in position-independent code.
3604 auto *Init = GVar->getInitializer();
3605 if ((TLI->isPositionIndependent() || TLI->getSubtarget()->isROPI()) &&
3606 Init->needsDynamicRelocation())
3607 return SDValue();
3608
3609 // The constant islands pass can only really deal with alignment requests
3610 // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote
3611 // any type wanting greater alignment requirements than 4 bytes. We also
3612 // can only promote constants that are multiples of 4 bytes in size or
3613 // are paddable to a multiple of 4. Currently we only try and pad constants
3614 // that are strings for simplicity.
3615 auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
3616 unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
3617 Align PrefAlign = DAG.getDataLayout().getPreferredAlign(GVar);
3618 unsigned RequiredPadding = 4 - (Size % 4);
3619 bool PaddingPossible =
3620 RequiredPadding == 4 || (CDAInit && CDAInit->isString());
3621 if (!PaddingPossible || PrefAlign > 4 || Size > ConstpoolPromotionMaxSize ||
3622 Size == 0)
3623 return SDValue();
3624
3625 unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
3626 MachineFunction &MF = DAG.getMachineFunction();
3627 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3628
3629 // We can't bloat the constant pool too much, else the ConstantIslands pass
3630 // may fail to converge. If we haven't promoted this global yet (it may have
3631 // multiple uses), and promoting it would increase the constant pool size (Sz
3632 // > 4), ensure we have space to do so up to MaxTotal.
3633 if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4)
3634 if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >=
3635 ConstpoolPromotionMaxTotal)
3636 return SDValue();
3637
3638 // This is only valid if all users are in a single function; we can't clone
3639 // the constant in general. The LLVM IR unnamed_addr allows merging
3640 // constants, but not cloning them.
3641 //
3642 // We could potentially allow cloning if we could prove all uses of the
3643 // constant in the current function don't care about the address, like
3644 // printf format strings. But that isn't implemented for now.
3645 if (!allUsersAreInFunction(GVar, &F))
3646 return SDValue();
3647
3648 // We're going to inline this global. Pad it out if needed.
3649 if (RequiredPadding != 4) {
3650 StringRef S = CDAInit->getAsString();
3651
3652 SmallVector<uint8_t,16> V(S.size());
3653 std::copy(S.bytes_begin(), S.bytes_end(), V.begin());
3654 while (RequiredPadding--)
3655 V.push_back(0);
3656 Init = ConstantDataArray::get(*DAG.getContext(), V);
3657 }
3658
3659 auto CPVal = ARMConstantPoolConstant::Create(GVar, Init);
3660 SDValue CPAddr = DAG.getTargetConstantPool(CPVal, PtrVT, Align(4));
3661 if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) {
3662 AFI->markGlobalAsPromotedToConstantPool(GVar);
3663 AFI->setPromotedConstpoolIncrease(AFI->getPromotedConstpoolIncrease() +
3664 PaddedSize - 4);
3665 }
3666 ++NumConstpoolPromoted;
3667 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3668}
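For reference, the padding arithmetic above can be traced with a hypothetical 5-byte string initializer (an illustrative C++ sketch only, not part of the listing):

#include <cassert>

int main() {
  // Hypothetical global: a constant string initializer of 5 bytes.
  unsigned Size = 5;
  unsigned RequiredPadding = 4 - (Size % 4);                          // 3
  // Padding is only attempted when the initializer is a string (assumed here).
  unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
  assert(PaddedSize == 8);                                            // next multiple of 4
  // The promoted entry grows the constant pool by PaddedSize - 4 = 4 bytes,
  // which is the amount added to PromotedConstpoolIncrease above.
  return 0;
}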
3669
3670bool ARMTargetLowering::isReadOnly(const GlobalValue *GV) const {
3671 if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
3672 if (!(GV = GA->getBaseObject()))
3673 return false;
3674 if (const auto *V = dyn_cast<GlobalVariable>(GV))
3675 return V->isConstant();
3676 return isa<Function>(GV);
3677}
3678
3679SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op,
3680 SelectionDAG &DAG) const {
3681 switch (Subtarget->getTargetTriple().getObjectFormat()) {
3682  default: llvm_unreachable("unknown object format");
3683 case Triple::COFF:
3684 return LowerGlobalAddressWindows(Op, DAG);
3685 case Triple::ELF:
3686 return LowerGlobalAddressELF(Op, DAG);
3687 case Triple::MachO:
3688 return LowerGlobalAddressDarwin(Op, DAG);
3689 }
3690}
3691
3692SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
3693 SelectionDAG &DAG) const {
3694 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3695 SDLoc dl(Op);
3696 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3697 const TargetMachine &TM = getTargetMachine();
3698 bool IsRO = isReadOnly(GV);
3699
3700 // promoteToConstantPool only if not generating XO text section
3701 if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly())
3702 if (SDValue V = promoteToConstantPool(this, GV, DAG, PtrVT, dl))
3703 return V;
3704
3705 if (isPositionIndependent()) {
3706 bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
3707 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3708 UseGOT_PREL ? ARMII::MO_GOT : 0);
3709 SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3710 if (UseGOT_PREL)
3711 Result =
3712 DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3713 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3714 return Result;
3715 } else if (Subtarget->isROPI() && IsRO) {
3716 // PC-relative.
3717 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
3718 SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3719 return Result;
3720 } else if (Subtarget->isRWPI() && !IsRO) {
3721 // SB-relative.
3722 SDValue RelAddr;
3723 if (Subtarget->useMovt()) {
3724 ++NumMovwMovt;
3725 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
3726 RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
3727 } else { // use literal pool for address constant
3728 ARMConstantPoolValue *CPV =
3729 ARMConstantPoolConstant::Create(GV, ARMCP::SBREL);
3730 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3731 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3732 RelAddr = DAG.getLoad(
3733 PtrVT, dl, DAG.getEntryNode(), CPAddr,
3734 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3735 }
3736 SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT);
3737 SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr);
3738 return Result;
3739 }
3740
3741 // If we have T2 ops, we can materialize the address directly via movt/movw
3742 // pair. This is always cheaper.
3743 if (Subtarget->useMovt()) {
3744 ++NumMovwMovt;
3745 // FIXME: Once remat is capable of dealing with instructions with register
3746 // operands, expand this into two nodes.
3747 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
3748 DAG.getTargetGlobalAddress(GV, dl, PtrVT));
3749 } else {
3750 SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, Align(4));
3751 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3752 return DAG.getLoad(
3753 PtrVT, dl, DAG.getEntryNode(), CPAddr,
3754 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3755 }
3756}
3757
3758SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
3759 SelectionDAG &DAG) const {
3760  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3761         "ROPI/RWPI not currently supported for Darwin");
3762 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3763 SDLoc dl(Op);
3764 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3765
3766 if (Subtarget->useMovt())
3767 ++NumMovwMovt;
3768
3769 // FIXME: Once remat is capable of dealing with instructions with register
3770 // operands, expand this into multiple nodes
3771 unsigned Wrapper =
3772 isPositionIndependent() ? ARMISD::WrapperPIC : ARMISD::Wrapper;
3773
3774 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
3775 SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
3776
3777 if (Subtarget->isGVIndirectSymbol(GV))
3778 Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3779 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3780 return Result;
3781}
3782
3783SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
3784 SelectionDAG &DAG) const {
3785  assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
3786  assert(Subtarget->useMovt() &&
3787         "Windows on ARM expects to use movw/movt");
3788  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3789         "ROPI/RWPI not currently supported for Windows");
3790
3791 const TargetMachine &TM = getTargetMachine();
3792 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3793 ARMII::TOF TargetFlags = ARMII::MO_NO_FLAG;
3794 if (GV->hasDLLImportStorageClass())
3795 TargetFlags = ARMII::MO_DLLIMPORT;
3796 else if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
3797 TargetFlags = ARMII::MO_COFFSTUB;
3798 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3799 SDValue Result;
3800 SDLoc DL(Op);
3801
3802 ++NumMovwMovt;
3803
3804 // FIXME: Once remat is capable of dealing with instructions with register
3805 // operands, expand this into two nodes.
3806 Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
3807 DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*offset=*/0,
3808 TargetFlags));
3809 if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
3810 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3811 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3812 return Result;
3813}
3814
3815SDValue
3816ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
3817 SDLoc dl(Op);
3818 SDValue Val = DAG.getConstant(0, dl, MVT::i32);
3819 return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
3820 DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
3821 Op.getOperand(1), Val);
3822}
3823
3824SDValue
3825ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
3826 SDLoc dl(Op);
3827 return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
3828 Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
3829}
3830
3831SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
3832 SelectionDAG &DAG) const {
3833 SDLoc dl(Op);
3834 return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other,
3835 Op.getOperand(0));
3836}
3837
3838SDValue ARMTargetLowering::LowerINTRINSIC_VOID(
3839 SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const {
3840 unsigned IntNo =
3841 cast<ConstantSDNode>(
3842 Op.getOperand(Op.getOperand(0).getValueType() == MVT::Other))
3843 ->getZExtValue();
3844 switch (IntNo) {
3845 default:
3846 return SDValue(); // Don't custom lower most intrinsics.
3847 case Intrinsic::arm_gnu_eabi_mcount: {
3848 MachineFunction &MF = DAG.getMachineFunction();
3849 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3850 SDLoc dl(Op);
3851 SDValue Chain = Op.getOperand(0);
3852 // call "\01__gnu_mcount_nc"
3853 const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
3854 const uint32_t *Mask =
3855 ARI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
3856    assert(Mask && "Missing call preserved mask for calling convention");
3857    // Mark LR as an implicit live-in.
3858 unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
3859 SDValue ReturnAddress =
3860 DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, PtrVT);
3861 constexpr EVT ResultTys[] = {MVT::Other, MVT::Glue};
3862 SDValue Callee =
3863 DAG.getTargetExternalSymbol("\01__gnu_mcount_nc", PtrVT, 0);
3864 SDValue RegisterMask = DAG.getRegisterMask(Mask);
3865 if (Subtarget->isThumb())
3866 return SDValue(
3867 DAG.getMachineNode(
3868 ARM::tBL_PUSHLR, dl, ResultTys,
3869 {ReturnAddress, DAG.getTargetConstant(ARMCC::AL, dl, PtrVT),
3870 DAG.getRegister(0, PtrVT), Callee, RegisterMask, Chain}),
3871 0);
3872 return SDValue(
3873 DAG.getMachineNode(ARM::BL_PUSHLR, dl, ResultTys,
3874 {ReturnAddress, Callee, RegisterMask, Chain}),
3875 0);
3876 }
3877 }
3878}
3879
3880SDValue
3881ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
3882 const ARMSubtarget *Subtarget) const {
3883 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3884 SDLoc dl(Op);
3885 switch (IntNo) {
3886 default: return SDValue(); // Don't custom lower most intrinsics.
3887 case Intrinsic::thread_pointer: {
3888 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3889 return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
3890 }
3891 case Intrinsic::arm_cls: {
3892 const SDValue &Operand = Op.getOperand(1);
3893 const EVT VTy = Op.getValueType();
3894 SDValue SRA =
3895 DAG.getNode(ISD::SRA, dl, VTy, Operand, DAG.getConstant(31, dl, VTy));
3896 SDValue XOR = DAG.getNode(ISD::XOR, dl, VTy, SRA, Operand);
3897 SDValue SHL =
3898 DAG.getNode(ISD::SHL, dl, VTy, XOR, DAG.getConstant(1, dl, VTy));
3899 SDValue OR =
3900 DAG.getNode(ISD::OR, dl, VTy, SHL, DAG.getConstant(1, dl, VTy));
3901 SDValue Result = DAG.getNode(ISD::CTLZ, dl, VTy, OR);
3902 return Result;
3903 }
3904 case Intrinsic::arm_cls64: {
3905 // cls(x) = if cls(hi(x)) != 31 then cls(hi(x))
3906 // else 31 + clz(if hi(x) == 0 then lo(x) else not(lo(x)))
3907 const SDValue &Operand = Op.getOperand(1);
3908 const EVT VTy = Op.getValueType();
3909
3910 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VTy, Operand,
3911 DAG.getConstant(1, dl, VTy));
3912 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VTy, Operand,
3913 DAG.getConstant(0, dl, VTy));
3914 SDValue Constant0 = DAG.getConstant(0, dl, VTy);
3915 SDValue Constant1 = DAG.getConstant(1, dl, VTy);
3916 SDValue Constant31 = DAG.getConstant(31, dl, VTy);
3917 SDValue SRAHi = DAG.getNode(ISD::SRA, dl, VTy, Hi, Constant31);
3918 SDValue XORHi = DAG.getNode(ISD::XOR, dl, VTy, SRAHi, Hi);
3919 SDValue SHLHi = DAG.getNode(ISD::SHL, dl, VTy, XORHi, Constant1);
3920 SDValue ORHi = DAG.getNode(ISD::OR, dl, VTy, SHLHi, Constant1);
3921 SDValue CLSHi = DAG.getNode(ISD::CTLZ, dl, VTy, ORHi);
3922 SDValue CheckLo =
3923 DAG.getSetCC(dl, MVT::i1, CLSHi, Constant31, ISD::CondCode::SETEQ);
3924 SDValue HiIsZero =
3925 DAG.getSetCC(dl, MVT::i1, Hi, Constant0, ISD::CondCode::SETEQ);
3926 SDValue AdjustedLo =
3927 DAG.getSelect(dl, VTy, HiIsZero, Lo, DAG.getNOT(dl, Lo, VTy));
3928 SDValue CLZAdjustedLo = DAG.getNode(ISD::CTLZ, dl, VTy, AdjustedLo);
3929 SDValue Result =
3930 DAG.getSelect(dl, VTy, CheckLo,
3931 DAG.getNode(ISD::ADD, dl, VTy, CLZAdjustedLo, Constant31), CLSHi);
3932 return Result;
3933 }
3934 case Intrinsic::eh_sjlj_lsda: {
3935 MachineFunction &MF = DAG.getMachineFunction();
3936 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3937 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3938 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3939 SDValue CPAddr;
3940 bool IsPositionIndependent = isPositionIndependent();
3941 unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
3942 ARMConstantPoolValue *CPV =
3943 ARMConstantPoolConstant::Create(&MF.getFunction(), ARMPCLabelIndex,
3944 ARMCP::CPLSDA, PCAdj);
3945 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3946 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3947 SDValue Result = DAG.getLoad(
3948 PtrVT, dl, DAG.getEntryNode(), CPAddr,
3949 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3950
3951 if (IsPositionIndependent) {
3952 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3953 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
3954 }
3955 return Result;
3956 }
3957 case Intrinsic::arm_neon_vabs:
3958 return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(),
3959 Op.getOperand(1));
3960 case Intrinsic::arm_neon_vmulls:
3961 case Intrinsic::arm_neon_vmullu: {
3962 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
3963 ? ARMISD::VMULLs : ARMISD::VMULLu;
3964 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3965 Op.getOperand(1), Op.getOperand(2));
3966 }
3967 case Intrinsic::arm_neon_vminnm:
3968 case Intrinsic::arm_neon_vmaxnm: {
3969 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)
3970 ? ISD::FMINNUM : ISD::FMAXNUM;
3971 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3972 Op.getOperand(1), Op.getOperand(2));
3973 }
3974 case Intrinsic::arm_neon_vminu:
3975 case Intrinsic::arm_neon_vmaxu: {
3976 if (Op.getValueType().isFloatingPoint())
3977 return SDValue();
3978 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)
3979 ? ISD::UMIN : ISD::UMAX;
3980 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3981 Op.getOperand(1), Op.getOperand(2));
3982 }
3983 case Intrinsic::arm_neon_vmins:
3984 case Intrinsic::arm_neon_vmaxs: {
3985 // v{min,max}s is overloaded between signed integers and floats.
3986 if (!Op.getValueType().isFloatingPoint()) {
3987 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3988 ? ISD::SMIN : ISD::SMAX;
3989 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3990 Op.getOperand(1), Op.getOperand(2));
3991 }
3992 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3993 ? ISD::FMINIMUM : ISD::FMAXIMUM;
3994 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3995 Op.getOperand(1), Op.getOperand(2));
3996 }
3997 case Intrinsic::arm_neon_vtbl1:
3998 return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(),
3999 Op.getOperand(1), Op.getOperand(2));
4000 case Intrinsic::arm_neon_vtbl2:
4001 return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(),
4002 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4003 case Intrinsic::arm_mve_pred_i2v:
4004 case Intrinsic::arm_mve_pred_v2i:
4005 return DAG.getNode(ARMISD::PREDICATE_CAST, SDLoc(Op), Op.getValueType(),
4006 Op.getOperand(1));
4007 case Intrinsic::arm_mve_vreinterpretq:
4008 return DAG.getNode(ARMISD::VECTOR_REG_CAST, SDLoc(Op), Op.getValueType(),
4009 Op.getOperand(1));
4010 case Intrinsic::arm_mve_lsll:
4011 return DAG.getNode(ARMISD::LSLL, SDLoc(Op), Op->getVTList(),
4012 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4013 case Intrinsic::arm_mve_asrl:
4014 return DAG.getNode(ARMISD::ASRL, SDLoc(Op), Op->getVTList(),
4015 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4016 }
4017}
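As an aside, the arm_cls expansion in the i32 case above corresponds to the following C++ model of "count leading sign bits" (a sketch assuming 32-bit int and an arithmetic right shift; not the code the backend emits):

#include <cassert>
#include <cstdint>

// cls(x): number of leading bits equal to the sign bit, excluding the sign bit.
static unsigned clsModel(int32_t X) {
  uint32_t Folded = (uint32_t)(X >> 31) ^ (uint32_t)X; // negatives fold to ~X
  uint32_t NonZero = (Folded << 1) | 1u;               // force a set bit so clz is defined
  return (unsigned)__builtin_clz(NonZero);             // matches the final CTLZ node
}

int main() {
  assert(clsModel(0) == 31);
  assert(clsModel(-1) == 31);
  assert(clsModel(1) == 30);
  return 0;
}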
4018
4019static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
4020 const ARMSubtarget *Subtarget) {
4021 SDLoc dl(Op);
4022 ConstantSDNode *SSIDNode = cast<ConstantSDNode>(Op.getOperand(2));
4023 auto SSID = static_cast<SyncScope::ID>(SSIDNode->getZExtValue());
4024 if (SSID == SyncScope::SingleThread)
4025 return Op;
4026
4027 if (!Subtarget->hasDataBarrier()) {
4028 // Some ARMv6 cpus can support data barriers with an mcr instruction.
4029 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
4030 // here.
4031    assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
4032           "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
4033 return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
4034 DAG.getConstant(0, dl, MVT::i32));
4035 }
4036
4037 ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
4038 AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
4039 ARM_MB::MemBOpt Domain = ARM_MB::ISH;
4040 if (Subtarget->isMClass()) {
4041 // Only a full system barrier exists in the M-class architectures.
4042 Domain = ARM_MB::SY;
4043 } else if (Subtarget->preferISHSTBarriers() &&
4044 Ord == AtomicOrdering::Release) {
4045 // Swift happens to implement ISHST barriers in a way that's compatible with
4046 // Release semantics but weaker than ISH so we'd be fools not to use
4047 // it. Beware: other processors probably don't!
4048 Domain = ARM_MB::ISHST;
4049 }
4050
4051 return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
4052 DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32),
4053 DAG.getConstant(Domain, dl, MVT::i32));
4054}
4055
4056static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
4057 const ARMSubtarget *Subtarget) {
4058  // ARM pre v5TE and Thumb1 do not have preload instructions.
4059 if (!(Subtarget->isThumb2() ||
4060 (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
4061 // Just preserve the chain.
4062 return Op.getOperand(0);
4063
4064 SDLoc dl(Op);
4065 unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
4066 if (!isRead &&
4067 (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
4068 // ARMv7 with MP extension has PLDW.
4069 return Op.getOperand(0);
4070
4071 unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
4072 if (Subtarget->isThumb()) {
4073 // Invert the bits.
4074 isRead = ~isRead & 1;
4075 isData = ~isData & 1;
4076 }
4077
4078 return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
4079 Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32),
4080 DAG.getConstant(isData, dl, MVT::i32));
4081}
4082
4083static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
4084 MachineFunction &MF = DAG.getMachineFunction();
4085 ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
4086
4087 // vastart just stores the address of the VarArgsFrameIndex slot into the
4088 // memory location argument.
4089 SDLoc dl(Op);
4090 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
4091 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4092 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4093 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
4094 MachinePointerInfo(SV));
4095}
4096
4097SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA,
4098 CCValAssign &NextVA,
4099 SDValue &Root,
4100 SelectionDAG &DAG,
4101 const SDLoc &dl) const {
4102 MachineFunction &MF = DAG.getMachineFunction();
4103 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
4104
4105 const TargetRegisterClass *RC;
4106 if (AFI->isThumb1OnlyFunction())
4107 RC = &ARM::tGPRRegClass;
4108 else
4109 RC = &ARM::GPRRegClass;
4110
4111 // Transform the arguments stored in physical registers into virtual ones.
4112 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
4113 SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
4114
4115 SDValue ArgValue2;
4116 if (NextVA.isMemLoc()) {
4117 MachineFrameInfo &MFI = MF.getFrameInfo();
4118 int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true);
4119
4120 // Create load node to retrieve arguments from the stack.
4121 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4122 ArgValue2 = DAG.getLoad(
4123 MVT::i32, dl, Root, FIN,
4124 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
4125 } else {
4126 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
4127 ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
4128 }
4129 if (!Subtarget->isLittle())
4130 std::swap (ArgValue, ArgValue2);
4131 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
4132}
4133
4134// The remaining GPRs hold either the beginning of variable-argument
4135// data, or the beginning of an aggregate passed by value (usually
4136// byval). Either way, we allocate stack slots adjacent to the data
4137// provided by our caller, and store the unallocated registers there.
4138// If this is a variadic function, the va_list pointer will begin with
4139// these values; otherwise, this reassembles a (byval) structure that
4140// was split between registers and memory.
4141// Return: The frame index registers were stored into.
4142int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
4143 const SDLoc &dl, SDValue &Chain,
4144 const Value *OrigArg,
4145 unsigned InRegsParamRecordIdx,
4146 int ArgOffset, unsigned ArgSize) const {
4147  // Currently, two use-cases are possible:
4148  // Case #1. Non-var-args function, and we meet the first byval parameter.
4149  //          Set up the first unallocated register as the first byval register
4150  //          and eat all remaining registers
4151  //          (these two actions are performed by the HandleByVal method).
4152  //          Then, here, we initialize the stack frame with
4153  //          "store-reg" instructions.
4154  // Case #2. Var-args function that doesn't contain byval parameters.
4155  //          The same: eat all remaining unallocated registers and
4156  //          initialize the stack frame.
4157
4158 MachineFunction &MF = DAG.getMachineFunction();
4159 MachineFrameInfo &MFI = MF.getFrameInfo();
4160 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
4161 unsigned RBegin, REnd;
4162 if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
4163 CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
4164 } else {
4165 unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
4166 RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];
4167 REnd = ARM::R4;
4168 }
4169
4170 if (REnd != RBegin)
4171 ArgOffset = -4 * (ARM::R4 - RBegin);
4172
4173 auto PtrVT = getPointerTy(DAG.getDataLayout());
4174 int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false);
4175 SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);
4176
4177 SmallVector<SDValue, 4> MemOps;
4178 const TargetRegisterClass *RC =
4179 AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
4180
4181 for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
4182 unsigned VReg = MF.addLiveIn(Reg, RC);
4183 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
4184 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4185 MachinePointerInfo(OrigArg, 4 * i));
4186 MemOps.push_back(Store);
4187 FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));
4188 }
4189
4190 if (!MemOps.empty())
4191 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4192 return FrameIndex;
4193}
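As a worked example of the register-range bookkeeping above (hypothetical values, not taken from the listing): if the first unallocated GPR is R2, then RBegin = R2 and REnd = R4, so ArgOffset becomes -4 * (R4 - R2) = -8. The fixed object is therefore created 8 bytes below the CFA, and the loop stores R2 and R3 into its first and second words, exactly where the va_list (or the reassembled byval struct) expects to find them.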
4194
4195// Set up the stack frame that the va_list pointer will start from.
4196void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
4197 const SDLoc &dl, SDValue &Chain,
4198 unsigned ArgOffset,
4199 unsigned TotalArgRegsSaveSize,
4200 bool ForceMutable) const {
4201 MachineFunction &MF = DAG.getMachineFunction();
4202 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
4203
4204  // Try to store any remaining integer argument regs
4205  // to their spots on the stack so that they may be loaded by dereferencing
4206  // the result of va_next.
4207  // If there are no regs to be stored, just point the address after the last
4208  // argument passed via the stack.
4209 int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
4210 CCInfo.getInRegsParamsCount(),
4211 CCInfo.getNextStackOffset(),
4212 std::max(4U, TotalArgRegsSaveSize));
4213 AFI->setVarArgsFrameIndex(FrameIndex);
4214}
4215
4216bool ARMTargetLowering::splitValueIntoRegisterParts(
4217 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
4218 unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const {
4219 bool IsABIRegCopy = CC.hasValue();
4220 EVT ValueVT = Val.getValueType();
4221 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
4222 PartVT == MVT::f32) {
4223 unsigned ValueBits = ValueVT.getSizeInBits();
4224 unsigned PartBits = PartVT.getSizeInBits();
4225 Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(ValueBits), Val);
4226 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::getIntegerVT(PartBits), Val);
4227 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
4228 Parts[0] = Val;
4229 return true;
4230 }
4231 return false;
4232}
4233
4234SDValue ARMTargetLowering::joinRegisterPartsIntoValue(
4235 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
4236 MVT PartVT, EVT ValueVT, Optional<CallingConv::ID> CC) const {
4237 bool IsABIRegCopy = CC.hasValue();
4238 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
4239 PartVT == MVT::f32) {
4240 unsigned ValueBits = ValueVT.getSizeInBits();
4241 unsigned PartBits = PartVT.getSizeInBits();
4242 SDValue Val = Parts[0];
4243
4244 Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(PartBits), Val);
4245 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::getIntegerVT(ValueBits), Val);
4246 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
4247 return Val;
4248 }
4249 return SDValue();
4250}
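A hedged sketch of the bit-level effect of the two routines above (illustrative C++ only, not what the compiler emits): an f16/bf16 value travels in the low 16 bits of an f32 register, so the split is a bitcast to i16, an any-extend to i32, and a bitcast to f32; the join reverses it.

#include <cstdint>
#include <cstring>

// Pack a 16-bit half pattern into the low bits of a float register image.
static float splitHalfToF32(uint16_t HalfBits) {
  uint32_t Wide = HalfBits;                    // any-extend i16 -> i32 (upper bits chosen as 0 here)
  float F;
  std::memcpy(&F, &Wide, sizeof(F));           // bitcast i32 -> f32
  return F;
}

// Recover the half pattern from the float register image.
static uint16_t joinF32ToHalf(float F) {
  uint32_t Wide;
  std::memcpy(&Wide, &F, sizeof(Wide));        // bitcast f32 -> i32
  return (uint16_t)Wide;                       // truncate i32 -> i16
}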
4251
4252SDValue ARMTargetLowering::LowerFormalArguments(
4253 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4254 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4255 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4256 MachineFunction &MF = DAG.getMachineFunction();
4257 MachineFrameInfo &MFI = MF.getFrameInfo();
4258
4259 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
4260
4261 // Assign locations to all of the incoming arguments.
4262 SmallVector<CCValAssign, 16> ArgLocs;
4263 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4264 *DAG.getContext());
4265 CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
4266
4267 SmallVector<SDValue, 16> ArgValues;
4268 SDValue ArgValue;
4269 Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
4270 unsigned CurArgIdx = 0;
4271
4272 // Initially ArgRegsSaveSize is zero.
4273 // Then we increase this value each time we meet byval parameter.
4274 // We also increase this value in case of varargs function.
4275 AFI->setArgRegsSaveSize(0);
4276
4277 // Calculate the amount of stack space that we need to allocate to store
4278 // byval and variadic arguments that are passed in registers.
4279 // We need to know this before we allocate the first byval or variadic
4280 // argument, as they will be allocated a stack slot below the CFA (Canonical
4281 // Frame Address, the stack pointer at entry to the function).
4282 unsigned ArgRegBegin = ARM::R4;
4283 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4284 if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
4285 break;
4286
4287 CCValAssign &VA = ArgLocs[i];
4288 unsigned Index = VA.getValNo();
4289 ISD::ArgFlagsTy Flags = Ins[Index].Flags;
4290 if (!Flags.isByVal())
4291 continue;
4292
4293    assert(VA.isMemLoc() && "unexpected byval pointer in reg");
4294 unsigned RBegin, REnd;
4295 CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
4296 ArgRegBegin = std::min(ArgRegBegin, RBegin);
4297
4298 CCInfo.nextInRegsParam();
4299 }
4300 CCInfo.rewindByValRegsInfo();
4301
4302 int lastInsIndex = -1;
4303 if (isVarArg && MFI.hasVAStart()) {
4304 unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
4305 if (RegIdx != array_lengthof(GPRArgRegs))
4306 ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
4307 }
4308
4309 unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
4310 AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
4311 auto PtrVT = getPointerTy(DAG.getDataLayout());
4312
4313 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4314 CCValAssign &VA = ArgLocs[i];
4315 if (Ins[VA.getValNo()].isOrigArg()) {
4316 std::advance(CurOrigArg,
4317 Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
4318 CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
4319 }
4320 // Arguments stored in registers.
4321 if (VA.isRegLoc()) {
4322 EVT RegVT = VA.getLocVT();
4323
4324 if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {
4325 // f64 and vector types are split up into multiple registers or
4326 // combinations of registers and stack slots.
4327 SDValue ArgValue1 =
4328 GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4329 VA = ArgLocs[++i]; // skip ahead to next loc
4330 SDValue ArgValue2;
4331 if (VA.isMemLoc()) {
4332 int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);
4333 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4334 ArgValue2 = DAG.getLoad(
4335 MVT::f64, dl, Chain, FIN,
4336 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
4337 } else {
4338 ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4339 }
4340 ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
4341 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue,
4342 ArgValue1, DAG.getIntPtrConstant(0, dl));
4343 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue,
4344 ArgValue2, DAG.getIntPtrConstant(1, dl));
4345 } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) {
4346 ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4347 } else {
4348 const TargetRegisterClass *RC;
4349
4350 if (RegVT == MVT::f16 || RegVT == MVT::bf16)
4351 RC = &ARM::HPRRegClass;
4352 else if (RegVT == MVT::f32)
4353 RC = &ARM::SPRRegClass;
4354 else if (RegVT == MVT::f64 || RegVT == MVT::v4f16 ||
4355 RegVT == MVT::v4bf16)
4356 RC = &ARM::DPRRegClass;
4357 else if (RegVT == MVT::v2f64 || RegVT == MVT::v8f16 ||
4358 RegVT == MVT::v8bf16)
4359 RC = &ARM::QPRRegClass;
4360 else if (RegVT == MVT::i32)
4361 RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
4362 : &ARM::GPRRegClass;
4363 else
4364          llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
4365
4366 // Transform the arguments in physical registers into virtual ones.
4367 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
4368 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
4369
4370 // If this value is passed in r0 and has the returned attribute (e.g.
4371 // C++ 'structors), record this fact for later use.
4372 if (VA.getLocReg() == ARM::R0 && Ins[VA.getValNo()].Flags.isReturned()) {
4373 AFI->setPreservesR0();
4374 }
4375 }
4376
4377 // If this is an 8 or 16-bit value, it is really passed promoted
4378 // to 32 bits. Insert an assert[sz]ext to capture this, then
4379 // truncate to the right size.
4380 switch (VA.getLocInfo()) {
4381      default: llvm_unreachable("Unknown loc info!");
4382 case CCValAssign::Full: break;
4383 case CCValAssign::BCvt:
4384 ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
4385 break;
4386 case CCValAssign::SExt:
4387 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
4388 DAG.getValueType(VA.getValVT()));
4389 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
4390 break;
4391 case CCValAssign::ZExt:
4392 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
4393 DAG.getValueType(VA.getValVT()));
4394 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
4395 break;
4396 }
4397
4398 // f16 arguments have their size extended to 4 bytes and passed as if they
4399 // had been copied to the LSBs of a 32-bit register.
4400 // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
4401 if (VA.needsCustom() &&
4402 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
4403 ArgValue = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), ArgValue);
4404
4405 InVals.push_back(ArgValue);
4406 } else { // VA.isRegLoc()
4407 // sanity check
4408      assert(VA.isMemLoc());
4409      assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
4410
4411 int index = VA.getValNo();
4412
4413 // Some Ins[] entries become multiple ArgLoc[] entries.
4414 // Process them only once.
4415 if (index != lastInsIndex)
4416 {
4417 ISD::ArgFlagsTy Flags = Ins[index].Flags;
4418 // FIXME: For now, all byval parameter objects are marked mutable.
4419 // This can be changed with more analysis.
4420 // In case of tail call optimization mark all arguments mutable.
4421 // Since they could be overwritten by lowering of arguments in case of
4422 // a tail call.
4423 if (Flags.isByVal()) {
4424          assert(Ins[index].isOrigArg() &&
4425                 "Byval arguments cannot be implicit");
4426 unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
4427
4428 int FrameIndex = StoreByValRegs(
4429 CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex,
4430 VA.getLocMemOffset(), Flags.getByValSize());
4431 InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));
4432 CCInfo.nextInRegsParam();
4433 } else {
4434 unsigned FIOffset = VA.getLocMemOffset();
4435 int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
4436 FIOffset, true);
4437
4438 // Create load nodes to retrieve arguments from the stack.
4439 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4440 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
4441 MachinePointerInfo::getFixedStack(
4442 DAG.getMachineFunction(), FI)));
4443 }
4444 lastInsIndex = index;
4445 }
4446 }
4447 }
4448
4449 // varargs
4450 if (isVarArg && MFI.hasVAStart()) {
4451 VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getNextStackOffset(),
4452 TotalArgRegsSaveSize);
4453 if (AFI->isCmseNSEntryFunction()) {
4454 DiagnosticInfoUnsupported Diag(
4455 DAG.getMachineFunction().getFunction(),
4456 "secure entry function must not be variadic", dl.getDebugLoc());
4457 DAG.getContext()->diagnose(Diag);
4458 }
4459 }
4460
4461 AFI->setArgumentStackSize(CCInfo.getNextStackOffset());
4462
4463 if (CCInfo.getNextStackOffset() > 0 && AFI->isCmseNSEntryFunction()) {
4464 DiagnosticInfoUnsupported Diag(
4465 DAG.getMachineFunction().getFunction(),
4466 "secure entry function requires arguments on stack", dl.getDebugLoc());
4467 DAG.getContext()->diagnose(Diag);
4468 }
4469
4470 return Chain;
4471}
4472
4473/// isFloatingPointZero - Return true if this is +0.0.
4474static bool isFloatingPointZero(SDValue Op) {
4475 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
4476 return CFP->getValueAPF().isPosZero();
4477 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
4478 // Maybe this has already been legalized into the constant pool?
4479 if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
4480 SDValue WrapperOp = Op.getOperand(1).getOperand(0);
4481 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
4482 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
4483 return CFP->getValueAPF().isPosZero();
4484 }
4485 } else if (Op->getOpcode() == ISD::BITCAST &&
4486 Op->getValueType(0) == MVT::f64) {
4487 // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
4488 // created by LowerConstantFP().
4489 SDValue BitcastOp = Op->getOperand(0);
4490 if (BitcastOp->getOpcode() == ARMISD::VMOVIMM &&
4491 isNullConstant(BitcastOp->getOperand(0)))
4492 return true;
4493 }
4494 return false;
4495}
4496
4497/// Returns appropriate ARM CMP (cmp) and corresponding condition code for
4498/// the given operands.
4499SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
4500 SDValue &ARMcc, SelectionDAG &DAG,
4501 const SDLoc &dl) const {
4502 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
4503 unsigned C = RHSC->getZExtValue();
4504 if (!isLegalICmpImmediate((int32_t)C)) {
4505 // Constant does not fit, try adjusting it by one.
4506 switch (CC) {
4507 default: break;
4508 case ISD::SETLT:
4509 case ISD::SETGE:
4510 if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
4511 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
4512 RHS = DAG.getConstant(C - 1, dl, MVT::i32);
4513 }
4514 break;
4515 case ISD::SETULT:
4516 case ISD::SETUGE:
4517 if (C != 0 && isLegalICmpImmediate(C-1)) {
4518 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
4519 RHS = DAG.getConstant(C - 1, dl, MVT::i32);
4520 }
4521 break;
4522 case ISD::SETLE:
4523 case ISD::SETGT:
4524 if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
4525 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
4526 RHS = DAG.getConstant(C + 1, dl, MVT::i32);
4527 }
4528 break;
4529 case ISD::SETULE:
4530 case ISD::SETUGT:
4531 if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
4532 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
4533 RHS = DAG.getConstant(C + 1, dl, MVT::i32);
4534 }
4535 break;
4536 }
4537 }
4538 } else if ((ARM_AM::getShiftOpcForNode(LHS.getOpcode()) != ARM_AM::no_shift) &&
4539 (ARM_AM::getShiftOpcForNode(RHS.getOpcode()) == ARM_AM::no_shift)) {
4540 // In ARM and Thumb-2, the compare instructions can shift their second
4541 // operand.
4542 CC = ISD::getSetCCSwappedOperands(CC);
4543 std::swap(LHS, RHS);
4544 }
4545
4546 // Thumb1 has very limited immediate modes, so turning an "and" into a
4547 // shift can save multiple instructions.
4548 //
4549 // If we have (x & C1), and C1 is an appropriate mask, we can transform it
4550 // into "((x << n) >> n)". But that isn't necessarily profitable on its
4551 // own. If it's the operand to an unsigned comparison with an immediate,
4552 // we can eliminate one of the shifts: we transform
4553 // "((x << n) >> n) == C2" to "(x << n) == (C2 << n)".
4554 //
4555 // We avoid transforming cases which aren't profitable due to encoding
4556 // details:
4557 //
4558 // 1. C2 fits into the immediate field of a cmp, and the transformed version
4559 // would not; in that case, we're essentially trading one immediate load for
4560 // another.
4561 // 2. C1 is 255 or 65535, so we can use uxtb or uxth.
4562 // 3. C2 is zero; we have other code for this special case.
4563 //
4564 // FIXME: Figure out profitability for Thumb2; we usually can't save an
4565 // instruction, since the AND is always one instruction anyway, but we could
4566 // use narrow instructions in some cases.
4567 if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::AND &&
4568 LHS->hasOneUse() && isa<ConstantSDNode>(LHS.getOperand(1)) &&
4569 LHS.getValueType() == MVT::i32 && isa<ConstantSDNode>(RHS) &&
4570 !isSignedIntSetCC(CC)) {
4571 unsigned Mask = cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue();
4572 auto *RHSC = cast<ConstantSDNode>(RHS.getNode());
4573 uint64_t RHSV = RHSC->getZExtValue();
4574 if (isMask_32(Mask) && (RHSV & ~Mask) == 0 && Mask != 255 && Mask != 65535) {
4575 unsigned ShiftBits = countLeadingZeros(Mask);
4576 if (RHSV && (RHSV > 255 || (RHSV << ShiftBits) <= 255)) {
4577 SDValue ShiftAmt = DAG.getConstant(ShiftBits, dl, MVT::i32);
4578 LHS = DAG.getNode(ISD::SHL, dl, MVT::i32, LHS.getOperand(0), ShiftAmt);
4579 RHS = DAG.getConstant(RHSV << ShiftBits, dl, MVT::i32);
4580 }
4581 }
4582 }
4583
4584 // The specific comparison "(x<<c) > 0x80000000U" can be optimized to a
4585 // single "lsls x, c+1". The shift sets the "C" and "Z" flags the same
4586 // way a cmp would.
4587 // FIXME: Add support for ARM/Thumb2; this would need isel patterns, and
4588 // some tweaks to the heuristics for the previous and->shift transform.
4589 // FIXME: Optimize cases where the LHS isn't a shift.
4590 if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::SHL &&
4591 isa<ConstantSDNode>(RHS) &&
4592 cast<ConstantSDNode>(RHS)->getZExtValue() == 0x80000000U &&
4593 CC == ISD::SETUGT && isa<ConstantSDNode>(LHS.getOperand(1)) &&
4594 cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() < 31) {
4595 unsigned ShiftAmt =
4596 cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() + 1;
4597 SDValue Shift = DAG.getNode(ARMISD::LSLS, dl,
4598 DAG.getVTList(MVT::i32, MVT::i32),
4599 LHS.getOperand(0),
4600 DAG.getConstant(ShiftAmt, dl, MVT::i32));
4601 SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
4602 Shift.getValue(1), SDValue());
4603 ARMcc = DAG.getConstant(ARMCC::HI, dl, MVT::i32);
4604 return Chain.getValue(1);
4605 }
4606
4607 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
4608
4609 // If the RHS is a constant zero then the V (overflow) flag will never be
4610 // set. This can allow us to simplify GE to PL or LT to MI, which can be
4611 // simpler for other passes (like the peephole optimiser) to deal with.
4612 if (isNullConstant(RHS)) {
4613 switch (CondCode) {
4614 default: break;
4615 case ARMCC::GE:
4616 CondCode = ARMCC::PL;
4617 break;
4618 case ARMCC::LT:
4619 CondCode = ARMCC::MI;
4620 break;
4621 }
4622 }
4623
4624 ARMISD::NodeType CompareType;
4625 switch (CondCode) {
4626 default:
4627 CompareType = ARMISD::CMP;
4628 break;
4629 case ARMCC::EQ:
4630 case ARMCC::NE:
4631 // Uses only Z Flag
4632 CompareType = ARMISD::CMPZ;
4633 break;
4634 }
4635 ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
4636 return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
4637}
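The constant-adjustment cases above fold an out-of-range immediate by nudging the comparison by one, e.g. an unsigned x < 0x100 (SETULT) becomes x <= 0xFF (SETULE). A hedged C++ check of that equivalence (illustrative only, not the lowering code):

#include <cassert>
#include <cstdint>

int main() {
  // SETULT C  <=>  SETULE C-1  (for C != 0), which can turn an unencodable
  // cmp immediate into an encodable one, as in the switch above.
  const uint32_t C = 0x100;
  for (uint64_t X = 0; X < 0x300; ++X)
    assert((X < C) == (X <= C - 1));
  return 0;
}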
4638
4639/// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
4640SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
4641 SelectionDAG &DAG, const SDLoc &dl,
4642 bool Signaling) const {
4643  assert(Subtarget->hasFP64() || RHS.getValueType() != MVT::f64);
4644 SDValue Cmp;
4645 if (!isFloatingPointZero(RHS))
4646 Cmp = DAG.getNode(Signaling ? ARMISD::CMPFPE : ARMISD::CMPFP,
4647 dl, MVT::Glue, LHS, RHS);
4648 else
4649 Cmp = DAG.getNode(Signaling ? ARMISD::CMPFPEw0 : ARMISD::CMPFPw0,
4650 dl, MVT::Glue, LHS);
4651 return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
4652}
4653
4654/// duplicateCmp - Glue values can have only one use, so this function
4655/// duplicates a comparison node.
4656SDValue
4657ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
4658 unsigned Opc = Cmp.getOpcode();
4659 SDLoc DL(Cmp);
4660 if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
4661 return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
4662
4663  assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
4664 Cmp = Cmp.getOperand(0);
4665 Opc = Cmp.getOpcode();
4666 if (Opc == ARMISD::CMPFP)
4667 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
4668 else {
4669    assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
4670 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));
4671 }
4672 return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
4673}
4674
4675// This function returns three things: the arithmetic computation itself
4676// (Value), a comparison (OverflowCmp), and a condition code (ARMcc). The
4677// comparison and the condition code define the case in which the arithmetic
4678// computation *does not* overflow.
4679std::pair<SDValue, SDValue>
4680ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
4681 SDValue &ARMcc) const {
4682  assert(Op.getValueType() == MVT::i32 && "Unsupported value type");
4683
4684 SDValue Value, OverflowCmp;
4685 SDValue LHS = Op.getOperand(0);
4686 SDValue RHS = Op.getOperand(1);
4687 SDLoc dl(Op);
4688
4689 // FIXME: We are currently always generating CMPs because we don't support
4690 // generating CMN through the backend. This is not as good as the natural
4691 // CMP case because it causes a register dependency and cannot be folded
4692 // later.
4693
4694 switch (Op.getOpcode()) {
4695 default:
4696    llvm_unreachable("Unknown overflow instruction!");
4697 case ISD::SADDO:
4698 ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
4699 Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
4700 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
4701 break;
4702 case ISD::UADDO:
4703 ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
4704 // We use ADDC here to correspond to its use in LowerUnsignedALUO.
4705 // We do not use it in the USUBO case as Value may not be used.
4706 Value = DAG.getNode(ARMISD::ADDC, dl,
4707 DAG.getVTList(Op.getValueType(), MVT::i32), LHS, RHS)
4708 .getValue(0);
4709 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
4710 break;
4711 case ISD::SSUBO:
4712 ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
4713 Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
4714 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
4715 break;
4716 case ISD::USUBO:
4717 ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
4718 Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
4719 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
4720 break;
4721 case ISD::UMULO:
4722 // We generate a UMUL_LOHI and then check if the high word is 0.
4723 ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
4724 Value = DAG.getNode(ISD::UMUL_LOHI, dl,
4725 DAG.getVTList(Op.getValueType(), Op.getValueType()),
4726 LHS, RHS);
4727 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
4728 DAG.getConstant(0, dl, MVT::i32));
4729 Value = Value.getValue(0); // We only want the low 32 bits for the result.
4730 break;
4731 case ISD::SMULO:
4732 // We generate a SMUL_LOHI and then check if all the bits of the high word
4733 // are the same as the sign bit of the low word.
4734 ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
4735 Value = DAG.getNode(ISD::SMUL_LOHI, dl,
4736 DAG.getVTList(Op.getValueType(), Op.getValueType()),
4737 LHS, RHS);
4738 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
4739 DAG.getNode(ISD::SRA, dl, Op.getValueType(),
4740 Value.getValue(0),
4741 DAG.getConstant(31, dl, MVT::i32)));
4742 Value = Value.getValue(0); // We only want the low 32 bits for the result.
4743 break;
4744 } // switch (...)
4745
4746 return std::make_pair(Value, OverflowCmp);
4747}
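A hedged reference model of the unsigned checks above (illustrative C++ only): for UADDO the sum is compared against an operand, so there is a carry out exactly when the sum wrapped below LHS, and for UMULO the high half of the widened product must be zero.

#include <cstdint>

// Unsigned add overflow: mirrors the CMP of Value against LHS (HS = no overflow).
static bool uaddOverflows(uint32_t A, uint32_t B) {
  uint32_t Sum = A + B;
  return Sum < A; // carry out iff the 32-bit sum wrapped below A
}

// Unsigned multiply overflow: mirrors the "high word of UMUL_LOHI is zero" check.
static bool umulOverflows(uint32_t A, uint32_t B) {
  uint64_t Wide = (uint64_t)A * (uint64_t)B;
  return (Wide >> 32) != 0;
}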
4748
4749SDValue
4750ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const {
4751 // Let legalize expand this if it isn't a legal type yet.
4752 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
4753 return SDValue();
4754
4755 SDValue Value, OverflowCmp;
4756 SDValue ARMcc;
4757 std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
4758 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4759 SDLoc dl(Op);
4760 // We use 0 and 1 as false and true values.
4761 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
4762 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
4763 EVT VT = Op.getValueType();
4764
4765 SDValue Overflow = DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal,
4766 ARMcc, CCR, OverflowCmp);
4767
4768 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
4769 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
4770}
4771
4772static SDValue ConvertBooleanCarryToCarryFlag(SDValue BoolCarry,
4773 SelectionDAG &DAG) {
4774 SDLoc DL(BoolCarry);
4775 EVT CarryVT = BoolCarry.getValueType();
4776
4777 // This converts the boolean value carry into the carry flag by doing
4778 // ARMISD::SUBC Carry, 1
4779 SDValue Carry = DAG.getNode(ARMISD::SUBC, DL,
4780 DAG.getVTList(CarryVT, MVT::i32),
4781 BoolCarry, DAG.getConstant(1, DL, CarryVT));
4782 return Carry.getValue(1);
4783}
4784
4785static SDValue ConvertCarryFlagToBooleanCarry(SDValue Flags, EVT VT,
4786 SelectionDAG &DAG) {
4787 SDLoc DL(Flags);
4788
4789  // Now convert the carry flag into a boolean carry. We do this
4790  // using ARMISD::ADDE 0, 0, Carry
4791 return DAG.getNode(ARMISD::ADDE, DL, DAG.getVTList(VT, MVT::i32),
4792 DAG.getConstant(0, DL, MVT::i32),
4793 DAG.getConstant(0, DL, MVT::i32), Flags);
4794}
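A hedged note on the pair of helpers above: ADDE 0, 0, flags materializes the carry flag as 0 or 1, while SUBC bool, 1 re-creates the flag, because on ARM a subtraction sets carry exactly when it does not borrow, i.e. when bool >= 1. An illustrative C++ model (not the DAG code itself):

// Illustrative only: model the carry flag as a bool.
static unsigned flagToBool(bool CarryFlag) {
  return 0u + 0u + (CarryFlag ? 1u : 0u);  // ADDE 0, 0, flags
}
static bool boolToFlag(unsigned BoolCarry) {
  // ARM subtract semantics: carry set means no borrow occurred.
  return BoolCarry >= 1u;                  // SUBC BoolCarry, 1
}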
4795
4796SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op,
4797 SelectionDAG &DAG) const {
4798 // Let legalize expand this if it isn't a legal type yet.
4799 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
4800 return SDValue();
4801
4802 SDValue LHS = Op.getOperand(0);
4803 SDValue RHS = Op.getOperand(1);
4804 SDLoc dl(Op);
4805
4806 EVT VT = Op.getValueType();
4807 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
4808 SDValue Value;
4809 SDValue Overflow;
4810 switch (Op.getOpcode()) {
4811 default:
4812    llvm_unreachable("Unknown overflow instruction!");
4813 case ISD::UADDO:
4814 Value = DAG.getNode(ARMISD::ADDC, dl, VTs, LHS, RHS);
4815 // Convert the carry flag into a boolean value.
4816 Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
4817 break;
4818 case ISD::USUBO: {
4819 Value = DAG.getNode(ARMISD::SUBC, dl, VTs, LHS, RHS);
4820 // Convert the carry flag into a boolean value.
4821 Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
4822 // ARMISD::SUBC returns 0 when we have to borrow, so make it an overflow
4823 // value. So compute 1 - C.
4824 Overflow = DAG.getNode(ISD::SUB, dl, MVT::i32,
4825 DAG.getConstant(1, dl, MVT::i32), Overflow);
4826 break;
4827 }
4828 }
4829
4830 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
4831}
4832
4833static SDValue LowerSADDSUBSAT(SDValue Op, SelectionDAG &DAG,
4834 const ARMSubtarget *Subtarget) {
4835 EVT VT = Op.getValueType();
4836 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
4837 return SDValue();
4838 if (!VT.isSimple())
4839 return SDValue();
4840
4841 unsigned NewOpcode;
4842 bool IsAdd = Op->getOpcode() == ISD::SADDSAT;
4843 switch (VT.getSimpleVT().SimpleTy) {
4844 default:
4845 return SDValue();
4846 case MVT::i8:
4847 NewOpcode = IsAdd ? ARMISD::QADD8b : ARMISD::QSUB8b;
4848 break;
4849 case MVT::i16:
4850 NewOpcode = IsAdd ? ARMISD::QADD16b : ARMISD::QSUB16b;
4851 break;
4852 }
4853
4854 SDLoc dl(Op);
4855 SDValue Add =
4856 DAG.getNode(NewOpcode, dl, MVT::i32,
4857 DAG.getSExtOrTrunc(Op->getOperand(0), dl, MVT::i32),
4858 DAG.getSExtOrTrunc(Op->getOperand(1), dl, MVT::i32));
4859 return DAG.getNode(ISD::TRUNCATE, dl, VT, Add);
4860}
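
For reference, a minimal standalone sketch (plain C++, not LLVM code; saddsat8 is an invented name) of what a byte-wide signed saturating add computes, matching the i8 case handled above:

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    static int8_t saddsat8(int8_t A, int8_t B) {
      int32_t Sum = int32_t(A) + int32_t(B);             // widen so the add cannot wrap
      return int8_t(std::min(std::max(Sum, -128), 127)); // clamp to the i8 range
    }

    int main() {
      assert(saddsat8(100, 100) == 127);    // saturates at the upper bound
      assert(saddsat8(-100, -100) == -128); // saturates at the lower bound
      assert(saddsat8(10, -3) == 7);        // in-range results are unchanged
    }
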
4861
4862SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
4863 SDValue Cond = Op.getOperand(0);
4864 SDValue SelectTrue = Op.getOperand(1);
4865 SDValue SelectFalse = Op.getOperand(2);
4866 SDLoc dl(Op);
4867 unsigned Opc = Cond.getOpcode();
4868
4869 if (Cond.getResNo() == 1 &&
4870 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
4871 Opc == ISD::USUBO)) {
4872 if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
4873 return SDValue();
4874
4875 SDValue Value, OverflowCmp;
4876 SDValue ARMcc;
4877 std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
4878 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4879 EVT VT = Op.getValueType();
4880
4881 return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, CCR,
4882 OverflowCmp, DAG);
4883 }
4884
4885 // Convert:
4886 //
4887 // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
4888 // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
4889 //
4890 if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
4891 const ConstantSDNode *CMOVTrue =
4892 dyn_cast<ConstantSDNode>(Cond.getOperand(0));
4893 const ConstantSDNode *CMOVFalse =
4894 dyn_cast<ConstantSDNode>(Cond.getOperand(1));
4895
4896 if (CMOVTrue && CMOVFalse) {
4897 unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
4898 unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
4899
4900 SDValue True;
4901 SDValue False;
4902 if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
4903 True = SelectTrue;
4904 False = SelectFalse;
4905 } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
4906 True = SelectFalse;
4907 False = SelectTrue;
4908 }
4909
4910 if (True.getNode() && False.getNode()) {
4911 EVT VT = Op.getValueType();
4912 SDValue ARMcc = Cond.getOperand(2);
4913 SDValue CCR = Cond.getOperand(3);
4914 SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
4915 assert(True.getValueType() == VT);
4916 return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG);
4917 }
4918 }
4919 }
4920
4921 // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
4922 // undefined bits before doing a full-word comparison with zero.
4923 Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
4924 DAG.getConstant(1, dl, Cond.getValueType()));
4925
4926 return DAG.getSelectCC(dl, Cond,
4927 DAG.getConstant(0, dl, Cond.getValueType()),
4928 SelectTrue, SelectFalse, ISD::SETNE);
4929}
4930
4931static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
4932 bool &swpCmpOps, bool &swpVselOps) {
4933 // Start by selecting the GE condition code for opcodes that return true for
4934 // 'equality'
4935 if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||
4936 CC == ISD::SETULE || CC == ISD::SETGE || CC == ISD::SETLE)
4937 CondCode = ARMCC::GE;
4938
4939 // and GT for opcodes that return false for 'equality'.
4940 else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||
4941 CC == ISD::SETULT || CC == ISD::SETGT || CC == ISD::SETLT)
4942 CondCode = ARMCC::GT;
4943
4944 // Since we are constrained to GE/GT, if the opcode contains 'less', we need
4945 // to swap the compare operands.
4946 if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||
4947 CC == ISD::SETULT || CC == ISD::SETLE || CC == ISD::SETLT)
4948 swpCmpOps = true;
4949
4950 // Both GT and GE are ordered comparisons, and return false for 'unordered'.
4951 // If we have an unordered opcode, we need to swap the operands to the VSEL
4952 // instruction (effectively negating the condition).
4953 //
4954 // This also has the effect of swapping which one of 'less' or 'greater'
4955 // returns true, so we also swap the compare operands. It also switches
4956 // whether we return true for 'equality', so we compensate by picking the
4957 // opposite condition code to our original choice.
4958 if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE ||
4959 CC == ISD::SETUGT) {
4960 swpCmpOps = !swpCmpOps;
4961 swpVselOps = !swpVselOps;
4962 CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT;
4963 }
4964
4965 // 'ordered' is 'anything but unordered', so use the VS condition code and
4966 // swap the VSEL operands.
4967 if (CC == ISD::SETO) {
4968 CondCode = ARMCC::VS;
4969 swpVselOps = true;
4970 }
4971
4972 // 'unordered or not equal' is 'anything but equal', so use the EQ condition
4973 // code and swap the VSEL operands. Also do this if we don't care about the
4974 // unordered case.
4975 if (CC == ISD::SETUNE || CC == ISD::SETNE) {
4976 CondCode = ARMCC::EQ;
4977 swpVselOps = true;
4978 }
4979}
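
A hedged standalone sketch (plain C++, not LLVM code) of the two swaps discussed above, leaving NaN/unordered cases aside: swapping the compare operands turns 'less' into 'greater', and swapping the select operands is the same as negating the condition, which is what lets every SETcc be expressed with only GE, GT, VS and EQ:

    #include <cassert>

    int main() {
      const float Vals[] = {1.0f, 2.0f};
      for (float A : Vals)
        for (float B : Vals) {
          float T = 10.0f, F = 20.0f;
          // Swapping the compare operands: select(A < B, T, F) == select(B > A, T, F).
          assert(((A < B) ? T : F) == ((B > A) ? T : F));
          // Swapping the select operands negates the condition:
          // select(A < B, T, F) == select(!(A < B), F, T).
          assert(((A < B) ? T : F) == (!(A < B) ? F : T));
        }
    }
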
4980
4981SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal,
4982 SDValue TrueVal, SDValue ARMcc, SDValue CCR,
4983 SDValue Cmp, SelectionDAG &DAG) const {
4984 if (!Subtarget->hasFP64() && VT == MVT::f64) {
4985 FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl,
4986 DAG.getVTList(MVT::i32, MVT::i32), FalseVal);
4987 TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl,
4988 DAG.getVTList(MVT::i32, MVT::i32), TrueVal);
4989
4990 SDValue TrueLow = TrueVal.getValue(0);
4991 SDValue TrueHigh = TrueVal.getValue(1);
4992 SDValue FalseLow = FalseVal.getValue(0);
4993 SDValue FalseHigh = FalseVal.getValue(1);
4994
4995 SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow,
4996 ARMcc, CCR, Cmp);
4997 SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh,
4998 ARMcc, CCR, duplicateCmp(Cmp, DAG));
4999
5000 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High);
5001 } else {
5002 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
5003 Cmp);
5004 }
5005}
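
When f64 values cannot live in a single VFP register, getCMOV selects each 32-bit half separately and reassembles the double. A hedged standalone sketch (plain C++, not LLVM code) checking that per-half selection is equivalent to selecting the whole double:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    static void splitDouble(double D, uint32_t &Lo, uint32_t &Hi) {
      uint64_t Bits;
      std::memcpy(&Bits, &D, sizeof(Bits));
      Lo = uint32_t(Bits);       // low half (VMOVRRD value 0)
      Hi = uint32_t(Bits >> 32); // high half (VMOVRRD value 1)
    }

    int main() {
      double T = 3.5, F = -1.25;
      const bool Conds[] = {false, true};
      for (bool Cond : Conds) {
        uint32_t TLo, THi, FLo, FHi;
        splitDouble(T, TLo, THi);
        splitDouble(F, FLo, FHi);
        uint32_t Lo = Cond ? TLo : FLo;            // first i32 CMOV
        uint32_t Hi = Cond ? THi : FHi;            // second i32 CMOV
        uint64_t Bits = (uint64_t(Hi) << 32) | Lo; // VMOVDRR-style reassembly
        double R;
        std::memcpy(&R, &Bits, sizeof(R));
        assert(R == (Cond ? T : F));
      }
    }
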
5006
5007static bool isGTorGE(ISD::CondCode CC) {
5008 return CC == ISD::SETGT || CC == ISD::SETGE;
5009}
5010
5011static bool isLTorLE(ISD::CondCode CC) {
5012 return CC == ISD::SETLT || CC == ISD::SETLE;
5013}
5014
5015// See if a conditional (LHS CC RHS ? TrueVal : FalseVal) is lower-saturating.
5016// All of these conditions (and their <= and >= counterparts) will do:
5017// x < k ? k : x
5018// x > k ? x : k
5019// k < x ? x : k
5020// k > x ? k : x
5021static bool isLowerSaturate(const SDValue LHS, const SDValue RHS,
5022 const SDValue TrueVal, const SDValue FalseVal,
5023 const ISD::CondCode CC, const SDValue K) {
5024 return (isGTorGE(CC) &&
5025 ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))) ||
5026 (isLTorLE(CC) &&
5027 ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal)));
5028}
5029
5030// Check if two chained conditionals could be converted into SSAT or USAT.
5031//
5032// SSAT can replace a set of two conditional selectors that bound a number to an
5033// interval of type [k, ~k] when k + 1 is a power of 2. Here are some examples:
5034//
5035// x < -k ? -k : (x > k ? k : x)
5036// x < -k ? -k : (x < k ? x : k)
5037// x > -k ? (x > k ? k : x) : -k
5038// x < k ? (x < -k ? -k : x) : k
5039// etc.
5040//
5041// LLVM canonicalizes these to either a min(max()) or a max(min())
5042 // pattern. This function tries to match one of these and will return an SSAT
5043// node if successful.
5044//
5045 // USAT works similarly to SSAT but bounds to the interval [0, k], where k + 1
5046// is a power of 2.
5047static SDValue LowerSaturatingConditional(SDValue Op, SelectionDAG &DAG) {
5048 EVT VT = Op.getValueType();
5049 SDValue V1 = Op.getOperand(0);
5050 SDValue K1 = Op.getOperand(1);
5051 SDValue TrueVal1 = Op.getOperand(2);
5052 SDValue FalseVal1 = Op.getOperand(3);
5053 ISD::CondCode CC1 = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5054
5055 const SDValue Op2 = isa<ConstantSDNode>(TrueVal1) ? FalseVal1 : TrueVal1;
5056 if (Op2.getOpcode() != ISD::SELECT_CC)
5057 return SDValue();
5058
5059 SDValue V2 = Op2.getOperand(0);
5060 SDValue K2 = Op2.getOperand(1);
5061 SDValue TrueVal2 = Op2.getOperand(2);
5062 SDValue FalseVal2 = Op2.getOperand(3);
5063 ISD::CondCode CC2 = cast<CondCodeSDNode>(Op2.getOperand(4))->get();
5064
5065 SDValue V1Tmp = V1;
5066 SDValue V2Tmp = V2;
5067
5068 // Check that the registers and the constants match a max(min()) or min(max())
5069 // pattern
5070 if (V1Tmp != TrueVal1 || V2Tmp != TrueVal2 || K1 != FalseVal1 ||
5071 K2 != FalseVal2 ||
5072 !((isGTorGE(CC1) && isLTorLE(CC2)) || (isLTorLE(CC1) && isGTorGE(CC2))))
5073 return SDValue();
5074
5075 // Check that the constant in the lower-bound check is
5076 // the opposite of the constant in the upper-bound check
5077 // in 1's complement.
5078 if (!isa<ConstantSDNode>(K1) || !isa<ConstantSDNode>(K2))
5079 return SDValue();
5080
5081 int64_t Val1 = cast<ConstantSDNode>(K1)->getSExtValue();
5082 int64_t Val2 = cast<ConstantSDNode>(K2)->getSExtValue();
5083 int64_t PosVal = std::max(Val1, Val2);
5084 int64_t NegVal = std::min(Val1, Val2);
5085
5086 if (!((Val1 > Val2 && isLTorLE(CC1)) || (Val1 < Val2 && isLTorLE(CC2))) ||
5087 !isPowerOf2_64(PosVal + 1))
5088 return SDValue();
5089
5090 // Handle the difference between USAT (unsigned) and SSAT (signed)
5091 // saturation
5092 // At this point, PosVal is guaranteed to be positive
5093 uint64_t K = PosVal;
5094 SDLoc dl(Op);
5095 if (Val1 == ~Val2)
5096 return DAG.getNode(ARMISD::SSAT, dl, VT, V2Tmp,
5097 DAG.getConstant(countTrailingOnes(K), dl, VT));
5098 if (NegVal == 0)
5099 return DAG.getNode(ARMISD::USAT, dl, VT, V2Tmp,
5100 DAG.getConstant(countTrailingOnes(K), dl, VT));
5101
5102 return SDValue();
5103}
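
As a worked example of the constant checks above (plain C++ sketch, not LLVM code): for a signed clamp to [-128, 127] the two select constants are one's complements of each other and PosVal + 1 is a power of two, which is the combination that selects the SSAT path:

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    static int32_t clampSigned8(int32_t X) { // what an 8-bit signed saturate computes
      return std::min(std::max(X, -128), 127);
    }

    int main() {
      int64_t Val1 = -128, Val2 = 127;      // constants from the two chained selects
      assert(Val1 == ~Val2);                // one's-complement pair -> SSAT candidate
      int64_t PosVal = std::max(Val1, Val2);
      assert(((PosVal + 1) & PosVal) == 0); // PosVal + 1 is a power of two
      assert(clampSigned8(1000) == 127);
      assert(clampSigned8(-1000) == -128);
      assert(clampSigned8(42) == 42);
    }
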
5104
5105// Check if a condition of the type x < k ? k : x can be converted into a
5106// bit operation instead of conditional moves.
5107// Currently this is allowed given:
5108// - The conditions and values match up
5109// - k is 0 or -1 (all ones)
5110 // This function will not check the last condition; that's up to the caller.
5111 // It returns true if the transformation can be made, and in that case
5112 // returns x in V and k in SatK.
5113static bool isLowerSaturatingConditional(const SDValue &Op, SDValue &V,
5114 SDValue &SatK)
5115{
5116 SDValue LHS = Op.getOperand(0);
5117 SDValue RHS = Op.getOperand(1);
5118 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5119 SDValue TrueVal = Op.getOperand(2);
5120 SDValue FalseVal = Op.getOperand(3);
5121
5122 SDValue *K = isa<ConstantSDNode>(LHS) ? &LHS : isa<ConstantSDNode>(RHS)
5123 ? &RHS
5124 : nullptr;
5125
5126 // No constant operand in the comparison, early out
5127 if (!K)
5128 return false;
5129
5130 SDValue KTmp = isa<ConstantSDNode>(TrueVal) ? TrueVal : FalseVal;
5131 V = (KTmp == TrueVal) ? FalseVal : TrueVal;
5132 SDValue VTmp = (K && *K == LHS) ? RHS : LHS;
5133
5134 // If the constant in the comparison and the one in the select, or the
5135 // variable in both, do not match, early out
5136 if (*K != KTmp || V != VTmp)
5137 return false;
5138
5139 if (isLowerSaturate(LHS, RHS, TrueVal, FalseVal, CC, *K)) {
5140 SatK = *K;
5141 return true;
5142 }
5143
5144 return false;
5145}
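
The bit-operation forms the caller emits for the two permitted k values can be checked against plain clamps. A minimal standalone sketch (plain C++, not LLVM code), assuming an arithmetic right shift for signed values as on ARM:

    #include <cassert>
    #include <cstdint>

    static int32_t maxWithZero(int32_t X)     { return X & ~(X >> 31); } // k == 0
    static int32_t maxWithMinusOne(int32_t X) { return X | (X >> 31); }  // k == -1

    int main() {
      const int32_t Xs[] = {-5, -1, 0, 1, 42};
      for (int32_t X : Xs) {
        assert(maxWithZero(X) == (X < 0 ? 0 : X));
        assert(maxWithMinusOne(X) == (X < -1 ? -1 : X));
      }
    }
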
5146
5147bool ARMTargetLowering::isUnsupportedFloatingType(EVT VT) const {
5148 if (VT == MVT::f32)
5149 return !Subtarget->hasVFP2Base();
5150 if (VT == MVT::f64)
5151 return !Subtarget->hasFP64();
5152 if (VT == MVT::f16)
5153 return !Subtarget->hasFullFP16();
5154 return false;
5155}
5156
5157SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
5158 EVT VT = Op.getValueType();
5159 SDLoc dl(Op);
5160
5161 // Try to convert two saturating conditional selects into a single SSAT
5162 if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2())
5163 if (SDValue SatValue = LowerSaturatingConditional(Op, DAG))
5164 return SatValue;
5165
5166 // Try to convert expressions of the form x < k ? k : x (and similar forms)
5167 // into more efficient bit operations, which is possible when k is 0 or -1.
5168 // On ARM and Thumb-2, which have a flexible second operand, this results in a
5169 // single instruction. On Thumb the shift and the bit operation will be two
5170 // instructions.
5171 // Only allow this transformation on full-width (32-bit) operations.
5172 SDValue LowerSatConstant;
5173 SDValue SatValue;
5174 if (VT == MVT::i32 &&
5175 isLowerSaturatingConditional(Op, SatValue, LowerSatConstant)) {
5176 SDValue ShiftV = DAG.getNode(ISD::SRA, dl, VT, SatValue,
5177 DAG.getConstant(31, dl, VT));
5178 if (isNullConstant(LowerSatConstant)) {
5179 SDValue NotShiftV = DAG.getNode(ISD::XOR, dl, VT, ShiftV,
5180 DAG.getAllOnesConstant(dl, VT));
5181 return DAG.getNode(ISD::AND, dl, VT, SatValue, NotShiftV);
5182 } else if (isAllOnesConstant(LowerSatConstant))
5183 return DAG.getNode(ISD::OR, dl, VT, SatValue, ShiftV);
5184 }
5185
5186 SDValue LHS = Op.getOperand(0);
5187 SDValue RHS = Op.getOperand(1);
5188 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5189 SDValue TrueVal = Op.getOperand(2);
5190 SDValue FalseVal = Op.getOperand(3);
5191 ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FalseVal);
5192 ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TrueVal);
5193
5194 if (Subtarget->hasV8_1MMainlineOps() && CFVal && CTVal &&
5195 LHS.getValueType() == MVT::i32 && RHS.getValueType() == MVT::i32) {
5196 unsigned TVal = CTVal->getZExtValue();
5197 unsigned FVal = CFVal->getZExtValue();
5198 unsigned Opcode = 0;
5199
5200 if (TVal == ~FVal) {
5201 Opcode = ARMISD::CSINV;
5202 } else if (TVal == ~FVal + 1) {
5203 Opcode = ARMISD::CSNEG;
5204 } else if (TVal + 1 == FVal) {
5205 Opcode = ARMISD::CSINC;
5206 } else if (TVal == FVal + 1) {
5207 Opcode = ARMISD::CSINC;
5208 std::swap(TrueVal, FalseVal);
5209 std::swap(TVal, FVal);
5210 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5211 }
5212
5213 if (Opcode) {
5214 // If one of the constants is cheaper than another, materialise the
5215 // cheaper one and let the csel generate the other.
5216 if (Opcode != ARMISD::CSINC &&
5217 HasLowerConstantMaterializationCost(FVal, TVal, Subtarget)) {
5218 std::swap(TrueVal, FalseVal);
5219 std::swap(TVal, FVal);
5220 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5221 }
5222
5223 // Attempt to use ZR by checking whether TVal is 0, possibly inverting the
5224 // condition to get there. CSINC is not invertible like the other two
5225 // (~(~a) == a, -(-a) == a, but (a+1)+1 != a).
5226 if (FVal == 0 && Opcode != ARMISD::CSINC) {
5227 std::swap(TrueVal, FalseVal);
5228 std::swap(TVal, FVal);
5229 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5230 }
5231
5232 // Drops F's value because we can get it by inverting/negating TVal.
5233 FalseVal = TrueVal;
5234
5235 SDValue ARMcc;
5236 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5237 EVT VT = TrueVal.getValueType();
5238 return DAG.getNode(Opcode, dl, VT, TrueVal, FalseVal, ARMcc, Cmp);
5239 }
5240 }
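
A hedged standalone sketch (plain C++, not LLVM code) of why the false value can be dropped above: the CSINV/CSNEG/CSINC operations recompute it from the true value by inverting, negating, or incrementing, so passing TVal for both operands still reproduces the original select:

    #include <cassert>
    #include <cstdint>

    // Rough models of the conditional-select family on i32 operands.
    static uint32_t csinv(uint32_t A, uint32_t B, bool C) { return C ? A : ~B; }
    static uint32_t csneg(uint32_t A, uint32_t B, bool C) { return C ? A : 0u - B; }
    static uint32_t csinc(uint32_t A, uint32_t B, bool C) { return C ? A : B + 1; }

    int main() {
      uint32_t TVal = 5;
      const bool Conds[] = {false, true};
      for (bool Cond : Conds) {
        assert(csinv(TVal, TVal, Cond) == (Cond ? TVal : ~TVal));     // FVal == ~TVal
        assert(csneg(TVal, TVal, Cond) == (Cond ? TVal : 0u - TVal)); // FVal == -TVal
        assert(csinc(TVal, TVal, Cond) == (Cond ? TVal : TVal + 1));  // FVal == TVal + 1
      }
    }
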
5241
5242 if (isUnsupportedFloatingType(LHS.getValueType())) {
5243 DAG.getTargetLoweringInfo().softenSetCCOperands(
5244 DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS);
5245
5246 // If softenSetCCOperands only returned one value, we should compare it to
5247 // zero.
5248 if (!RHS.getNode()) {
5249 RHS = DAG.getConstant(0, dl, LHS.getValueType());
5250 CC = ISD::SETNE;
5251 }
5252 }
5253
5254 if (LHS.getValueType() == MVT::i32) {
5255 // Try to generate VSEL on ARMv8.
5256 // The VSEL instruction can't use all the usual ARM condition
5257 // codes: it only has two bits to select the condition code, so it's
5258 // constrained to use only GE, GT, VS and EQ.
5259 //
5260 // To implement all the various ISD::SETXXX opcodes, we sometimes need to
5261 // swap the operands of the previous compare instruction (effectively
5262 // inverting the compare condition, swapping 'less' and 'greater') and
5263 // sometimes need to swap the operands to the VSEL (which inverts the
5264 // condition in the sense of firing whenever the previous condition didn't)
5265 if (Subtarget->hasFPARMv8Base() && (TrueVal.getValueType() == MVT::f16 ||
5266 TrueVal.getValueType() == MVT::f32 ||
5267 TrueVal.getValueType() == MVT::f64)) {
5268 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
5269 if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
5270 CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
5271 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5272 std::swap(TrueVal, FalseVal);
5273 }
5274 }
5275
5276 SDValue ARMcc;
5277 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5278 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5279 // Choose GE over PL, which vsel does not support
5280 if (cast<ConstantSDNode>(ARMcc)->getZExtValue() == ARMCC::PL)
5281 ARMcc = DAG.getConstant(ARMCC::GE, dl, MVT::i32);
5282 return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
5283 }
5284
5285 ARMCC::CondCodes CondCode, CondCode2;
5286 FPCCToARMCC(CC, CondCode, CondCode2);
5287
5288 // Normalize the fp compare. If RHS is zero we prefer to keep it there so we
5289 // match CMPFPw0 instead of CMPFP, though we don't do this for f16 because we
5290 // must use VSEL (limited condition codes), due to not having conditional f16
5291 // moves.
5292 if (Subtarget->hasFPARMv8Base() &&
5293 !(isFloatingPointZero(RHS) && TrueVal.getValueType() != MVT::f16) &&
5294 (TrueVal.getValueType() == MVT::f16 ||
5295 TrueVal.getValueType() == MVT::f32 ||
5296 TrueVal.getValueType() == MVT::f64)) {
5297 bool swpCmpOps = false;
5298 bool swpVselOps = false;
5299 checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
5300
5301 if (CondCode == ARMCC::GT || CondCode == ARMCC::GE ||
5302 CondCode == ARMCC::VS || CondCode == ARMCC::EQ) {
5303 if (swpCmpOps)
5304 std::swap(LHS, RHS);
5305 if (swpVselOps)
5306 std::swap(TrueVal, FalseVal);
5307 }
5308 }
5309
5310 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5311 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
5312 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5313 SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
5314 if (CondCode2 != ARMCC::AL) {
5315 SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32);
5316 // FIXME: Needs another CMP because flag can have but one use.
5317 SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
5318 Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG);
5319 }
5320 return Result;
5321}
5322
5323/// canChangeToInt - Given the fp compare operand, return true if it is suitable
5324/// to morph to an integer compare sequence.
5325static bool canChangeToInt(SDValue Op, bool &SeenZero,
5326 const ARMSubtarget *Subtarget) {
5327 SDNode *N = Op.getNode();
5328 if (!N->hasOneUse())
5329 // Otherwise it requires moving the value from fp to integer registers.
5330 return false;
5331 if (!N->getNumValues())
5332 return false;
5333 EVT VT = Op.getValueType();
5334 if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
5335 // f32 case is generally profitable. f64 case only makes sense when vcmpe +
5336 // vmrs are very slow, e.g. cortex-a8.
5337 return false;
5338
5339 if (isFloatingPointZero(Op)) {
5340 SeenZero = true;
5341 return true;
5342 }
5343 return ISD::isNormalLoad(N);
5344}
5345
5346static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
5347 if (isFloatingPointZero(Op))
5348 return DAG.getConstant(0, SDLoc(Op), MVT::i32);
5349
5350 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
5351 return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(),
5352 Ld->getPointerInfo(), Ld->getAlignment(),
5353 Ld->getMemOperand()->getFlags());
5354
5355 llvm_unreachable("Unknown VFP cmp argument!");
5356}
5357
5358static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
5359 SDValue &RetVal1, SDValue &RetVal2) {
5360 SDLoc dl(Op);
5361
5362 if (isFloatingPointZero(Op)) {
5363 RetVal1 = DAG.getConstant(0, dl, MVT::i32);
5364 RetVal2 = DAG.getConstant(0, dl, MVT::i32);
5365 return;
5366 }
5367
5368 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
5369 SDValue Ptr = Ld->getBasePtr();
5370 RetVal1 =
5371 DAG.getLoad(MVT::i32, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
5372 Ld->getAlignment(), Ld->getMemOperand()->getFlags());
5373
5374 EVT PtrType = Ptr.getValueType();
5375 unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
5376 SDValue NewPtr = DAG.getNode(ISD::ADD, dl,
5377 PtrType, Ptr, DAG.getConstant(4, dl, PtrType));
5378 RetVal2 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), NewPtr,
5379 Ld->getPointerInfo().getWithOffset(4), NewAlign,
5380 Ld->getMemOperand()->getFlags());
5381 return;
5382 }
5383
5384 llvm_unreachable("Unknown VFP cmp argument!");
5385}
5386
5387/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
5388/// f32 and even f64 comparisons to integer ones.
5389SDValue
5390ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
5391 SDValue Chain = Op.getOperand(0);
5392 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
5393 SDValue LHS = Op.getOperand(2);
5394 SDValue RHS = Op.getOperand(3);
5395 SDValue Dest = Op.getOperand(4);
5396 SDLoc dl(Op);
5397
5398 bool LHSSeenZero = false;
5399 bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
5400 bool RHSSeenZero = false;
5401 bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);
5402 if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
5403 // If unsafe fp math optimization is enabled and there are no other uses of
5404 // the CMP operands, and the condition code is EQ or NE, we can optimize it
5405 // to an integer comparison.
5406 if (CC == ISD::SETOEQ)
5407 CC = ISD::SETEQ;
5408 else if (CC == ISD::SETUNE)
5409 CC = ISD::SETNE;
5410
5411 SDValue Mask = DAG.getConstant(0x7fffffff, dl, MVT::i32);
5412 SDValue ARMcc;
5413 if (LHS.getValueType() == MVT::f32) {
5414 LHS = DAG.getNode(ISD::AND, dl, MVT::i32,
5415 bitcastf32Toi32(LHS, DAG), Mask);
5416 RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
5417 bitcastf32Toi32(RHS, DAG), Mask);
5418 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5419 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5420 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
5421 Chain, Dest, ARMcc, CCR, Cmp);
5422 }
5423
5424 SDValue LHS1, LHS2;
5425 SDValue RHS1, RHS2;
5426 expandf64Toi32(LHS, DAG, LHS1, LHS2);
5427 expandf64Toi32(RHS, DAG, RHS1, RHS2);
5428 LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);
5429 RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
5430 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
5431 ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5432 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
5433 SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
5434 return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops);
5435 }
5436
5437 return SDValue();
5438}
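
A small standalone sketch (plain C++, not LLVM code) of the masked integer comparison used above: when one operand is known to be ±0.0 and only EQ/NE is requested, clearing the sign bits and comparing the raw f32 bit patterns agrees with the floating-point equality (NaNs are out of scope under unsafe-fp-math):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    static uint32_t bitsWithoutSign(float F) {
      uint32_t U;
      std::memcpy(&U, &F, sizeof(U));
      return U & 0x7fffffffu; // the 0x7fffffff mask used above
    }

    int main() {
      const float Xs[] = {0.0f, -0.0f, 1.5f, -1.5f};
      for (float X : Xs) {
        bool IntEq = bitsWithoutSign(X) == bitsWithoutSign(0.0f);
        assert(IntEq == (X == 0.0f)); // +0.0 and -0.0 compare equal; nonzero values do not
      }
    }
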
5439
5440SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
5441 SDValue Chain = Op.getOperand(0);
5442 SDValue Cond = Op.getOperand(1);
5443 SDValue Dest = Op.getOperand(2);
5444 SDLoc dl(Op);
5445
5446 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
5447 // instruction.
5448 unsigned Opc = Cond.getOpcode();
5449 bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) &&
5450 !Subtarget->isThumb1Only();
5451 if (Cond.getResNo() == 1 &&
5452 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
5453 Opc == ISD::USUBO || OptimizeMul)) {
5454 // Only lower legal XALUO ops.
5455 if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
5456 return SDValue();
5457
5458 // The actual operation with overflow check.
5459 SDValue Value, OverflowCmp;
5460 SDValue ARMcc;
5461 std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
5462
5463 // Reverse the condition code.
5464 ARMCC::CondCodes CondCode =
5465 (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
5466 CondCode = ARMCC::getOppositeCondition(CondCode);
5467 ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
5468 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5469
5470 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR,
5471 OverflowCmp);
5472 }
5473
5474 return SDValue();
5475}
5476
5477SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
5478 SDValue Chain = Op.getOperand(0);
5479 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
5480 SDValue LHS = Op.getOperand(2);
5481 SDValue RHS = Op.getOperand(3);
5482 SDValue Dest = Op.getOperand(4);
5483 SDLoc dl(Op);
5484
5485 if (isUnsupportedFloatingType(LHS.getValueType())) {
5486 DAG.getTargetLoweringInfo().softenSetCCOperands(
5487 DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS);
5488
5489 // If softenSetCCOperands only returned one value, we should compare it to
5490 // zero.
5491 if (!RHS.getNode()) {
5492 RHS = DAG.getConstant(0, dl, LHS.getValueType());
5493 CC = ISD::SETNE;
5494 }
5495 }
5496
5497 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
5498 // instruction.
5499 unsigned Opc = LHS.getOpcode();
5500 bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) &&
5501 !Subtarget->isThumb1Only();
5502 if (LHS.getResNo() == 1 && (isOneConstant(RHS) || isNullConstant(RHS)) &&
5503 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
5504 Opc == ISD::USUBO || OptimizeMul) &&
5505 (CC == ISD::SETEQ || CC == ISD::SETNE)) {
5506 // Only lower legal XALUO ops.
5507 if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
5508 return SDValue();
5509
5510 // The actual operation with overflow check.
5511 SDValue Value, OverflowCmp;
5512 SDValue ARMcc;
5513 std::tie(Value, OverflowCmp) = getARMXALUOOp(LHS.getValue(0), DAG, ARMcc);
5514
5515 if ((CC == ISD::SETNE) != isOneConstant(RHS)) {
5516 // Reverse the condition code.
5517 ARMCC::CondCodes CondCode =
5518 (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
5519 CondCode = ARMCC::getOppositeCondition(CondCode);
5520 ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
5521 }
5522 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5523
5524 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR,
5525 OverflowCmp);
5526 }
5527
5528 if (LHS.getValueType() == MVT::i32) {
5529 SDValue ARMcc;
5530 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5531 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5532 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
5533 Chain, Dest, ARMcc, CCR, Cmp);
5534 }
5535
5536 if (getTargetMachine().Options.UnsafeFPMath &&
5537 (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
5538 CC == ISD::SETNE || CC == ISD::SETUNE)) {
5539 if (SDValue Result = OptimizeVFPBrcond(Op, DAG))
5540 return Result;
5541 }
5542
5543 ARMCC::CondCodes CondCode, CondCode2;
5544 FPCCToARMCC(CC, CondCode, CondCode2);
5545
5546 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5547 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
5548 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5549 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
5550 SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
5551 SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
5552 if (CondCode2 != ARMCC::AL) {
5553 ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
5554 SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
5555 Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
5556 }
5557 return Res;
5558}
5559
5560SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
5561 SDValue Chain = Op.getOperand(0);
5562 SDValue Table = Op.getOperand(1);
5563 SDValue Index = Op.getOperand(2);
5564 SDLoc dl(Op);
5565
5566 EVT PTy = getPointerTy(DAG.getDataLayout());
5567 JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
5568 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
5569 Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI);
5570 Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, dl, PTy));
5571 SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Index);
5572 if (Subtarget->isThumb2() || (Subtarget->hasV8MBaselineOps() && Subtarget->isThumb())) {
5573 // Thumb2 and ARMv8-M use a two-level jump. That is, they jump into the jump table,
5574 // which does another jump to the destination. This also makes it easier
5575 // to translate it to TBB / TBH later (Thumb2 only).
5576 // FIXME: This might not work if the function is extremely large.
5577 return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
5578 Addr, Op.getOperand(2), JTI);
5579 }
5580 if (isPositionIndependent() || Subtarget->isROPI()) {
5581 Addr =
5582 DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
5583 MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
5584 Chain = Addr.getValue(1);
5585 Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Addr);
5586 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
5587 } else {
5588 Addr =
5589 DAG.getLoad(PTy, dl, Chain, Addr,
5590 MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
5591 Chain = Addr.getValue(1);
5592 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
5593 }
5594}
5595
5596static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
5597 EVT VT = Op.getValueType();
5598 SDLoc dl(Op);
5599
5600 if (Op.getValueType().getVectorElementType() == MVT::i32) {
5601 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
5602 return Op;
5603 return DAG.UnrollVectorOp(Op.getNode());
5604 }
5605
5606 const bool HasFullFP16 =
5607 static_cast<const ARMSubtarget&>(DAG.getSubtarget()).hasFullFP16();
5608
5609 EVT NewTy;
5610 const EVT OpTy = Op.getOperand(0).getValueType();
5611 if (OpTy == MVT::v4f32)
5612 NewTy = MVT::v4i32;
5613 else if (OpTy == MVT::v4f16 && HasFullFP16)
5614 NewTy = MVT::v4i16;
5615 else if (OpTy == MVT::v8f16 && HasFullFP16)
5616 NewTy = MVT::v8i16;
5617 else
5618 llvm_unreachable("Invalid type for custom lowering!");
5619
5620 if (VT != MVT::v4i16 && VT != MVT::v8i16)
5621 return DAG.UnrollVectorOp(Op.getNode());
5622
5623 Op = DAG.getNode(Op.getOpcode(), dl, NewTy, Op.getOperand(0));
5624 return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
5625}
5626
5627SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
5628 EVT VT = Op.getValueType();
5629 if (VT.isVector())
5630 return LowerVectorFP_TO_INT(Op, DAG);
5631
5632 bool IsStrict = Op->isStrictFPOpcode();
5633 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
5634
5635 if (isUnsupportedFloatingType(SrcVal.getValueType())) {
5636 RTLIB::Libcall LC;
5637 if (Op.getOpcode() == ISD::FP_TO_SINT ||
5638 Op.getOpcode() == ISD::STRICT_FP_TO_SINT)
5639 LC = RTLIB::getFPTOSINT(SrcVal.getValueType(),
5640 Op.getValueType());
5641 else
5642 LC = RTLIB::getFPTOUINT(SrcVal.getValueType(),
5643 Op.getValueType());
5644 SDLoc Loc(Op);
5645 MakeLibCallOptions CallOptions;
5646 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
5647 SDValue Result;
5648 std::tie(Result, Chain) = makeLibCall(DAG, LC, Op.getValueType(), SrcVal,
5649 CallOptions, Loc, Chain);
5650 return IsStrict ? DAG.getMergeValues({Result, Chain}, Loc) : Result;
5651 }
5652
5653 // FIXME: Remove this when we have strict fp instruction selection patterns
5654 if (IsStrict) {
5655 SDLoc Loc(Op);
5656 SDValue Result =
5657 DAG.getNode(Op.getOpcode() == ISD::STRICT_FP_TO_SINT ? ISD::FP_TO_SINT
5658 : ISD::FP_TO_UINT,
5659 Loc, Op.getValueType(), SrcVal);
5660 return DAG.getMergeValues({Result, Op.getOperand(0)}, Loc);
5661 }
5662
5663 return Op;
5664}
5665
5666static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
5667 EVT VT = Op.getValueType();
5668 SDLoc dl(Op);
5669
5670 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
5671 if (VT.getVectorElementType() == MVT::f32)
5672 return Op;
5673 return DAG.UnrollVectorOp(Op.getNode());
5674 }
5675
5676 assert((Op.getOperand(0).getValueType() == MVT::v4i16 ||
5677 Op.getOperand(0).getValueType() == MVT::v8i16) &&
5678 "Invalid type for custom lowering!");
5679
5680 const bool HasFullFP16 =
5681 static_cast<const ARMSubtarget&>(DAG.getSubtarget()).hasFullFP16();
5682
5683 EVT DestVecType;
5684 if (VT == MVT::v4f32)
5685 DestVecType = MVT::v4i32;
5686 else if (VT == MVT::v4f16 && HasFullFP16)
5687 DestVecType = MVT::v4i16;
5688 else if (VT == MVT::v8f16 && HasFullFP16)
5689 DestVecType = MVT::v8i16;
5690 else
5691 return DAG.UnrollVectorOp(Op.getNode());
5692
5693 unsigned CastOpc;
5694 unsigned Opc;
5695 switch (Op.getOpcode()) {
5696 default: llvm_unreachable("Invalid opcode!");
5697 case ISD::SINT_TO_FP:
5698 CastOpc = ISD::SIGN_EXTEND;
5699 Opc = ISD::SINT_TO_FP;
5700 break;
5701 case ISD::UINT_TO_FP:
5702 CastOpc = ISD::ZERO_EXTEND;
5703 Opc = ISD::UINT_TO_FP;
5704 break;
5705 }
5706
5707 Op = DAG.getNode(CastOpc, dl, DestVecType, Op.getOperand(0));
5708 return DAG.getNode(Opc, dl, VT, Op);
5709}
5710
5711SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
5712 EVT VT = Op.getValueType();
5713 if (VT.isVector())
5714 return LowerVectorINT_TO_FP(Op, DAG);
5715 if (isUnsupportedFloatingType(VT)) {
5716 RTLIB::Libcall LC;
5717 if (Op.getOpcode() == ISD::SINT_TO_FP)
5718 LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(),
5719 Op.getValueType());
5720 else
5721 LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(),
5722 Op.getValueType());
5723 MakeLibCallOptions CallOptions;
5724 return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
5725 CallOptions, SDLoc(Op)).first;
5726 }
5727
5728 return Op;
5729}
5730
5731SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
5732 // Implement fcopysign with a fabs and a conditional fneg.
5733 SDValue Tmp0 = Op.getOperand(0);
5734 SDValue Tmp1 = Op.getOperand(1);
5735 SDLoc dl(Op);
5736 EVT VT = Op.getValueType();
5737 EVT SrcVT = Tmp1.getValueType();
5738 bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
5739 Tmp0.getOpcode() == ARMISD::VMOVDRR;
5740 bool UseNEON = !InGPR && Subtarget->hasNEON();
5741
5742 if (UseNEON) {
5743 // Use VBSL to copy the sign bit.
5744 unsigned EncodedVal = ARM_AM::createVMOVModImm(0x6, 0x80);
5745 SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
5746 DAG.getTargetConstant(EncodedVal, dl, MVT::i32));
5747 EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
5748 if (VT == MVT::f64)
5749 Mask = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT,
5750 DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
5751 DAG.getConstant(32, dl, MVT::i32));
5752 else /*if (VT == MVT::f32)*/
5753 Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
5754 if (SrcVT == MVT::f32) {
5755 Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
5756 if (VT == MVT::f64)
5757 Tmp1 = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT,
5758 DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
5759 DAG.getConstant(32, dl, MVT::i32));
5760 } else if (VT == MVT::f32)
5761 Tmp1 = DAG.getNode(ARMISD::VSHRuIMM, dl, MVT::v1i64,
5762 DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
5763 DAG.getConstant(32, dl, MVT::i32));
5764 Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
5765 Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
5766
5767 SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0xff),
5768 dl, MVT::i32);
5769 AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
5770 SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
5771 DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
5772
5773 SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
5774 DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
5775 DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
5776 if (VT == MVT::f32) {
5777 Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
5778 Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
5779 DAG.getConstant(0, dl, MVT::i32));
5780 } else {
5781 Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
5782 }
5783
5784 return Res;
5785 }
5786
5787 // Bitcast operand 1 to i32.
5788 if (SrcVT == MVT::f64)
5789 Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
5790 Tmp1).getValue(1);
5791 Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
5792
5793 // Or in the signbit with integer operations.
5794 SDValue Mask1 = DAG.getConstant(0x80000000, dl, MVT::i32);
5795 SDValue Mask2 = DAG.getConstant(0x7fffffff, dl, MVT::i32);
5796 Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
5797 if (VT == MVT::f32) {
5798 Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
5799 DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
5800 return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
5801 DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
5802 }
5803
5804 // f64: Or the high part with signbit and then combine two parts.
5805 Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
5806 Tmp0);
5807 SDValue Lo = Tmp0.getValue(0);
5808 SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
5809 Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
5810 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
5811}
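
The non-NEON f32 path above amounts to clearing the sign bit of the magnitude operand and OR-ing in the sign bit of the other. A minimal standalone sketch (plain C++, not LLVM code), checked against std::copysign:

    #include <cassert>
    #include <cmath>
    #include <cstdint>
    #include <cstring>

    static float copysignViaBits(float Mag, float Sgn) {
      uint32_t M, S;
      std::memcpy(&M, &Mag, sizeof(M));
      std::memcpy(&S, &Sgn, sizeof(S));
      uint32_t R = (M & 0x7fffffffu) | (S & 0x80000000u); // Mask2 / Mask1 above
      float Out;
      std::memcpy(&Out, &R, sizeof(Out));
      return Out;
    }

    int main() {
      assert(copysignViaBits(2.0f, -1.0f) == std::copysign(2.0f, -1.0f));
      assert(copysignViaBits(-2.0f, 1.0f) == 2.0f);
      assert(copysignViaBits(0.0f, -3.0f) == std::copysign(0.0f, -3.0f));
    }
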
5812
5813SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
5814 MachineFunction &MF = DAG.getMachineFunction();
5815 MachineFrameInfo &MFI = MF.getFrameInfo();
5816 MFI.setReturnAddressIsTaken(true);
5817
5818 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
5819 return SDValue();
5820
5821 EVT VT = Op.getValueType();
5822 SDLoc dl(Op);
5823 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
5824 if (Depth) {
5825 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
5826 SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
5827 return DAG.getLoad(VT, dl, DAG.getEntryNode(),
5828 DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
5829 MachinePointerInfo());
5830 }
5831
5832 // Return LR, which contains the return address. Mark it an implicit live-in.
5833 unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
5834 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
5835}
5836
5837SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
5838 const ARMBaseRegisterInfo &ARI =
5839 *static_cast<const ARMBaseRegisterInfo*>(RegInfo);
5840 MachineFunction &MF = DAG.getMachineFunction();
5841 MachineFrameInfo &MFI = MF.getFrameInfo();
5842 MFI.setFrameAddressIsTaken(true);
5843
5844 EVT VT = Op.getValueType();
5845 SDLoc dl(Op); // FIXME probably not meaningful
5846 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
5847 Register FrameReg = ARI.getFrameRegister(MF);
5848 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
5849 while (Depth--)
5850 FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
5851 MachinePointerInfo());
5852 return FrameAddr;
5853}
5854
5855// FIXME? Maybe this could be a TableGen attribute on some registers and
5856// this table could be generated automatically from RegInfo.
5857Register ARMTargetLowering::getRegisterByName(const char* RegName, LLT VT,
5858 const MachineFunction &MF) const {
5859 Register Reg = StringSwitch<unsigned>(RegName)
5860 .Case("sp", ARM::SP)
5861 .Default(0);
5862 if (Reg)
5863 return Reg;
5864 report_fatal_error(Twine("Invalid register name \""
5865 + StringRef(RegName) + "\"."));
5866}
5867
5868 // The result is a 64-bit value, so split it into two 32-bit values and return
5869 // them as a pair of values.
5870static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl<SDValue> &Results,
5871 SelectionDAG &DAG) {
5872 SDLoc DL(N);
5873
5874 // This function is only supposed to be called for i64 type destination.
5875 assert(N->getValueType(0) == MVT::i64
5876 && "ExpandREAD_REGISTER called for non-i64 type result.");
5877
5878 SDValue Read = DAG.getNode(ISD::READ_REGISTER, DL,
5879 DAG.getVTList(MVT::i32, MVT::i32, MVT::Other),
5880 N->getOperand(0),
5881 N->getOperand(1));
5882
5883 Results.push