Bug Summary

File: llvm/lib/Target/ARM/ARMISelLowering.cpp
Warning: line 14441, column 25
The result of the left shift is undefined due to shifting by '4294967264', which is greater or equal to the width of type 'unsigned int'
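For reference, shifting a 32-bit unsigned value by a count greater than or equal to 32 is undefined behavior in C and C++; 4294967264 is 0xFFFFFFE0, i.e. -32 wrapped around to an unsigned count, which typically means the shift amount was computed from a value that went negative on the reported path. The snippet below is a minimal, hypothetical sketch of this class of bug and its guard (names and logic are illustrative, not the code at line 14441):

  #include <cstdint>

  // Hypothetical illustration: a negative shift count converted to unsigned
  // becomes a huge value such as 4294967264, and any count >= 32 on a 32-bit
  // unsigned operand is undefined behavior.
  uint32_t lowMask(int NumBits) {
    if (NumBits <= 0)
      return 0;                 // guard the degenerate case instead of shifting
    if (NumBits >= 32)
      return ~0u;               // shifting by the full width is also undefined
    return (1u << NumBits) - 1; // count is now in [1, 31]: well defined
  }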

Annotated Source Code

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name ARMISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Target/ARM -resource-dir /usr/lib/llvm-14/lib/clang/14.0.0 -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Target/ARM -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/Target/ARM -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/include -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/include -D NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-14/lib/clang/14.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Target/ARM -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e=. -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-09-04-040900-46481-1 -x c++ /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/Target/ARM/ARMISelLowering.cpp

/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/Target/ARM/ARMISelLowering.cpp

1//===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that ARM uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "ARMISelLowering.h"
15#include "ARMBaseInstrInfo.h"
16#include "ARMBaseRegisterInfo.h"
17#include "ARMCallingConv.h"
18#include "ARMConstantPoolValue.h"
19#include "ARMMachineFunctionInfo.h"
20#include "ARMPerfectShuffle.h"
21#include "ARMRegisterInfo.h"
22#include "ARMSelectionDAGInfo.h"
23#include "ARMSubtarget.h"
24#include "ARMTargetTransformInfo.h"
25#include "MCTargetDesc/ARMAddressingModes.h"
26#include "MCTargetDesc/ARMBaseInfo.h"
27#include "Utils/ARMBaseInfo.h"
28#include "llvm/ADT/APFloat.h"
29#include "llvm/ADT/APInt.h"
30#include "llvm/ADT/ArrayRef.h"
31#include "llvm/ADT/BitVector.h"
32#include "llvm/ADT/DenseMap.h"
33#include "llvm/ADT/STLExtras.h"
34#include "llvm/ADT/SmallPtrSet.h"
35#include "llvm/ADT/SmallVector.h"
36#include "llvm/ADT/Statistic.h"
37#include "llvm/ADT/StringExtras.h"
38#include "llvm/ADT/StringRef.h"
39#include "llvm/ADT/StringSwitch.h"
40#include "llvm/ADT/Triple.h"
41#include "llvm/ADT/Twine.h"
42#include "llvm/Analysis/VectorUtils.h"
43#include "llvm/CodeGen/CallingConvLower.h"
44#include "llvm/CodeGen/ISDOpcodes.h"
45#include "llvm/CodeGen/IntrinsicLowering.h"
46#include "llvm/CodeGen/MachineBasicBlock.h"
47#include "llvm/CodeGen/MachineConstantPool.h"
48#include "llvm/CodeGen/MachineFrameInfo.h"
49#include "llvm/CodeGen/MachineFunction.h"
50#include "llvm/CodeGen/MachineInstr.h"
51#include "llvm/CodeGen/MachineInstrBuilder.h"
52#include "llvm/CodeGen/MachineJumpTableInfo.h"
53#include "llvm/CodeGen/MachineMemOperand.h"
54#include "llvm/CodeGen/MachineOperand.h"
55#include "llvm/CodeGen/MachineRegisterInfo.h"
56#include "llvm/CodeGen/RuntimeLibcalls.h"
57#include "llvm/CodeGen/SelectionDAG.h"
58#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
59#include "llvm/CodeGen/SelectionDAGNodes.h"
60#include "llvm/CodeGen/TargetInstrInfo.h"
61#include "llvm/CodeGen/TargetLowering.h"
62#include "llvm/CodeGen/TargetOpcodes.h"
63#include "llvm/CodeGen/TargetRegisterInfo.h"
64#include "llvm/CodeGen/TargetSubtargetInfo.h"
65#include "llvm/CodeGen/ValueTypes.h"
66#include "llvm/IR/Attributes.h"
67#include "llvm/IR/CallingConv.h"
68#include "llvm/IR/Constant.h"
69#include "llvm/IR/Constants.h"
70#include "llvm/IR/DataLayout.h"
71#include "llvm/IR/DebugLoc.h"
72#include "llvm/IR/DerivedTypes.h"
73#include "llvm/IR/Function.h"
74#include "llvm/IR/GlobalAlias.h"
75#include "llvm/IR/GlobalValue.h"
76#include "llvm/IR/GlobalVariable.h"
77#include "llvm/IR/IRBuilder.h"
78#include "llvm/IR/InlineAsm.h"
79#include "llvm/IR/Instruction.h"
80#include "llvm/IR/Instructions.h"
81#include "llvm/IR/IntrinsicInst.h"
82#include "llvm/IR/Intrinsics.h"
83#include "llvm/IR/IntrinsicsARM.h"
84#include "llvm/IR/Module.h"
85#include "llvm/IR/PatternMatch.h"
86#include "llvm/IR/Type.h"
87#include "llvm/IR/User.h"
88#include "llvm/IR/Value.h"
89#include "llvm/MC/MCInstrDesc.h"
90#include "llvm/MC/MCInstrItineraries.h"
91#include "llvm/MC/MCRegisterInfo.h"
92#include "llvm/MC/MCSchedule.h"
93#include "llvm/Support/AtomicOrdering.h"
94#include "llvm/Support/BranchProbability.h"
95#include "llvm/Support/Casting.h"
96#include "llvm/Support/CodeGen.h"
97#include "llvm/Support/CommandLine.h"
98#include "llvm/Support/Compiler.h"
99#include "llvm/Support/Debug.h"
100#include "llvm/Support/ErrorHandling.h"
101#include "llvm/Support/KnownBits.h"
102#include "llvm/Support/MachineValueType.h"
103#include "llvm/Support/MathExtras.h"
104#include "llvm/Support/raw_ostream.h"
105#include "llvm/Target/TargetMachine.h"
106#include "llvm/Target/TargetOptions.h"
107#include <algorithm>
108#include <cassert>
109#include <cstdint>
110#include <cstdlib>
111#include <iterator>
112#include <limits>
113#include <string>
114#include <tuple>
115#include <utility>
116#include <vector>
117
118using namespace llvm;
119using namespace llvm::PatternMatch;
120
121#define DEBUG_TYPE "arm-isel"
122
123STATISTIC(NumTailCalls, "Number of tail calls");
124STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
125STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
126STATISTIC(NumConstpoolPromoted,
127 "Number of constants with their storage promoted into constant pools");
128
129static cl::opt<bool>
130ARMInterworking("arm-interworking", cl::Hidden,
131 cl::desc("Enable / disable ARM interworking (for debugging only)"),
132 cl::init(true));
133
134static cl::opt<bool> EnableConstpoolPromotion(
135 "arm-promote-constant", cl::Hidden,
136 cl::desc("Enable / disable promotion of unnamed_addr constants into "
137 "constant pools"),
138 cl::init(false)); // FIXME: set to true by default once PR32780 is fixed
139static cl::opt<unsigned> ConstpoolPromotionMaxSize(
140 "arm-promote-constant-max-size", cl::Hidden,
141 cl::desc("Maximum size of constant to promote into a constant pool"),
142 cl::init(64));
143static cl::opt<unsigned> ConstpoolPromotionMaxTotal(
144 "arm-promote-constant-max-total", cl::Hidden,
145 cl::desc("Maximum size of ALL constants to promote into a constant pool"),
146 cl::init(128));
147
148cl::opt<unsigned>
149MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
150 cl::desc("Maximum interleave factor for MVE VLDn to generate."),
151 cl::init(2));
152
153// The APCS parameter registers.
154static const MCPhysReg GPRArgRegs[] = {
155 ARM::R0, ARM::R1, ARM::R2, ARM::R3
156};
157
158void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT) {
159 if (VT != PromotedLdStVT) {
160 setOperationAction(ISD::LOAD, VT, Promote);
161 AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
162
163 setOperationAction(ISD::STORE, VT, Promote);
164 AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
165 }
166
167 MVT ElemTy = VT.getVectorElementType();
168 if (ElemTy != MVT::f64)
169 setOperationAction(ISD::SETCC, VT, Custom);
170 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
171 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
172 if (ElemTy == MVT::i32) {
173 setOperationAction(ISD::SINT_TO_FP, VT, Custom);
174 setOperationAction(ISD::UINT_TO_FP, VT, Custom);
175 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
176 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
177 } else {
178 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
179 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
180 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
181 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
182 }
183 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
184 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
185 setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
186 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
187 setOperationAction(ISD::SELECT, VT, Expand);
188 setOperationAction(ISD::SELECT_CC, VT, Expand);
189 setOperationAction(ISD::VSELECT, VT, Expand);
190 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
191 if (VT.isInteger()) {
192 setOperationAction(ISD::SHL, VT, Custom);
193 setOperationAction(ISD::SRA, VT, Custom);
194 setOperationAction(ISD::SRL, VT, Custom);
195 }
196
197 // Neon does not support vector divide/remainder operations.
198 setOperationAction(ISD::SDIV, VT, Expand);
199 setOperationAction(ISD::UDIV, VT, Expand);
200 setOperationAction(ISD::FDIV, VT, Expand);
201 setOperationAction(ISD::SREM, VT, Expand);
202 setOperationAction(ISD::UREM, VT, Expand);
203 setOperationAction(ISD::FREM, VT, Expand);
204 setOperationAction(ISD::SDIVREM, VT, Expand);
205 setOperationAction(ISD::UDIVREM, VT, Expand);
206
207 if (!VT.isFloatingPoint() &&
208 VT != MVT::v2i64 && VT != MVT::v1i64)
209 for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
210 setOperationAction(Opcode, VT, Legal);
211 if (!VT.isFloatingPoint())
212 for (auto Opcode : {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT})
213 setOperationAction(Opcode, VT, Legal);
214}
215
216void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
217 addRegisterClass(VT, &ARM::DPRRegClass);
218 addTypeForNEON(VT, MVT::f64);
219}
220
221void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
222 addRegisterClass(VT, &ARM::DPairRegClass);
223 addTypeForNEON(VT, MVT::v2f64);
224}
225
226void ARMTargetLowering::setAllExpand(MVT VT) {
227 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
228 setOperationAction(Opc, VT, Expand);
229
230 // We support these really simple operations even on types where all
231 // the actual arithmetic has to be broken down into simpler
232 // operations or turned into library calls.
233 setOperationAction(ISD::BITCAST, VT, Legal);
234 setOperationAction(ISD::LOAD, VT, Legal);
235 setOperationAction(ISD::STORE, VT, Legal);
236 setOperationAction(ISD::UNDEF, VT, Legal);
237}
238
239void ARMTargetLowering::addAllExtLoads(const MVT From, const MVT To,
240 LegalizeAction Action) {
241 setLoadExtAction(ISD::EXTLOAD, From, To, Action);
242 setLoadExtAction(ISD::ZEXTLOAD, From, To, Action);
243 setLoadExtAction(ISD::SEXTLOAD, From, To, Action);
244}
245
246void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
247 const MVT IntTypes[] = { MVT::v16i8, MVT::v8i16, MVT::v4i32 };
248
249 for (auto VT : IntTypes) {
250 addRegisterClass(VT, &ARM::MQPRRegClass);
251 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
252 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
253 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
254 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
255 setOperationAction(ISD::SHL, VT, Custom);
256 setOperationAction(ISD::SRA, VT, Custom);
257 setOperationAction(ISD::SRL, VT, Custom);
258 setOperationAction(ISD::SMIN, VT, Legal);
259 setOperationAction(ISD::SMAX, VT, Legal);
260 setOperationAction(ISD::UMIN, VT, Legal);
261 setOperationAction(ISD::UMAX, VT, Legal);
262 setOperationAction(ISD::ABS, VT, Legal);
263 setOperationAction(ISD::SETCC, VT, Custom);
264 setOperationAction(ISD::MLOAD, VT, Custom);
265 setOperationAction(ISD::MSTORE, VT, Legal);
266 setOperationAction(ISD::CTLZ, VT, Legal);
267 setOperationAction(ISD::CTTZ, VT, Custom);
268 setOperationAction(ISD::BITREVERSE, VT, Legal);
269 setOperationAction(ISD::BSWAP, VT, Legal);
270 setOperationAction(ISD::SADDSAT, VT, Legal);
271 setOperationAction(ISD::UADDSAT, VT, Legal);
272 setOperationAction(ISD::SSUBSAT, VT, Legal);
273 setOperationAction(ISD::USUBSAT, VT, Legal);
274 setOperationAction(ISD::ABDS, VT, Legal);
275 setOperationAction(ISD::ABDU, VT, Legal);
276
277 // No native support for these.
278 setOperationAction(ISD::UDIV, VT, Expand);
279 setOperationAction(ISD::SDIV, VT, Expand);
280 setOperationAction(ISD::UREM, VT, Expand);
281 setOperationAction(ISD::SREM, VT, Expand);
282 setOperationAction(ISD::UDIVREM, VT, Expand);
283 setOperationAction(ISD::SDIVREM, VT, Expand);
284 setOperationAction(ISD::CTPOP, VT, Expand);
285 setOperationAction(ISD::SELECT, VT, Expand);
286 setOperationAction(ISD::SELECT_CC, VT, Expand);
287
288 // Vector reductions
289 setOperationAction(ISD::VECREDUCE_ADD, VT, Legal);
290 setOperationAction(ISD::VECREDUCE_SMAX, VT, Legal);
291 setOperationAction(ISD::VECREDUCE_UMAX, VT, Legal);
292 setOperationAction(ISD::VECREDUCE_SMIN, VT, Legal);
293 setOperationAction(ISD::VECREDUCE_UMIN, VT, Legal);
294 setOperationAction(ISD::VECREDUCE_MUL, VT, Custom);
295 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
296 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
297 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
298
299 if (!HasMVEFP) {
300 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
301 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
302 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
303 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
304 } else {
305 setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
306 setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
307 }
308
309 // Pre and Post inc are supported on loads and stores
310 for (unsigned im = (unsigned)ISD::PRE_INC;
311 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
312 setIndexedLoadAction(im, VT, Legal);
313 setIndexedStoreAction(im, VT, Legal);
314 setIndexedMaskedLoadAction(im, VT, Legal);
315 setIndexedMaskedStoreAction(im, VT, Legal);
316 }
317 }
318
319 const MVT FloatTypes[] = { MVT::v8f16, MVT::v4f32 };
320 for (auto VT : FloatTypes) {
321 addRegisterClass(VT, &ARM::MQPRRegClass);
322 if (!HasMVEFP)
323 setAllExpand(VT);
324
325 // These are legal or custom whether we have MVE.fp or not
326 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
327 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
328 setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getVectorElementType(), Custom);
329 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
330 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
331 setOperationAction(ISD::BUILD_VECTOR, VT.getVectorElementType(), Custom);
332 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
333 setOperationAction(ISD::SETCC, VT, Custom);
334 setOperationAction(ISD::MLOAD, VT, Custom);
335 setOperationAction(ISD::MSTORE, VT, Legal);
336 setOperationAction(ISD::SELECT, VT, Expand);
337 setOperationAction(ISD::SELECT_CC, VT, Expand);
338
339 // Pre and Post inc are supported on loads and stores
340 for (unsigned im = (unsigned)ISD::PRE_INC;
341 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
342 setIndexedLoadAction(im, VT, Legal);
343 setIndexedStoreAction(im, VT, Legal);
344 setIndexedMaskedLoadAction(im, VT, Legal);
345 setIndexedMaskedStoreAction(im, VT, Legal);
346 }
347
348 if (HasMVEFP) {
349 setOperationAction(ISD::FMINNUM, VT, Legal);
350 setOperationAction(ISD::FMAXNUM, VT, Legal);
351 setOperationAction(ISD::FROUND, VT, Legal);
352 setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
353 setOperationAction(ISD::VECREDUCE_FMUL, VT, Custom);
354 setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
355 setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
356
357 // No native support for these.
358 setOperationAction(ISD::FDIV, VT, Expand);
359 setOperationAction(ISD::FREM, VT, Expand);
360 setOperationAction(ISD::FSQRT, VT, Expand);
361 setOperationAction(ISD::FSIN, VT, Expand);
362 setOperationAction(ISD::FCOS, VT, Expand);
363 setOperationAction(ISD::FPOW, VT, Expand);
364 setOperationAction(ISD::FLOG, VT, Expand);
365 setOperationAction(ISD::FLOG2, VT, Expand);
366 setOperationAction(ISD::FLOG10, VT, Expand);
367 setOperationAction(ISD::FEXP, VT, Expand);
368 setOperationAction(ISD::FEXP2, VT, Expand);
369 setOperationAction(ISD::FNEARBYINT, VT, Expand);
370 }
371 }
372
373 // Custom Expand smaller than legal vector reductions to prevent false zero
374 // items being added.
375 setOperationAction(ISD::VECREDUCE_FADD, MVT::v4f16, Custom);
376 setOperationAction(ISD::VECREDUCE_FMUL, MVT::v4f16, Custom);
377 setOperationAction(ISD::VECREDUCE_FMIN, MVT::v4f16, Custom);
378 setOperationAction(ISD::VECREDUCE_FMAX, MVT::v4f16, Custom);
379 setOperationAction(ISD::VECREDUCE_FADD, MVT::v2f16, Custom);
380 setOperationAction(ISD::VECREDUCE_FMUL, MVT::v2f16, Custom);
381 setOperationAction(ISD::VECREDUCE_FMIN, MVT::v2f16, Custom);
382 setOperationAction(ISD::VECREDUCE_FMAX, MVT::v2f16, Custom);
383
384 // We 'support' these types up to bitcast/load/store level, regardless of
385 // MVE integer-only / float support. Only doing FP data processing on the FP
386 // vector types is inhibited at integer-only level.
387 const MVT LongTypes[] = { MVT::v2i64, MVT::v2f64 };
388 for (auto VT : LongTypes) {
389 addRegisterClass(VT, &ARM::MQPRRegClass);
390 setAllExpand(VT);
391 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
392 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
393 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
394 }
395 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
396
397 // We can do bitwise operations on v2i64 vectors
398 setOperationAction(ISD::AND, MVT::v2i64, Legal);
399 setOperationAction(ISD::OR, MVT::v2i64, Legal);
400 setOperationAction(ISD::XOR, MVT::v2i64, Legal);
401
402 // It is legal to extload from v4i8 to v4i16 or v4i32.
403 addAllExtLoads(MVT::v8i16, MVT::v8i8, Legal);
404 addAllExtLoads(MVT::v4i32, MVT::v4i16, Legal);
405 addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal);
406
407 // It is legal to sign extend from v4i8/v4i16 to v4i32 or v8i8 to v8i16.
408 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Legal);
409 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);
410 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);
411 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i8, Legal);
412 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i16, Legal);
413
414 // Some truncating stores are legal too.
415 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
416 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
417 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
418
419 // Pre and Post inc on these are legal, given the correct extends
420 for (unsigned im = (unsigned)ISD::PRE_INC;
421 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
422 for (auto VT : {MVT::v8i8, MVT::v4i8, MVT::v4i16}) {
423 setIndexedLoadAction(im, VT, Legal);
424 setIndexedStoreAction(im, VT, Legal);
425 setIndexedMaskedLoadAction(im, VT, Legal);
426 setIndexedMaskedStoreAction(im, VT, Legal);
427 }
428 }
429
430 // Predicate types
431 const MVT pTypes[] = {MVT::v16i1, MVT::v8i1, MVT::v4i1};
432 for (auto VT : pTypes) {
433 addRegisterClass(VT, &ARM::VCCRRegClass);
434 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
435 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
436 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
437 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
438 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
439 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
440 setOperationAction(ISD::SETCC, VT, Custom);
441 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
442 setOperationAction(ISD::LOAD, VT, Custom);
443 setOperationAction(ISD::STORE, VT, Custom);
444 setOperationAction(ISD::TRUNCATE, VT, Custom);
445 setOperationAction(ISD::VSELECT, VT, Expand);
446 setOperationAction(ISD::SELECT, VT, Expand);
447 }
448 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
449 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
450 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
451 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
452 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i16, Custom);
453 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
454 setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
455 setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
456}
457
458ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
459 const ARMSubtarget &STI)
460 : TargetLowering(TM), Subtarget(&STI) {
461 RegInfo = Subtarget->getRegisterInfo();
462 Itins = Subtarget->getInstrItineraryData();
463
464 setBooleanContents(ZeroOrOneBooleanContent);
465 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
466
467 if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
468 !Subtarget->isTargetWatchOS()) {
469 bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard;
470 for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
471 setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
472 IsHFTarget ? CallingConv::ARM_AAPCS_VFP
473 : CallingConv::ARM_AAPCS);
474 }
475
476 if (Subtarget->isTargetMachO()) {
477 // Uses VFP for Thumb libfuncs if available.
478 if (Subtarget->isThumb() && Subtarget->hasVFP2Base() &&
479 Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
480 static const struct {
481 const RTLIB::Libcall Op;
482 const char * const Name;
483 const ISD::CondCode Cond;
484 } LibraryCalls[] = {
485 // Single-precision floating-point arithmetic.
486 { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
487 { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
488 { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
489 { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
490
491 // Double-precision floating-point arithmetic.
492 { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
493 { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
494 { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
495 { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
496
497 // Single-precision comparisons.
498 { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
499 { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
500 { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
501 { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
502 { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
503 { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
504 { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
505
506 // Double-precision comparisons.
507 { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
508 { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
509 { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
510 { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
511 { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
512 { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
513 { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
514
515 // Floating-point to integer conversions.
516 // i64 conversions are done via library routines even when generating VFP
517 // instructions, so use the same ones.
518 { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
519 { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
520 { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
521 { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
522
523 // Conversions between floating types.
524 { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
525 { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
526
527 // Integer to floating-point conversions.
528 // i64 conversions are done via library routines even when generating VFP
529 // instructions, so use the same ones.
530 // FIXME: There appears to be some naming inconsistency in ARM libgcc:
531 // e.g., __floatunsidf vs. __floatunssidfvfp.
532 { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
533 { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
534 { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
535 { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
536 };
537
538 for (const auto &LC : LibraryCalls) {
539 setLibcallName(LC.Op, LC.Name);
540 if (LC.Cond != ISD::SETCC_INVALID)
541 setCmpLibcallCC(LC.Op, LC.Cond);
542 }
543 }
544 }
545
546 // These libcalls are not available in 32-bit.
547 setLibcallName(RTLIB::SHL_I128, nullptr);
548 setLibcallName(RTLIB::SRL_I128, nullptr);
549 setLibcallName(RTLIB::SRA_I128, nullptr);
550 setLibcallName(RTLIB::MUL_I128, nullptr);
551 setLibcallName(RTLIB::MULO_I64, nullptr);
552 setLibcallName(RTLIB::MULO_I128, nullptr);
553
554 // RTLIB
555 if (Subtarget->isAAPCS_ABI() &&
556 (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
557 Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
558 static const struct {
559 const RTLIB::Libcall Op;
560 const char * const Name;
561 const CallingConv::ID CC;
562 const ISD::CondCode Cond;
563 } LibraryCalls[] = {
564 // Double-precision floating-point arithmetic helper functions
565 // RTABI chapter 4.1.2, Table 2
566 { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
567 { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
568 { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
569 { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
570
571 // Double-precision floating-point comparison helper functions
572 // RTABI chapter 4.1.2, Table 3
573 { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
574 { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
575 { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
576 { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
577 { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
578 { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
579 { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
580
581 // Single-precision floating-point arithmetic helper functions
582 // RTABI chapter 4.1.2, Table 4
583 { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
584 { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
585 { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
586 { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
587
588 // Single-precision floating-point comparison helper functions
589 // RTABI chapter 4.1.2, Table 5
590 { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
591 { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
592 { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
593 { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
594 { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
595 { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
596 { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
597
598 // Floating-point to integer conversions.
599 // RTABI chapter 4.1.2, Table 6
600 { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
601 { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
602 { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
603 { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
604 { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
605 { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
606 { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
607 { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
608
609 // Conversions between floating types.
610 // RTABI chapter 4.1.2, Table 7
611 { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
612 { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
613 { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
614
615 // Integer to floating-point conversions.
616 // RTABI chapter 4.1.2, Table 8
617 { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
618 { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
619 { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
620 { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
621 { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
622 { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
623 { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
624 { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
625
626 // Long long helper functions
627 // RTABI chapter 4.2, Table 9
628 { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
629 { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
630 { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
631 { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
632
633 // Integer division functions
634 // RTABI chapter 4.3.1
635 { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
636 { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
637 { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
638 { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
639 { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
640 { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
641 { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
642 { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
643 };
644
645 for (const auto &LC : LibraryCalls) {
646 setLibcallName(LC.Op, LC.Name);
647 setLibcallCallingConv(LC.Op, LC.CC);
648 if (LC.Cond != ISD::SETCC_INVALID)
649 setCmpLibcallCC(LC.Op, LC.Cond);
650 }
651
652 // EABI dependent RTLIB
653 if (TM.Options.EABIVersion == EABI::EABI4 ||
654 TM.Options.EABIVersion == EABI::EABI5) {
655 static const struct {
656 const RTLIB::Libcall Op;
657 const char *const Name;
658 const CallingConv::ID CC;
659 const ISD::CondCode Cond;
660 } MemOpsLibraryCalls[] = {
661 // Memory operations
662 // RTABI chapter 4.3.4
663 { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
664 { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
665 { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
666 };
667
668 for (const auto &LC : MemOpsLibraryCalls) {
669 setLibcallName(LC.Op, LC.Name);
670 setLibcallCallingConv(LC.Op, LC.CC);
671 if (LC.Cond != ISD::SETCC_INVALID)
672 setCmpLibcallCC(LC.Op, LC.Cond);
673 }
674 }
675 }
676
677 if (Subtarget->isTargetWindows()) {
678 static const struct {
679 const RTLIB::Libcall Op;
680 const char * const Name;
681 const CallingConv::ID CC;
682 } LibraryCalls[] = {
683 { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
684 { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
685 { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
686 { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
687 { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
688 { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
689 { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
690 { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
691 };
692
693 for (const auto &LC : LibraryCalls) {
694 setLibcallName(LC.Op, LC.Name);
695 setLibcallCallingConv(LC.Op, LC.CC);
696 }
697 }
698
699 // Use divmod compiler-rt calls for iOS 5.0 and later.
700 if (Subtarget->isTargetMachO() &&
701 !(Subtarget->isTargetIOS() &&
702 Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
703 setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
704 setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
705 }
706
707 // The half <-> float conversion functions are always soft-float on
708 // non-watchos platforms, but are needed for some targets which use a
709 // hard-float calling convention by default.
710 if (!Subtarget->isTargetWatchABI()) {
711 if (Subtarget->isAAPCS_ABI()) {
712 setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
713 setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
714 setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
715 } else {
716 setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
717 setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
718 setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
719 }
720 }
721
722 // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
723 // a __gnu_ prefix (which is the default).
724 if (Subtarget->isTargetAEABI()) {
725 static const struct {
726 const RTLIB::Libcall Op;
727 const char * const Name;
728 const CallingConv::ID CC;
729 } LibraryCalls[] = {
730 { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS },
731 { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS },
732 { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS },
733 };
734
735 for (const auto &LC : LibraryCalls) {
736 setLibcallName(LC.Op, LC.Name);
737 setLibcallCallingConv(LC.Op, LC.CC);
738 }
739 }
740
741 if (Subtarget->isThumb1Only())
742 addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
743 else
744 addRegisterClass(MVT::i32, &ARM::GPRRegClass);
745
746 if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only() &&
747 Subtarget->hasFPRegs()) {
748 addRegisterClass(MVT::f32, &ARM::SPRRegClass);
749 addRegisterClass(MVT::f64, &ARM::DPRRegClass);
750
751 setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i32, Custom);
752 setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i32, Custom);
753 setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);
754 setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
755
756 if (!Subtarget->hasVFP2Base())
757 setAllExpand(MVT::f32);
758 if (!Subtarget->hasFP64())
759 setAllExpand(MVT::f64);
760 }
761
762 if (Subtarget->hasFullFP16()) {
763 addRegisterClass(MVT::f16, &ARM::HPRRegClass);
764 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
765 setOperationAction(ISD::BITCAST, MVT::f16, Custom);
766
767 setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
768 setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
769 }
770
771 if (Subtarget->hasBF16()) {
772 addRegisterClass(MVT::bf16, &ARM::HPRRegClass);
773 setAllExpand(MVT::bf16);
774 if (!Subtarget->hasFullFP16())
775 setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
776 }
777
778 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
779 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
780 setTruncStoreAction(VT, InnerVT, Expand);
781 addAllExtLoads(VT, InnerVT, Expand);
782 }
783
784 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
785 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
786
787 setOperationAction(ISD::BSWAP, VT, Expand);
788 }
789
790 setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
791 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
792
793 setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
794 setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
795
796 if (Subtarget->hasMVEIntegerOps())
797 addMVEVectorTypes(Subtarget->hasMVEFloatOps());
798
799 // Combine low-overhead loop intrinsics so that we can lower i1 types.
800 if (Subtarget->hasLOB()) {
801 setTargetDAGCombine(ISD::BRCOND);
802 setTargetDAGCombine(ISD::BR_CC);
803 }
804
805 if (Subtarget->hasNEON()) {
806 addDRTypeForNEON(MVT::v2f32);
807 addDRTypeForNEON(MVT::v8i8);
808 addDRTypeForNEON(MVT::v4i16);
809 addDRTypeForNEON(MVT::v2i32);
810 addDRTypeForNEON(MVT::v1i64);
811
812 addQRTypeForNEON(MVT::v4f32);
813 addQRTypeForNEON(MVT::v2f64);
814 addQRTypeForNEON(MVT::v16i8);
815 addQRTypeForNEON(MVT::v8i16);
816 addQRTypeForNEON(MVT::v4i32);
817 addQRTypeForNEON(MVT::v2i64);
818
819 if (Subtarget->hasFullFP16()) {
820 addQRTypeForNEON(MVT::v8f16);
821 addDRTypeForNEON(MVT::v4f16);
822 }
823
824 if (Subtarget->hasBF16()) {
825 addQRTypeForNEON(MVT::v8bf16);
826 addDRTypeForNEON(MVT::v4bf16);
827 }
828 }
829
830 if (Subtarget->hasMVEIntegerOps() || Subtarget->hasNEON()) {
831 // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
832 // none of Neon, MVE or VFP supports any arithmetic operations on it.
833 setOperationAction(ISD::FADD, MVT::v2f64, Expand);
834 setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
835 setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
836 // FIXME: Code duplication: FDIV and FREM are expanded always, see
837 // ARMTargetLowering::addTypeForNEON method for details.
838 setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
839 setOperationAction(ISD::FREM, MVT::v2f64, Expand);
840 // FIXME: Create unittest.
 841// In other words, find a way when "copysign" appears in DAG with vector
842 // operands.
843 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
844 // FIXME: Code duplication: SETCC has custom operation action, see
845 // ARMTargetLowering::addTypeForNEON method for details.
846 setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
847 // FIXME: Create unittest for FNEG and for FABS.
848 setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
849 setOperationAction(ISD::FABS, MVT::v2f64, Expand);
850 setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
851 setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
852 setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
853 setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
854 setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
855 setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
856 setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
857 setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
858 setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
859 // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
860 setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
861 setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
862 setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
863 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
864 setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
865 setOperationAction(ISD::FMA, MVT::v2f64, Expand);
866 }
867
868 if (Subtarget->hasNEON()) {
869 // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively
870 // supported for v4f32.
871 setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
872 setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
873 setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
874 setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
875 setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
876 setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
877 setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
878 setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
879 setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
880 setOperationAction(ISD::FCEIL, MVT::v4f32, Expand);
881 setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand);
882 setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
883 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
884 setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand);
885
886 // Mark v2f32 intrinsics.
887 setOperationAction(ISD::FSQRT, MVT::v2f32, Expand);
888 setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
889 setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
890 setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
891 setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
892 setOperationAction(ISD::FLOG2, MVT::v2f32, Expand);
893 setOperationAction(ISD::FLOG10, MVT::v2f32, Expand);
894 setOperationAction(ISD::FEXP, MVT::v2f32, Expand);
895 setOperationAction(ISD::FEXP2, MVT::v2f32, Expand);
896 setOperationAction(ISD::FCEIL, MVT::v2f32, Expand);
897 setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand);
898 setOperationAction(ISD::FRINT, MVT::v2f32, Expand);
899 setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand);
900 setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand);
901
902 // Neon does not support some operations on v1i64 and v2i64 types.
903 setOperationAction(ISD::MUL, MVT::v1i64, Expand);
904 // Custom handling for some quad-vector types to detect VMULL.
905 setOperationAction(ISD::MUL, MVT::v8i16, Custom);
906 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
907 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
908 // Custom handling for some vector types to avoid expensive expansions
909 setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
910 setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
911 setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
912 setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
913 // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
914 // a destination type that is wider than the source, and nor does
915 // it have a FP_TO_[SU]INT instruction with a narrower destination than
916 // source.
917 setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
918 setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom);
919 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
920 setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
921 setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
922 setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Custom);
923 setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
924 setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
925
926 setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand);
927 setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
928
929 // NEON does not have single instruction CTPOP for vectors with element
930 // types wider than 8-bits. However, custom lowering can leverage the
931 // v8i8/v16i8 vcnt instruction.
932 setOperationAction(ISD::CTPOP, MVT::v2i32, Custom);
933 setOperationAction(ISD::CTPOP, MVT::v4i32, Custom);
934 setOperationAction(ISD::CTPOP, MVT::v4i16, Custom);
935 setOperationAction(ISD::CTPOP, MVT::v8i16, Custom);
936 setOperationAction(ISD::CTPOP, MVT::v1i64, Custom);
937 setOperationAction(ISD::CTPOP, MVT::v2i64, Custom);
938
939 setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
940 setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
941
942 // NEON does not have single instruction CTTZ for vectors.
943 setOperationAction(ISD::CTTZ, MVT::v8i8, Custom);
944 setOperationAction(ISD::CTTZ, MVT::v4i16, Custom);
945 setOperationAction(ISD::CTTZ, MVT::v2i32, Custom);
946 setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
947
948 setOperationAction(ISD::CTTZ, MVT::v16i8, Custom);
949 setOperationAction(ISD::CTTZ, MVT::v8i16, Custom);
950 setOperationAction(ISD::CTTZ, MVT::v4i32, Custom);
951 setOperationAction(ISD::CTTZ, MVT::v2i64, Custom);
952
953 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i8, Custom);
954 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i16, Custom);
955 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i32, Custom);
956 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v1i64, Custom);
957
958 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i8, Custom);
959 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i16, Custom);
960 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom);
961 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom);
962
963 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
964 setOperationAction(ISD::MULHS, VT, Expand);
965 setOperationAction(ISD::MULHU, VT, Expand);
966 }
967
968 // NEON only has FMA instructions as of VFP4.
969 if (!Subtarget->hasVFP4Base()) {
970 setOperationAction(ISD::FMA, MVT::v2f32, Expand);
971 setOperationAction(ISD::FMA, MVT::v4f32, Expand);
972 }
973
974 setTargetDAGCombine(ISD::SHL);
975 setTargetDAGCombine(ISD::SRL);
976 setTargetDAGCombine(ISD::SRA);
977 setTargetDAGCombine(ISD::FP_TO_SINT);
978 setTargetDAGCombine(ISD::FP_TO_UINT);
979 setTargetDAGCombine(ISD::FDIV);
980 setTargetDAGCombine(ISD::LOAD);
981
982 // It is legal to extload from v4i8 to v4i16 or v4i32.
983 for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16,
984 MVT::v2i32}) {
985 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
986 setLoadExtAction(ISD::EXTLOAD, VT, Ty, Legal);
987 setLoadExtAction(ISD::ZEXTLOAD, VT, Ty, Legal);
988 setLoadExtAction(ISD::SEXTLOAD, VT, Ty, Legal);
989 }
990 }
991 }
992
993 if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
994 setTargetDAGCombine(ISD::BUILD_VECTOR);
995 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
996 setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
997 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
998 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
999 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
1000 setTargetDAGCombine(ISD::STORE);
1001 setTargetDAGCombine(ISD::SIGN_EXTEND);
1002 setTargetDAGCombine(ISD::ZERO_EXTEND);
1003 setTargetDAGCombine(ISD::ANY_EXTEND);
1004 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
1005 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
1006 setTargetDAGCombine(ISD::INTRINSIC_VOID);
1007 setTargetDAGCombine(ISD::VECREDUCE_ADD);
1008 setTargetDAGCombine(ISD::ADD);
1009 setTargetDAGCombine(ISD::BITCAST);
1010 }
1011 if (Subtarget->hasMVEIntegerOps()) {
1012 setTargetDAGCombine(ISD::SMIN);
1013 setTargetDAGCombine(ISD::UMIN);
1014 setTargetDAGCombine(ISD::SMAX);
1015 setTargetDAGCombine(ISD::UMAX);
1016 setTargetDAGCombine(ISD::FP_EXTEND);
1017 setTargetDAGCombine(ISD::SELECT);
1018 setTargetDAGCombine(ISD::SELECT_CC);
1019 }
1020
1021 if (!Subtarget->hasFP64()) {
1022 // When targeting a floating-point unit with only single-precision
1023 // operations, f64 is legal for the few double-precision instructions which
 1024// are present. However, no double-precision operations other than moves,
1025 // loads and stores are provided by the hardware.
1026 setOperationAction(ISD::FADD, MVT::f64, Expand);
1027 setOperationAction(ISD::FSUB, MVT::f64, Expand);
1028 setOperationAction(ISD::FMUL, MVT::f64, Expand);
1029 setOperationAction(ISD::FMA, MVT::f64, Expand);
1030 setOperationAction(ISD::FDIV, MVT::f64, Expand);
1031 setOperationAction(ISD::FREM, MVT::f64, Expand);
1032 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
1033 setOperationAction(ISD::FGETSIGN, MVT::f64, Expand);
1034 setOperationAction(ISD::FNEG, MVT::f64, Expand);
1035 setOperationAction(ISD::FABS, MVT::f64, Expand);
1036 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
1037 setOperationAction(ISD::FSIN, MVT::f64, Expand);
1038 setOperationAction(ISD::FCOS, MVT::f64, Expand);
1039 setOperationAction(ISD::FPOW, MVT::f64, Expand);
1040 setOperationAction(ISD::FLOG, MVT::f64, Expand);
1041 setOperationAction(ISD::FLOG2, MVT::f64, Expand);
1042 setOperationAction(ISD::FLOG10, MVT::f64, Expand);
1043 setOperationAction(ISD::FEXP, MVT::f64, Expand);
1044 setOperationAction(ISD::FEXP2, MVT::f64, Expand);
1045 setOperationAction(ISD::FCEIL, MVT::f64, Expand);
1046 setOperationAction(ISD::FTRUNC, MVT::f64, Expand);
1047 setOperationAction(ISD::FRINT, MVT::f64, Expand);
1048 setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
1049 setOperationAction(ISD::FFLOOR, MVT::f64, Expand);
1050 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
1051 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
1052 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
1053 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
1054 setOperationAction(ISD::FP_TO_SINT, MVT::f64, Custom);
1055 setOperationAction(ISD::FP_TO_UINT, MVT::f64, Custom);
1056 setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
1057 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
1058 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
1059 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::f64, Custom);
1060 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::f64, Custom);
1061 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
1062 }
1063
1064 if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) {
1065 setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
1066 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Custom);
1067 if (Subtarget->hasFullFP16()) {
1068 setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
1069 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
1070 }
1071 }
1072
1073 if (!Subtarget->hasFP16()) {
1074 setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom);
1075 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Custom);
1076 }
1077
1078 computeRegisterProperties(Subtarget->getRegisterInfo());
1079
1080 // ARM does not have floating-point extending loads.
1081 for (MVT VT : MVT::fp_valuetypes()) {
1082 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
1083 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
1084 }
1085
1086 // ... or truncating stores
1087 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
1088 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
1089 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
1090
1091 // ARM does not have i1 sign extending load.
1092 for (MVT VT : MVT::integer_valuetypes())
1093 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
1094
1095 // ARM supports all 4 flavors of integer indexed load / store.
1096 if (!Subtarget->isThumb1Only()) {
1097 for (unsigned im = (unsigned)ISD::PRE_INC;
1098 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
1099 setIndexedLoadAction(im, MVT::i1, Legal);
1100 setIndexedLoadAction(im, MVT::i8, Legal);
1101 setIndexedLoadAction(im, MVT::i16, Legal);
1102 setIndexedLoadAction(im, MVT::i32, Legal);
1103 setIndexedStoreAction(im, MVT::i1, Legal);
1104 setIndexedStoreAction(im, MVT::i8, Legal);
1105 setIndexedStoreAction(im, MVT::i16, Legal);
1106 setIndexedStoreAction(im, MVT::i32, Legal);
1107 }
1108 } else {
1109 // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
1110 setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal);
1111 setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal);
1112 }
1113
1114 setOperationAction(ISD::SADDO, MVT::i32, Custom);
1115 setOperationAction(ISD::UADDO, MVT::i32, Custom);
1116 setOperationAction(ISD::SSUBO, MVT::i32, Custom);
1117 setOperationAction(ISD::USUBO, MVT::i32, Custom);
1118
1119 setOperationAction(ISD::ADDCARRY, MVT::i32, Custom);
1120 setOperationAction(ISD::SUBCARRY, MVT::i32, Custom);
1121 if (Subtarget->hasDSP()) {
1122 setOperationAction(ISD::SADDSAT, MVT::i8, Custom);
1123 setOperationAction(ISD::SSUBSAT, MVT::i8, Custom);
1124 setOperationAction(ISD::SADDSAT, MVT::i16, Custom);
1125 setOperationAction(ISD::SSUBSAT, MVT::i16, Custom);
1126 setOperationAction(ISD::UADDSAT, MVT::i8, Custom);
1127 setOperationAction(ISD::USUBSAT, MVT::i8, Custom);
1128 setOperationAction(ISD::UADDSAT, MVT::i16, Custom);
1129 setOperationAction(ISD::USUBSAT, MVT::i16, Custom);
1130 }
1131 if (Subtarget->hasBaseDSP()) {
1132 setOperationAction(ISD::SADDSAT, MVT::i32, Legal);
1133 setOperationAction(ISD::SSUBSAT, MVT::i32, Legal);
1134 }
1135
1136 // i64 operation support.
1137 setOperationAction(ISD::MUL, MVT::i64, Expand);
1138 setOperationAction(ISD::MULHU, MVT::i32, Expand);
1139 if (Subtarget->isThumb1Only()) {
1140 setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
1141 setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
1142 }
1143 if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
1144 || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
1145 setOperationAction(ISD::MULHS, MVT::i32, Expand);
1146
1147 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
1148 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
1149 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
1150 setOperationAction(ISD::SRL, MVT::i64, Custom);
1151 setOperationAction(ISD::SRA, MVT::i64, Custom);
1152 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
1153 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
1154 setOperationAction(ISD::LOAD, MVT::i64, Custom);
1155 setOperationAction(ISD::STORE, MVT::i64, Custom);
1156
1157 // MVE lowers 64 bit shifts to lsll and lsrl
1158 // assuming that ISD::SRL and SRA of i64 are already marked custom
1159 if (Subtarget->hasMVEIntegerOps())
1160 setOperationAction(ISD::SHL, MVT::i64, Custom);
1161
1162 // Expand to __aeabi_l{lsl,lsr,asr} calls for Thumb1.
1163 if (Subtarget->isThumb1Only()) {
1164 setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
1165 setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
1166 setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
1167 }
1168
1169 if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
1170 setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
1171
1172 // ARM does not have ROTL.
1173 setOperationAction(ISD::ROTL, MVT::i32, Expand);
1174 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
1175 setOperationAction(ISD::ROTL, VT, Expand);
1176 setOperationAction(ISD::ROTR, VT, Expand);
1177 }
1178 setOperationAction(ISD::CTTZ, MVT::i32, Custom);
1179 setOperationAction(ISD::CTPOP, MVT::i32, Expand);
1180 if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
1181 setOperationAction(ISD::CTLZ, MVT::i32, Expand);
1182 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, LibCall);
1183 }
1184
1185 // @llvm.readcyclecounter requires the Performance Monitors extension.
1186 // Default to the 0 expansion on unsupported platforms.
1187 // FIXME: Technically there are older ARM CPUs that have
1188 // implementation-specific ways of obtaining this information.
1189 if (Subtarget->hasPerfMon())
1190 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
1191
1192 // Only ARMv6 has BSWAP.
1193 if (!Subtarget->hasV6Ops())
1194 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
1195
1196 bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
1197 : Subtarget->hasDivideInARMMode();
1198 if (!hasDivide) {
1199 // These are expanded into libcalls if the cpu doesn't have HW divider.
1200 setOperationAction(ISD::SDIV, MVT::i32, LibCall);
1201 setOperationAction(ISD::UDIV, MVT::i32, LibCall);
1202 }
1203
1204 if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
1205 setOperationAction(ISD::SDIV, MVT::i32, Custom);
1206 setOperationAction(ISD::UDIV, MVT::i32, Custom);
1207
1208 setOperationAction(ISD::SDIV, MVT::i64, Custom);
1209 setOperationAction(ISD::UDIV, MVT::i64, Custom);
1210 }
1211
1212 setOperationAction(ISD::SREM, MVT::i32, Expand);
1213 setOperationAction(ISD::UREM, MVT::i32, Expand);
1214
1215 // Register based DivRem for AEABI (RTABI 4.2)
1216 if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
1217 Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
1218 Subtarget->isTargetWindows()) {
1219 setOperationAction(ISD::SREM, MVT::i64, Custom);
1220 setOperationAction(ISD::UREM, MVT::i64, Custom);
1221 HasStandaloneRem = false;
1222
1223 if (Subtarget->isTargetWindows()) {
1224 const struct {
1225 const RTLIB::Libcall Op;
1226 const char * const Name;
1227 const CallingConv::ID CC;
1228 } LibraryCalls[] = {
1229 { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS },
1230 { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS },
1231 { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS },
1232 { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS },
1233
1234 { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS },
1235 { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS },
1236 { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS },
1237 { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS },
1238 };
1239
1240 for (const auto &LC : LibraryCalls) {
1241 setLibcallName(LC.Op, LC.Name);
1242 setLibcallCallingConv(LC.Op, LC.CC);
1243 }
1244 } else {
1245 const struct {
1246 const RTLIB::Libcall Op;
1247 const char * const Name;
1248 const CallingConv::ID CC;
1249 } LibraryCalls[] = {
1250 { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1251 { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1252 { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1253 { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS },
1254
1255 { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1256 { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1257 { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1258 { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS },
1259 };
1260
1261 for (const auto &LC : LibraryCalls) {
1262 setLibcallName(LC.Op, LC.Name);
1263 setLibcallCallingConv(LC.Op, LC.CC);
1264 }
1265 }
1266
1267 setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
1268 setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
1269 setOperationAction(ISD::SDIVREM, MVT::i64, Custom);
1270 setOperationAction(ISD::UDIVREM, MVT::i64, Custom);
1271 } else {
1272 setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
1273 setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
1274 }
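// Editor's note (illustrative, not part of the original source): the AEABI
// divrem helpers registered above are "register based" in that a single call
// to __aeabi_idivmod returns the quotient in r0 and the remainder in r1, so
// marking SDIVREM/UDIVREM as Custom lets one libcall cover both results of
//
//   int q = a / b;   // quotient  -> r0 after the call
//   int r = a % b;   // remainder -> r1 after the call
//
// instead of emitting separate division and modulo calls.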
1275
1276 if (Subtarget->getTargetTriple().isOSMSVCRT()) {
1277 // MSVCRT doesn't have powi; fall back to pow
1278 setLibcallName(RTLIB::POWI_F32, nullptr);
1279 setLibcallName(RTLIB::POWI_F64, nullptr);
1280 }
1281
1282 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
1283 setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
1284 setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
1285 setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
1286
1287 setOperationAction(ISD::TRAP, MVT::Other, Legal);
1288 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
1289
1290 // Use the default implementation.
1291 setOperationAction(ISD::VASTART, MVT::Other, Custom);
1292 setOperationAction(ISD::VAARG, MVT::Other, Expand);
1293 setOperationAction(ISD::VACOPY, MVT::Other, Expand);
1294 setOperationAction(ISD::VAEND, MVT::Other, Expand);
1295 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
1296 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
1297
1298 if (Subtarget->isTargetWindows())
1299 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
1300 else
1301 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
1302
1303 // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
1304 // the default expansion.
1305 InsertFencesForAtomic = false;
1306 if (Subtarget->hasAnyDataBarrier() &&
1307 (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
1308 // ATOMIC_FENCE needs custom lowering; the others should have been expanded
1309 // to ldrex/strex loops already.
1310 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
1311 if (!Subtarget->isThumb() || !Subtarget->isMClass())
1312 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
1313
1314 // On v8, we have particularly efficient implementations of atomic fences
1315 // if they can be combined with nearby atomic loads and stores.
1316 if (!Subtarget->hasAcquireRelease() ||
1317 getTargetMachine().getOptLevel() == 0) {
1318 // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
1319 InsertFencesForAtomic = true;
1320 }
1321 } else {
1322 // If there's anything we can use as a barrier, go through custom lowering
1323 // for ATOMIC_FENCE.
1324 // If the target has DMB in Thumb mode, fences can be inserted.
1325 if (Subtarget->hasDataBarrier())
1326 InsertFencesForAtomic = true;
1327
1328 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other,
1329 Subtarget->hasAnyDataBarrier() ? Custom : Expand);
1330
1331 // Set them all for expansion, which will force libcalls.
1332 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
1333 setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
1334 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
1335 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
1336 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
1337 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
1338 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
1339 setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
1340 setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
1341 setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
1342 setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
1343 setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
1344 // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
1345 // Unordered/Monotonic case.
1346 if (!InsertFencesForAtomic) {
1347 setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
1348 setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
1349 }
1350 }
1351
1352 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
1353
1354 // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
1355 if (!Subtarget->hasV6Ops()) {
1356 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
1357 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
1358 }
1359 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
1360
1361 if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() &&
1362 !Subtarget->isThumb1Only()) {
1363 // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
1364 // iff target supports vfp2.
1365 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
1366 setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
1367 setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
1368 }
1369
1370 // We want to custom lower some of our intrinsics.
1371 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
1372 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
1373 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
1374 setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
1375 if (Subtarget->useSjLjEH())
1376 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
1377
1378 setOperationAction(ISD::SETCC, MVT::i32, Expand);
1379 setOperationAction(ISD::SETCC, MVT::f32, Expand);
1380 setOperationAction(ISD::SETCC, MVT::f64, Expand);
1381 setOperationAction(ISD::SELECT, MVT::i32, Custom);
1382 setOperationAction(ISD::SELECT, MVT::f32, Custom);
1383 setOperationAction(ISD::SELECT, MVT::f64, Custom);
1384 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
1385 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
1386 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
1387 if (Subtarget->hasFullFP16()) {
1388 setOperationAction(ISD::SETCC, MVT::f16, Expand);
1389 setOperationAction(ISD::SELECT, MVT::f16, Custom);
1390 setOperationAction(ISD::SELECT_CC, MVT::f16, Custom);
1391 }
1392
1393 setOperationAction(ISD::SETCCCARRY, MVT::i32, Custom);
1394
1395 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
1396 setOperationAction(ISD::BR_CC, MVT::i32, Custom);
1397 if (Subtarget->hasFullFP16())
1398 setOperationAction(ISD::BR_CC, MVT::f16, Custom);
1399 setOperationAction(ISD::BR_CC, MVT::f32, Custom);
1400 setOperationAction(ISD::BR_CC, MVT::f64, Custom);
1401 setOperationAction(ISD::BR_JT, MVT::Other, Custom);
1402
1403 // We don't support sin/cos/fmod/copysign/pow
1404 setOperationAction(ISD::FSIN, MVT::f64, Expand);
1405 setOperationAction(ISD::FSIN, MVT::f32, Expand);
1406 setOperationAction(ISD::FCOS, MVT::f32, Expand);
1407 setOperationAction(ISD::FCOS, MVT::f64, Expand);
1408 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
1409 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
1410 setOperationAction(ISD::FREM, MVT::f64, Expand);
1411 setOperationAction(ISD::FREM, MVT::f32, Expand);
1412 if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() &&
1413 !Subtarget->isThumb1Only()) {
1414 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
1415 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
1416 }
1417 setOperationAction(ISD::FPOW, MVT::f64, Expand);
1418 setOperationAction(ISD::FPOW, MVT::f32, Expand);
1419
1420 if (!Subtarget->hasVFP4Base()) {
1421 setOperationAction(ISD::FMA, MVT::f64, Expand);
1422 setOperationAction(ISD::FMA, MVT::f32, Expand);
1423 }
1424
1425 // Various VFP goodness
1426 if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
1427 // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
1428 if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) {
1429 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
1430 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
1431 }
1432
1433 // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
1434 if (!Subtarget->hasFP16()) {
1435 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
1436 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
1437 }
1438
1439 // Strict floating-point comparisons need custom lowering.
1440 setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Custom);
1441 setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom);
1442 setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Custom);
1443 setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Custom);
1444 setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Custom);
1445 setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Custom);
1446 }
1447
1448 // Use __sincos_stret if available.
1449 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1450 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1451 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
1452 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
1453 }
1454
1455 // FP-ARMv8 implements a lot of rounding-like FP operations.
1456 if (Subtarget->hasFPARMv8Base()) {
1457 setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
1458 setOperationAction(ISD::FCEIL, MVT::f32, Legal);
1459 setOperationAction(ISD::FROUND, MVT::f32, Legal);
1460 setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
1461 setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
1462 setOperationAction(ISD::FRINT, MVT::f32, Legal);
1463 setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
1464 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
1465 if (Subtarget->hasNEON()) {
1466 setOperationAction(ISD::FMINNUM, MVT::v2f32, Legal);
1467 setOperationAction(ISD::FMAXNUM, MVT::v2f32, Legal);
1468 setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
1469 setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
1470 }
1471
1472 if (Subtarget->hasFP64()) {
1473 setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
1474 setOperationAction(ISD::FCEIL, MVT::f64, Legal);
1475 setOperationAction(ISD::FROUND, MVT::f64, Legal);
1476 setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
1477 setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
1478 setOperationAction(ISD::FRINT, MVT::f64, Legal);
1479 setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
1480 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
1481 }
1482 }
1483
1484 // FP16 operations often need to be promoted to call library functions
1485 if (Subtarget->hasFullFP16()) {
1486 setOperationAction(ISD::FREM, MVT::f16, Promote);
1487 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
1488 setOperationAction(ISD::FSIN, MVT::f16, Promote);
1489 setOperationAction(ISD::FCOS, MVT::f16, Promote);
1490 setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
1491 setOperationAction(ISD::FPOWI, MVT::f16, Promote);
1492 setOperationAction(ISD::FPOW, MVT::f16, Promote);
1493 setOperationAction(ISD::FEXP, MVT::f16, Promote);
1494 setOperationAction(ISD::FEXP2, MVT::f16, Promote);
1495 setOperationAction(ISD::FLOG, MVT::f16, Promote);
1496 setOperationAction(ISD::FLOG10, MVT::f16, Promote);
1497 setOperationAction(ISD::FLOG2, MVT::f16, Promote);
1498
1499 setOperationAction(ISD::FROUND, MVT::f16, Legal);
1500 }
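// Editor's note (illustrative sketch, assuming the usual meaning of Promote):
// an f16 operation marked Promote above is widened to f32, performed there
// (usually ending up in the corresponding f32 libcall), and truncated back,
// conceptually:
//
//   // sketched in C for illustration only
//   _Float16 half_sin(_Float16 x) { return (_Float16)sinf((float)x); }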
1501
1502 if (Subtarget->hasNEON()) {
1503 // vmin and vmax aren't available in a scalar form, so we can use
1504 // a NEON instruction with an undef lane instead. This has a performance
1505 // penalty on some cores, so we don't do this unless we have been
1506 // asked to by the core tuning model.
1507 if (Subtarget->useNEONForSinglePrecisionFP()) {
1508 setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);
1509 setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
1510 setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
1511 setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal);
1512 }
1513 setOperationAction(ISD::FMINIMUM, MVT::v2f32, Legal);
1514 setOperationAction(ISD::FMAXIMUM, MVT::v2f32, Legal);
1515 setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal);
1516 setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal);
1517
1518 if (Subtarget->hasFullFP16()) {
1519 setOperationAction(ISD::FMINNUM, MVT::v4f16, Legal);
1520 setOperationAction(ISD::FMAXNUM, MVT::v4f16, Legal);
1521 setOperationAction(ISD::FMINNUM, MVT::v8f16, Legal);
1522 setOperationAction(ISD::FMAXNUM, MVT::v8f16, Legal);
1523
1524 setOperationAction(ISD::FMINIMUM, MVT::v4f16, Legal);
1525 setOperationAction(ISD::FMAXIMUM, MVT::v4f16, Legal);
1526 setOperationAction(ISD::FMINIMUM, MVT::v8f16, Legal);
1527 setOperationAction(ISD::FMAXIMUM, MVT::v8f16, Legal);
1528 }
1529 }
1530
1531 // We have target-specific dag combine patterns for the following nodes:
1532 // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
1533 setTargetDAGCombine(ISD::ADD);
1534 setTargetDAGCombine(ISD::SUB);
1535 setTargetDAGCombine(ISD::MUL);
1536 setTargetDAGCombine(ISD::AND);
1537 setTargetDAGCombine(ISD::OR);
1538 setTargetDAGCombine(ISD::XOR);
1539
1540 if (Subtarget->hasMVEIntegerOps())
1541 setTargetDAGCombine(ISD::VSELECT);
1542
1543 if (Subtarget->hasV6Ops())
1544 setTargetDAGCombine(ISD::SRL);
1545 if (Subtarget->isThumb1Only())
1546 setTargetDAGCombine(ISD::SHL);
1547
1548 setStackPointerRegisterToSaveRestore(ARM::SP);
1549
1550 if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
1551 !Subtarget->hasVFP2Base() || Subtarget->hasMinSize())
1552 setSchedulingPreference(Sched::RegPressure);
1553 else
1554 setSchedulingPreference(Sched::Hybrid);
1555
1556 //// temporary - rewrite interface to use type
1557 MaxStoresPerMemset = 8;
1558 MaxStoresPerMemsetOptSize = 4;
1559 MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
1560 MaxStoresPerMemcpyOptSize = 2;
1561 MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
1562 MaxStoresPerMemmoveOptSize = 2;
1563
1564 // On ARM arguments smaller than 4 bytes are extended, so all arguments
1565 // are at least 4 bytes aligned.
1566 setMinStackArgumentAlignment(Align(4));
1567
1568 // Prefer likely predicted branches to selects on out-of-order cores.
1569 PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
1570
1571 setPrefLoopAlignment(Align(1ULL << Subtarget->getPrefLoopLogAlignment()));
1572
1573 setMinFunctionAlignment(Subtarget->isThumb() ? Align(2) : Align(4));
1574
1575 if (Subtarget->isThumb() || Subtarget->isThumb2())
1576 setTargetDAGCombine(ISD::ABS);
1577}
1578
1579bool ARMTargetLowering::useSoftFloat() const {
1580 return Subtarget->useSoftFloat();
1581}
1582
1583// FIXME: It might make sense to define the representative register class as the
1584// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
1585 // a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
1586// SPR's representative would be DPR_VFP2. This should work well if register
1587// pressure tracking were modified such that a register use would increment the
1588 // pressure of the register class's representative and all of its super
1589// classes' representatives transitively. We have not implemented this because
1590// of the difficulty prior to coalescing of modeling operand register classes
1591// due to the common occurrence of cross class copies and subregister insertions
1592// and extractions.
1593std::pair<const TargetRegisterClass *, uint8_t>
1594ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
1595 MVT VT) const {
1596 const TargetRegisterClass *RRC = nullptr;
1597 uint8_t Cost = 1;
1598 switch (VT.SimpleTy) {
1599 default:
1600 return TargetLowering::findRepresentativeClass(TRI, VT);
1601 // Use DPR as representative register class for all floating point
1602 // and vector types. Since there are 32 SPR registers and 32 DPR registers,
1603 // the cost is 1 for both f32 and f64.
1604 case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
1605 case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
1606 RRC = &ARM::DPRRegClass;
1607 // When NEON is used for SP, only half of the register file is available
1608 // because operations that define both SP and DP results will be constrained
1609 // to the VFP2 class (D0-D15). We currently model this constraint prior to
1610 // coalescing by double-counting the SP regs. See the FIXME above.
1611 if (Subtarget->useNEONForSinglePrecisionFP())
1612 Cost = 2;
1613 break;
1614 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
1615 case MVT::v4f32: case MVT::v2f64:
1616 RRC = &ARM::DPRRegClass;
1617 Cost = 2;
1618 break;
1619 case MVT::v4i64:
1620 RRC = &ARM::DPRRegClass;
1621 Cost = 4;
1622 break;
1623 case MVT::v8i64:
1624 RRC = &ARM::DPRRegClass;
1625 Cost = 8;
1626 break;
1627 }
1628 return std::make_pair(RRC, Cost);
1629}
1630
1631const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
1632#define MAKE_CASE(V) \
1633 case V: \
1634 return #V;
1635 switch ((ARMISD::NodeType)Opcode) {
1636 case ARMISD::FIRST_NUMBER:
1637 break;
1638 MAKE_CASE(ARMISD::Wrapper)
1639 MAKE_CASE(ARMISD::WrapperPIC)
1640 MAKE_CASE(ARMISD::WrapperJT)
1641 MAKE_CASE(ARMISD::COPY_STRUCT_BYVAL)
1642 MAKE_CASE(ARMISD::CALL)
1643 MAKE_CASE(ARMISD::CALL_PRED)
1644 MAKE_CASE(ARMISD::CALL_NOLINK)
1645 MAKE_CASE(ARMISD::tSECALL)
1646 MAKE_CASE(ARMISD::BRCOND)
1647 MAKE_CASE(ARMISD::BR_JT)
1648 MAKE_CASE(ARMISD::BR2_JT)
1649 MAKE_CASE(ARMISD::RET_FLAG)
1650 MAKE_CASE(ARMISD::SERET_FLAG)
1651 MAKE_CASE(ARMISD::INTRET_FLAG)
1652 MAKE_CASE(ARMISD::PIC_ADD)
1653 MAKE_CASE(ARMISD::CMP)
1654 MAKE_CASE(ARMISD::CMN)
1655 MAKE_CASE(ARMISD::CMPZ)
1656 MAKE_CASE(ARMISD::CMPFP)
1657 MAKE_CASE(ARMISD::CMPFPE)
1658 MAKE_CASE(ARMISD::CMPFPw0)
1659 MAKE_CASE(ARMISD::CMPFPEw0)
1660 MAKE_CASE(ARMISD::BCC_i64)
1661 MAKE_CASE(ARMISD::FMSTAT)
1662 MAKE_CASE(ARMISD::CMOV)
1663 MAKE_CASE(ARMISD::SUBS)
1664 MAKE_CASE(ARMISD::SSAT)
1665 MAKE_CASE(ARMISD::USAT)
1666 MAKE_CASE(ARMISD::ASRL)
1667 MAKE_CASE(ARMISD::LSRL)
1668 MAKE_CASE(ARMISD::LSLL)
1669 MAKE_CASE(ARMISD::SRL_FLAG)
1670 MAKE_CASE(ARMISD::SRA_FLAG)
1671 MAKE_CASE(ARMISD::RRX)
1672 MAKE_CASE(ARMISD::ADDC)
1673 MAKE_CASE(ARMISD::ADDE)
1674 MAKE_CASE(ARMISD::SUBC)
1675 MAKE_CASE(ARMISD::SUBE)
1676 MAKE_CASE(ARMISD::LSLS)
1677 MAKE_CASE(ARMISD::VMOVRRD)
1678 MAKE_CASE(ARMISD::VMOVDRR)
1679 MAKE_CASE(ARMISD::VMOVhr)
1680 MAKE_CASE(ARMISD::VMOVrh)
1681 MAKE_CASE(ARMISD::VMOVSR)
1682 MAKE_CASE(ARMISD::EH_SJLJ_SETJMP)
1683 MAKE_CASE(ARMISD::EH_SJLJ_LONGJMP)
1684 MAKE_CASE(ARMISD::EH_SJLJ_SETUP_DISPATCH)
1685 MAKE_CASE(ARMISD::TC_RETURN)
1686 MAKE_CASE(ARMISD::THREAD_POINTER)
1687 MAKE_CASE(ARMISD::DYN_ALLOC)
1688 MAKE_CASE(ARMISD::MEMBARRIER_MCR)
1689 MAKE_CASE(ARMISD::PRELOAD)
1690 MAKE_CASE(ARMISD::LDRD)
1691 MAKE_CASE(ARMISD::STRD)
1692 MAKE_CASE(ARMISD::WIN__CHKSTK)
1693 MAKE_CASE(ARMISD::WIN__DBZCHK)
1694 MAKE_CASE(ARMISD::PREDICATE_CAST)
1695 MAKE_CASE(ARMISD::VECTOR_REG_CAST)
1696 MAKE_CASE(ARMISD::MVESEXT)
1697 MAKE_CASE(ARMISD::MVEZEXT)
1698 MAKE_CASE(ARMISD::MVETRUNC)
1699 MAKE_CASE(ARMISD::VCMP)
1700 MAKE_CASE(ARMISD::VCMPZ)
1701 MAKE_CASE(ARMISD::VTST)
1702 MAKE_CASE(ARMISD::VSHLs)
1703 MAKE_CASE(ARMISD::VSHLu)
1704 MAKE_CASE(ARMISD::VSHLIMM)
1705 MAKE_CASE(ARMISD::VSHRsIMM)
1706 MAKE_CASE(ARMISD::VSHRuIMM)
1707 MAKE_CASE(ARMISD::VRSHRsIMM)
1708 MAKE_CASE(ARMISD::VRSHRuIMM)
1709 MAKE_CASE(ARMISD::VRSHRNIMM)
1710 MAKE_CASE(ARMISD::VQSHLsIMM)
1711 MAKE_CASE(ARMISD::VQSHLuIMM)
1712 MAKE_CASE(ARMISD::VQSHLsuIMM)
1713 MAKE_CASE(ARMISD::VQSHRNsIMM)
1714 MAKE_CASE(ARMISD::VQSHRNuIMM)
1715 MAKE_CASE(ARMISD::VQSHRNsuIMM)
1716 MAKE_CASE(ARMISD::VQRSHRNsIMM)
1717 MAKE_CASE(ARMISD::VQRSHRNuIMM)
1718 MAKE_CASE(ARMISD::VQRSHRNsuIMM)
1719 MAKE_CASE(ARMISD::VSLIIMM)
1720 MAKE_CASE(ARMISD::VSRIIMM)
1721 MAKE_CASE(ARMISD::VGETLANEu)
1722 MAKE_CASE(ARMISD::VGETLANEs)
1723 MAKE_CASE(ARMISD::VMOVIMM)
1724 MAKE_CASE(ARMISD::VMVNIMM)
1725 MAKE_CASE(ARMISD::VMOVFPIMM)
1726 MAKE_CASE(ARMISD::VDUP)
1727 MAKE_CASE(ARMISD::VDUPLANE)
1728 MAKE_CASE(ARMISD::VEXT)
1729 MAKE_CASE(ARMISD::VREV64)
1730 MAKE_CASE(ARMISD::VREV32)
1731 MAKE_CASE(ARMISD::VREV16)
1732 MAKE_CASE(ARMISD::VZIP)
1733 MAKE_CASE(ARMISD::VUZP)
1734 MAKE_CASE(ARMISD::VTRN)
1735 MAKE_CASE(ARMISD::VTBL1)
1736 MAKE_CASE(ARMISD::VTBL2)
1737 MAKE_CASE(ARMISD::VMOVN)
1738 MAKE_CASE(ARMISD::VQMOVNs)
1739 MAKE_CASE(ARMISD::VQMOVNu)
1740 MAKE_CASE(ARMISD::VCVTN)
1741 MAKE_CASE(ARMISD::VCVTL)
1742 MAKE_CASE(ARMISD::VIDUP)
1743 MAKE_CASE(ARMISD::VMULLs)
1744 MAKE_CASE(ARMISD::VMULLu)
1745 MAKE_CASE(ARMISD::VQDMULH)
1746 MAKE_CASE(ARMISD::VADDVs)
1747 MAKE_CASE(ARMISD::VADDVu)
1748 MAKE_CASE(ARMISD::VADDVps)
1749 MAKE_CASE(ARMISD::VADDVpu)
1750 MAKE_CASE(ARMISD::VADDLVs)
1751 MAKE_CASE(ARMISD::VADDLVu)
1752 MAKE_CASE(ARMISD::VADDLVAs)
1753 MAKE_CASE(ARMISD::VADDLVAu)
1754 MAKE_CASE(ARMISD::VADDLVps)
1755 MAKE_CASE(ARMISD::VADDLVpu)
1756 MAKE_CASE(ARMISD::VADDLVAps)
1757 MAKE_CASE(ARMISD::VADDLVApu)
1758 MAKE_CASE(ARMISD::VMLAVs)
1759 MAKE_CASE(ARMISD::VMLAVu)
1760 MAKE_CASE(ARMISD::VMLAVps)
1761 MAKE_CASE(ARMISD::VMLAVpu)
1762 MAKE_CASE(ARMISD::VMLALVs)
1763 MAKE_CASE(ARMISD::VMLALVu)
1764 MAKE_CASE(ARMISD::VMLALVps)
1765 MAKE_CASE(ARMISD::VMLALVpu)
1766 MAKE_CASE(ARMISD::VMLALVAs)
1767 MAKE_CASE(ARMISD::VMLALVAu)
1768 MAKE_CASE(ARMISD::VMLALVAps)
1769 MAKE_CASE(ARMISD::VMLALVApu)
1770 MAKE_CASE(ARMISD::VMINVu)
1771 MAKE_CASE(ARMISD::VMINVs)
1772 MAKE_CASE(ARMISD::VMAXVu)
1773 MAKE_CASE(ARMISD::VMAXVs)
1774 MAKE_CASE(ARMISD::UMAAL)
1775 MAKE_CASE(ARMISD::UMLAL)
1776 MAKE_CASE(ARMISD::SMLAL)
1777 MAKE_CASE(ARMISD::SMLALBB)
1778 MAKE_CASE(ARMISD::SMLALBT)
1779 MAKE_CASE(ARMISD::SMLALTB)
1780 MAKE_CASE(ARMISD::SMLALTT)
1781 MAKE_CASE(ARMISD::SMULWB)
1782 MAKE_CASE(ARMISD::SMULWT)
1783 MAKE_CASE(ARMISD::SMLALD)
1784 MAKE_CASE(ARMISD::SMLALDX)
1785 MAKE_CASE(ARMISD::SMLSLD)
1786 MAKE_CASE(ARMISD::SMLSLDX)
1787 MAKE_CASE(ARMISD::SMMLAR)
1788 MAKE_CASE(ARMISD::SMMLSR)
1789 MAKE_CASE(ARMISD::QADD16b)
1790 MAKE_CASE(ARMISD::QSUB16b)
1791 MAKE_CASE(ARMISD::QADD8b)
1792 MAKE_CASE(ARMISD::QSUB8b)
1793 MAKE_CASE(ARMISD::UQADD16b)
1794 MAKE_CASE(ARMISD::UQSUB16b)
1795 MAKE_CASE(ARMISD::UQADD8b)
1796 MAKE_CASE(ARMISD::UQSUB8b)
1797 MAKE_CASE(ARMISD::BUILD_VECTOR)
1798 MAKE_CASE(ARMISD::BFI)
1799 MAKE_CASE(ARMISD::VORRIMM)
1800 MAKE_CASE(ARMISD::VBICIMM)
1801 MAKE_CASE(ARMISD::VBSP)
1802 MAKE_CASE(ARMISD::MEMCPY)
1803 MAKE_CASE(ARMISD::VLD1DUP)
1804 MAKE_CASE(ARMISD::VLD2DUP)
1805 MAKE_CASE(ARMISD::VLD3DUP)
1806 MAKE_CASE(ARMISD::VLD4DUP)
1807 MAKE_CASE(ARMISD::VLD1_UPD)
1808 MAKE_CASE(ARMISD::VLD2_UPD)
1809 MAKE_CASE(ARMISD::VLD3_UPD)
1810 MAKE_CASE(ARMISD::VLD4_UPD)
1811 MAKE_CASE(ARMISD::VLD1x2_UPD)
1812 MAKE_CASE(ARMISD::VLD1x3_UPD)
1813 MAKE_CASE(ARMISD::VLD1x4_UPD)
1814 MAKE_CASE(ARMISD::VLD2LN_UPD)
1815 MAKE_CASE(ARMISD::VLD3LN_UPD)
1816 MAKE_CASE(ARMISD::VLD4LN_UPD)
1817 MAKE_CASE(ARMISD::VLD1DUP_UPD)
1818 MAKE_CASE(ARMISD::VLD2DUP_UPD)
1819 MAKE_CASE(ARMISD::VLD3DUP_UPD)
1820 MAKE_CASE(ARMISD::VLD4DUP_UPD)
1821 MAKE_CASE(ARMISD::VST1_UPD)
1822 MAKE_CASE(ARMISD::VST2_UPD)
1823 MAKE_CASE(ARMISD::VST3_UPD)
1824 MAKE_CASE(ARMISD::VST4_UPD)
1825 MAKE_CASE(ARMISD::VST1x2_UPD)
1826 MAKE_CASE(ARMISD::VST1x3_UPD)
1827 MAKE_CASE(ARMISD::VST1x4_UPD)
1828 MAKE_CASE(ARMISD::VST2LN_UPD)
1829 MAKE_CASE(ARMISD::VST3LN_UPD)
1830 MAKE_CASE(ARMISD::VST4LN_UPD)
1831 MAKE_CASE(ARMISD::WLS)
1832 MAKE_CASE(ARMISD::WLSSETUP)
1833 MAKE_CASE(ARMISD::LE)
1834 MAKE_CASE(ARMISD::LOOP_DEC)
1835 MAKE_CASE(ARMISD::CSINV)
1836 MAKE_CASE(ARMISD::CSNEG)
1837 MAKE_CASE(ARMISD::CSINC)
1838 MAKE_CASE(ARMISD::MEMCPYLOOP)
1839 MAKE_CASE(ARMISD::MEMSETLOOP)
1840#undef MAKE_CASE
1841 }
1842 return nullptr;
1843}
1844
1845EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
1846 EVT VT) const {
1847 if (!VT.isVector())
1848 return getPointerTy(DL);
1849
1850 // MVE has a predicate register.
1851 if ((Subtarget->hasMVEIntegerOps() &&
1852 (VT == MVT::v4i32 || VT == MVT::v8i16 || VT == MVT::v16i8)) ||
1853 (Subtarget->hasMVEFloatOps() && (VT == MVT::v4f32 || VT == MVT::v8f16)))
1854 return MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
1855 return VT.changeVectorElementTypeToInteger();
1856}
1857
1858/// getRegClassFor - Return the register class that should be used for the
1859/// specified value type.
1860const TargetRegisterClass *
1861ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
1862 (void)isDivergent;
1863 // Map v4i64 to QQ registers but do not make the type legal. Similarly map
1864 // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
1865 // load / store 4 to 8 consecutive NEON D registers, or 2 to 4 consecutive
1866 // MVE Q registers.
1867 if (Subtarget->hasNEON()) {
1868 if (VT == MVT::v4i64)
1869 return &ARM::QQPRRegClass;
1870 if (VT == MVT::v8i64)
1871 return &ARM::QQQQPRRegClass;
1872 }
1873 if (Subtarget->hasMVEIntegerOps()) {
1874 if (VT == MVT::v4i64)
1875 return &ARM::MQQPRRegClass;
1876 if (VT == MVT::v8i64)
1877 return &ARM::MQQQQPRRegClass;
1878 }
1879 return TargetLowering::getRegClassFor(VT);
1880}
1881
1882 // memcpy, and other memory intrinsics, typically try to use LDM/STM if the
1883// source/dest is aligned and the copy size is large enough. We therefore want
1884// to align such objects passed to memory intrinsics.
1885bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
1886 unsigned &PrefAlign) const {
1887 if (!isa<MemIntrinsic>(CI))
1888 return false;
1889 MinSize = 8;
1890 // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
1891 // cycle faster than 4-byte aligned LDM.
1892 PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4);
1893 return true;
1894}
1895
1896// Create a fast isel object.
1897FastISel *
1898ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
1899 const TargetLibraryInfo *libInfo) const {
1900 return ARM::createFastISel(funcInfo, libInfo);
1901}
1902
1903Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
1904 unsigned NumVals = N->getNumValues();
1905 if (!NumVals)
1906 return Sched::RegPressure;
1907
1908 for (unsigned i = 0; i != NumVals; ++i) {
1909 EVT VT = N->getValueType(i);
1910 if (VT == MVT::Glue || VT == MVT::Other)
1911 continue;
1912 if (VT.isFloatingPoint() || VT.isVector())
1913 return Sched::ILP;
1914 }
1915
1916 if (!N->isMachineOpcode())
1917 return Sched::RegPressure;
1918
1919 // Loads are scheduled for latency even if the instruction itinerary
1920 // is not available.
1921 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1922 const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1923
1924 if (MCID.getNumDefs() == 0)
1925 return Sched::RegPressure;
1926 if (!Itins->isEmpty() &&
1927 Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
1928 return Sched::ILP;
1929
1930 return Sched::RegPressure;
1931}
1932
1933//===----------------------------------------------------------------------===//
1934// Lowering Code
1935//===----------------------------------------------------------------------===//
1936
1937static bool isSRL16(const SDValue &Op) {
1938 if (Op.getOpcode() != ISD::SRL)
1939 return false;
1940 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1941 return Const->getZExtValue() == 16;
1942 return false;
1943}
1944
1945static bool isSRA16(const SDValue &Op) {
1946 if (Op.getOpcode() != ISD::SRA)
1947 return false;
1948 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1949 return Const->getZExtValue() == 16;
1950 return false;
1951}
1952
1953static bool isSHL16(const SDValue &Op) {
1954 if (Op.getOpcode() != ISD::SHL)
1955 return false;
1956 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1957 return Const->getZExtValue() == 16;
1958 return false;
1959}
1960
1961// Check for a signed 16-bit value. We special case SRA because it makes it
1962 // simpler when also looking for SRAs that aren't sign-extending a
1963// smaller value. Without the check, we'd need to take extra care with
1964// checking order for some operations.
1965static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
1966 if (isSRA16(Op))
1967 return isSHL16(Op.getOperand(0));
1968 return DAG.ComputeNumSignBits(Op) == 17;
1969}
1970
1971/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
1972static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
1973 switch (CC) {
1974 default: llvm_unreachable("Unknown condition code!");
1975 case ISD::SETNE: return ARMCC::NE;
1976 case ISD::SETEQ: return ARMCC::EQ;
1977 case ISD::SETGT: return ARMCC::GT;
1978 case ISD::SETGE: return ARMCC::GE;
1979 case ISD::SETLT: return ARMCC::LT;
1980 case ISD::SETLE: return ARMCC::LE;
1981 case ISD::SETUGT: return ARMCC::HI;
1982 case ISD::SETUGE: return ARMCC::HS;
1983 case ISD::SETULT: return ARMCC::LO;
1984 case ISD::SETULE: return ARMCC::LS;
1985 }
1986}
1987
1988/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
1989static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
1990 ARMCC::CondCodes &CondCode2) {
1991 CondCode2 = ARMCC::AL;
1992 switch (CC) {
1993 default: llvm_unreachable("Unknown FP condition!");
1994 case ISD::SETEQ:
1995 case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
1996 case ISD::SETGT:
1997 case ISD::SETOGT: CondCode = ARMCC::GT; break;
1998 case ISD::SETGE:
1999 case ISD::SETOGE: CondCode = ARMCC::GE; break;
2000 case ISD::SETOLT: CondCode = ARMCC::MI; break;
2001 case ISD::SETOLE: CondCode = ARMCC::LS; break;
2002 case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
2003 case ISD::SETO: CondCode = ARMCC::VC; break;
2004 case ISD::SETUO: CondCode = ARMCC::VS; break;
2005 case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
2006 case ISD::SETUGT: CondCode = ARMCC::HI; break;
2007 case ISD::SETUGE: CondCode = ARMCC::PL; break;
2008 case ISD::SETLT:
2009 case ISD::SETULT: CondCode = ARMCC::LT; break;
2010 case ISD::SETLE:
2011 case ISD::SETULE: CondCode = ARMCC::LE; break;
2012 case ISD::SETNE:
2013 case ISD::SETUNE: CondCode = ARMCC::NE; break;
2014 }
2015}
2016
2017//===----------------------------------------------------------------------===//
2018// Calling Convention Implementation
2019//===----------------------------------------------------------------------===//
2020
2021/// getEffectiveCallingConv - Get the effective calling convention, taking into
2022/// account presence of floating point hardware and calling convention
2023/// limitations, such as support for variadic functions.
2024CallingConv::ID
2025ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
2026 bool isVarArg) const {
2027 switch (CC) {
2028 default:
2029 report_fatal_error("Unsupported calling convention");
2030 case CallingConv::ARM_AAPCS:
2031 case CallingConv::ARM_APCS:
2032 case CallingConv::GHC:
2033 case CallingConv::CFGuard_Check:
2034 return CC;
2035 case CallingConv::PreserveMost:
2036 return CallingConv::PreserveMost;
2037 case CallingConv::ARM_AAPCS_VFP:
2038 case CallingConv::Swift:
2039 case CallingConv::SwiftTail:
2040 return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP;
2041 case CallingConv::C:
2042 case CallingConv::Tail:
2043 if (!Subtarget->isAAPCS_ABI())
2044 return CallingConv::ARM_APCS;
2045 else if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() &&
2046 getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
2047 !isVarArg)
2048 return CallingConv::ARM_AAPCS_VFP;
2049 else
2050 return CallingConv::ARM_AAPCS;
2051 case CallingConv::Fast:
2052 case CallingConv::CXX_FAST_TLS:
2053 if (!Subtarget->isAAPCS_ABI()) {
2054 if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && !isVarArg)
2055 return CallingConv::Fast;
2056 return CallingConv::ARM_APCS;
2057 } else if (Subtarget->hasVFP2Base() &&
2058 !Subtarget->isThumb1Only() && !isVarArg)
2059 return CallingConv::ARM_AAPCS_VFP;
2060 else
2061 return CallingConv::ARM_AAPCS;
2062 }
2063}
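// Editor's note (illustrative): for a plain CallingConv::C callee on an AAPCS
// target with a VFP2 base and a hard-float ABI, a non-variadic prototype such
// as
//
//   double scale(double x, float f);   // ARM_AAPCS_VFP: x in d0, f in s2
//
// resolves to ARM_AAPCS_VFP, while a variadic callee (printf, for example)
// falls back to ARM_AAPCS with FP arguments passed in core registers or on
// the stack.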
2064
2065CCAssignFn *ARMTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
2066 bool isVarArg) const {
2067 return CCAssignFnForNode(CC, false, isVarArg);
2068}
2069
2070CCAssignFn *ARMTargetLowering::CCAssignFnForReturn(CallingConv::ID CC,
2071 bool isVarArg) const {
2072 return CCAssignFnForNode(CC, true, isVarArg);
2073}
2074
2075/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
2076/// CallingConvention.
2077CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
2078 bool Return,
2079 bool isVarArg) const {
2080 switch (getEffectiveCallingConv(CC, isVarArg)) {
2081 default:
2082 report_fatal_error("Unsupported calling convention");
2083 case CallingConv::ARM_APCS:
2084 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
2085 case CallingConv::ARM_AAPCS:
2086 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2087 case CallingConv::ARM_AAPCS_VFP:
2088 return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
2089 case CallingConv::Fast:
2090 return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
2091 case CallingConv::GHC:
2092 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
2093 case CallingConv::PreserveMost:
2094 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2095 case CallingConv::CFGuard_Check:
2096 return (Return ? RetCC_ARM_AAPCS : CC_ARM_Win32_CFGuard_Check);
2097 }
2098}
2099
2100SDValue ARMTargetLowering::MoveToHPR(const SDLoc &dl, SelectionDAG &DAG,
2101 MVT LocVT, MVT ValVT, SDValue Val) const {
2102 Val = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocVT.getSizeInBits()),
2103 Val);
2104 if (Subtarget->hasFullFP16()) {
2105 Val = DAG.getNode(ARMISD::VMOVhr, dl, ValVT, Val);
2106 } else {
2107 Val = DAG.getNode(ISD::TRUNCATE, dl,
2108 MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
2109 Val = DAG.getNode(ISD::BITCAST, dl, ValVT, Val);
2110 }
2111 return Val;
2112}
2113
2114SDValue ARMTargetLowering::MoveFromHPR(const SDLoc &dl, SelectionDAG &DAG,
2115 MVT LocVT, MVT ValVT,
2116 SDValue Val) const {
2117 if (Subtarget->hasFullFP16()) {
2118 Val = DAG.getNode(ARMISD::VMOVrh, dl,
2119 MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
2120 } else {
2121 Val = DAG.getNode(ISD::BITCAST, dl,
2122 MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
2123 Val = DAG.getNode(ISD::ZERO_EXTEND, dl,
2124 MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
2125 }
2126 return DAG.getNode(ISD::BITCAST, dl, LocVT, Val);
2127}
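// Editor's note (illustrative sketch of the two paths above): MoveToHPR turns
// an f16 value arriving in the low bits of a 32-bit location back into a half
// value, either with a single VMOVhr when +fullfp16 is available or, on the
// fallback path, roughly as
//
//   i32 bits = <incoming 32-bit value>
//   i16 lo   = truncate bits        ; ISD::TRUNCATE
//   f16 val  = bitcast lo           ; ISD::BITCAST
//
// and MoveFromHPR performs the mirror-image sequence for outgoing values.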
2128
2129/// LowerCallResult - Lower the result values of a call into the
2130/// appropriate copies out of appropriate physical registers.
2131SDValue ARMTargetLowering::LowerCallResult(
2132 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
2133 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2134 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
2135 SDValue ThisVal) const {
2136 // Assign locations to each value returned by this call.
2137 SmallVector<CCValAssign, 16> RVLocs;
2138 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2139 *DAG.getContext());
2140 CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));
2141
2142 // Copy all of the result registers out of their specified physreg.
2143 for (unsigned i = 0; i != RVLocs.size(); ++i) {
2144 CCValAssign VA = RVLocs[i];
2145
2146 // Pass 'this' value directly from the argument to return value, to avoid
2147 // reg unit interference
2148 if (i == 0 && isThisReturn) {
2149 assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
2150 "unexpected return calling convention register assignment");
2151 InVals.push_back(ThisVal);
2152 continue;
2153 }
2154
2155 SDValue Val;
2156 if (VA.needsCustom() &&
2157 (VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2f64)) {
2158 // Handle f64 or half of a v2f64.
2159 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
2160 InFlag);
2161 Chain = Lo.getValue(1);
2162 InFlag = Lo.getValue(2);
2163 VA = RVLocs[++i]; // skip ahead to next loc
2164 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
2165 InFlag);
2166 Chain = Hi.getValue(1);
2167 InFlag = Hi.getValue(2);
2168 if (!Subtarget->isLittle())
2169 std::swap (Lo, Hi);
2170 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
2171
2172 if (VA.getLocVT() == MVT::v2f64) {
2173 SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
2174 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
2175 DAG.getConstant(0, dl, MVT::i32));
2176
2177 VA = RVLocs[++i]; // skip ahead to next loc
2178 Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
2179 Chain = Lo.getValue(1);
2180 InFlag = Lo.getValue(2);
2181 VA = RVLocs[++i]; // skip ahead to next loc
2182 Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
2183 Chain = Hi.getValue(1);
2184 InFlag = Hi.getValue(2);
2185 if (!Subtarget->isLittle())
2186 std::swap (Lo, Hi);
2187 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
2188 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
2189 DAG.getConstant(1, dl, MVT::i32));
2190 }
2191 } else {
2192 Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
2193 InFlag);
2194 Chain = Val.getValue(1);
2195 InFlag = Val.getValue(2);
2196 }
2197
2198 switch (VA.getLocInfo()) {
2199 default: llvm_unreachable("Unknown loc info!");
2200 case CCValAssign::Full: break;
2201 case CCValAssign::BCvt:
2202 Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
2203 break;
2204 }
2205
2206 // f16 arguments have their size extended to 4 bytes and passed as if they
2207 // had been copied to the LSBs of a 32-bit register.
2208 // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
2209 if (VA.needsCustom() &&
2210 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
2211 Val = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Val);
2212
2213 InVals.push_back(Val);
2214 }
2215
2216 return Chain;
2217}
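// Editor's note (illustrative): with the soft-float AAPCS an f64 call result
// comes back split across r0/r1, and the custom path above stitches it
// together roughly as
//
//   lo  = CopyFromReg r0 : i32
//   hi  = CopyFromReg r1 : i32
//   f64 = ARMISD::VMOVDRR lo, hi    ; halves swapped on big-endian targets
//
// with a v2f64 result repeating the sequence for each half of the vector.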
2218
2219std::pair<SDValue, MachinePointerInfo> ARMTargetLowering::computeAddrForCallArg(
2220 const SDLoc &dl, SelectionDAG &DAG, const CCValAssign &VA, SDValue StackPtr,
2221 bool IsTailCall, int SPDiff) const {
2222 SDValue DstAddr;
2223 MachinePointerInfo DstInfo;
2224 int32_t Offset = VA.getLocMemOffset();
2225 MachineFunction &MF = DAG.getMachineFunction();
2226
2227 if (IsTailCall) {
2228 Offset += SPDiff;
2229 auto PtrVT = getPointerTy(DAG.getDataLayout());
2230 int Size = VA.getLocVT().getFixedSizeInBits() / 8;
2231 int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
2232 DstAddr = DAG.getFrameIndex(FI, PtrVT);
2233 DstInfo =
2234 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
2235 } else {
2236 SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl);
2237 DstAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
2238 StackPtr, PtrOff);
2239 DstInfo =
2240 MachinePointerInfo::getStack(DAG.getMachineFunction(), Offset);
2241 }
2242
2243 return std::make_pair(DstAddr, DstInfo);
2244}
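// Editor's note (illustrative): for a normal call an outgoing stack argument
// at LocMemOffset 8 is addressed as (ISD::ADD sp, 8) against the stack pointer
// adjusted by CALLSEQ_START, whereas for a tail call the same store goes
// through a fixed-object frame index at offset 8 + SPDiff so it lands in the
// caller's own incoming-argument area.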
2245
2246void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
2247 SDValue Chain, SDValue &Arg,
2248 RegsToPassVector &RegsToPass,
2249 CCValAssign &VA, CCValAssign &NextVA,
2250 SDValue &StackPtr,
2251 SmallVectorImpl<SDValue> &MemOpChains,
2252 bool IsTailCall,
2253 int SPDiff) const {
2254 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2255 DAG.getVTList(MVT::i32, MVT::i32), Arg);
2256 unsigned id = Subtarget->isLittle() ? 0 : 1;
2257 RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
2258
2259 if (NextVA.isRegLoc())
2260 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
2261 else {
2262 assert(NextVA.isMemLoc());
2263 if (!StackPtr.getNode())
2264 StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
2265 getPointerTy(DAG.getDataLayout()));
2266
2267 SDValue DstAddr;
2268 MachinePointerInfo DstInfo;
2269 std::tie(DstAddr, DstInfo) =
2270 computeAddrForCallArg(dl, DAG, NextVA, StackPtr, IsTailCall, SPDiff);
2271 MemOpChains.push_back(
2272 DAG.getStore(Chain, dl, fmrrd.getValue(1 - id), DstAddr, DstInfo));
2273 }
2274}
2275
2276static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
2277 return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
2278 CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
2279}
2280
2281/// LowerCall - Lowering a call into a callseq_start <-
2282 /// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
2283/// nodes.
2284SDValue
2285ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
2286 SmallVectorImpl<SDValue> &InVals) const {
2287 SelectionDAG &DAG = CLI.DAG;
2288 SDLoc &dl = CLI.DL;
2289 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2290 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2291 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2292 SDValue Chain = CLI.Chain;
2293 SDValue Callee = CLI.Callee;
2294 bool &isTailCall = CLI.IsTailCall;
2295 CallingConv::ID CallConv = CLI.CallConv;
2296 bool doesNotRet = CLI.DoesNotReturn;
2297 bool isVarArg = CLI.IsVarArg;
2298
2299 MachineFunction &MF = DAG.getMachineFunction();
2300 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2301 MachineFunction::CallSiteInfo CSInfo;
2302 bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
2303 bool isThisReturn = false;
2304 bool isCmseNSCall = false;
2305 bool isSibCall = false;
2306 bool PreferIndirect = false;
2307
2308 // Determine whether this is a non-secure function call.
2309 if (CLI.CB && CLI.CB->getAttributes().hasFnAttr("cmse_nonsecure_call"))
2310 isCmseNSCall = true;
2311
2312 // Disable tail calls if they're not supported.
2313 if (!Subtarget->supportsTailCall())
2314 isTailCall = false;
2315
2316 // For both the non-secure calls and the returns from a CMSE entry function,
2317 // the function needs to do some extra work after the call, or before the
2318 // return, respectively; thus it cannot end with a tail call
2319 if (isCmseNSCall || AFI->isCmseNSEntryFunction())
2320 isTailCall = false;
2321
2322 if (isa<GlobalAddressSDNode>(Callee)) {
2323 // If we're optimizing for minimum size and the function is called three or
2324 // more times in this block, we can improve codesize by calling indirectly
2325 // as BLXr has a 16-bit encoding.
2326 auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
2327 if (CLI.CB) {
2328 auto *BB = CLI.CB->getParent();
2329 PreferIndirect = Subtarget->isThumb() && Subtarget->hasMinSize() &&
2330 count_if(GV->users(), [&BB](const User *U) {
2331 return isa<Instruction>(U) &&
2332 cast<Instruction>(U)->getParent() == BB;
2333 }) > 2;
2334 }
2335 }
2336 if (isTailCall) {
2337 // Check if it's really possible to do a tail call.
2338 isTailCall = IsEligibleForTailCallOptimization(
2339 Callee, CallConv, isVarArg, isStructRet,
2340 MF.getFunction().hasStructRetAttr(), Outs, OutVals, Ins, DAG,
2341 PreferIndirect);
2342
2343 if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt &&
2344 CallConv != CallingConv::Tail && CallConv != CallingConv::SwiftTail)
2345 isSibCall = true;
2346
2347 // We don't support GuaranteedTailCallOpt for ARM, only automatically
2348 // detected sibcalls.
2349 if (isTailCall)
2350 ++NumTailCalls;
2351 }
2352
2353 if (!isTailCall && CLI.CB && CLI.CB->isMustTailCall())
2354 report_fatal_error("failed to perform tail call elimination on a call "
2355 "site marked musttail");
2356 // Analyze operands of the call, assigning locations to each operand.
2357 SmallVector<CCValAssign, 16> ArgLocs;
2358 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
2359 *DAG.getContext());
2360 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
2361
2362 // Get a count of how many bytes are to be pushed on the stack.
2363 unsigned NumBytes = CCInfo.getNextStackOffset();
2364
2365 // SPDiff is the byte offset of the call's argument area from the callee's.
2366 // Stores to callee stack arguments will be placed in FixedStackSlots offset
2367 // by this amount for a tail call. In a sibling call it must be 0 because the
2368 // caller will deallocate the entire stack and the callee still expects its
2369 // arguments to begin at SP+0. Completely unused for non-tail calls.
2370 int SPDiff = 0;
2371
2372 if (isTailCall && !isSibCall) {
2373 auto FuncInfo = MF.getInfo<ARMFunctionInfo>();
2374 unsigned NumReusableBytes = FuncInfo->getArgumentStackSize();
2375
2376 // Since callee will pop argument stack as a tail call, we must keep the
2377 // popped size 16-byte aligned.
2378 Align StackAlign = DAG.getDataLayout().getStackAlignment();
2379 NumBytes = alignTo(NumBytes, StackAlign);
2380
2381 // SPDiff will be negative if this tail call requires more space than we
2382 // would automatically have in our incoming argument space. Positive if we
2383 // can actually shrink the stack.
2384 SPDiff = NumReusableBytes - NumBytes;
2385
2386 // If this call requires more stack than we have available from
2387 // LowerFormalArguments, tell FrameLowering to reserve space for it.
2388 if (SPDiff < 0 && AFI->getArgRegsSaveSize() < (unsigned)-SPDiff)
2389 AFI->setArgRegsSaveSize(-SPDiff);
2390 }
2391
2392 if (isSibCall) {
2393 // For sibling tail calls, memory operands are available in our caller's stack.
2394 NumBytes = 0;
2395 } else {
2396 // Adjust the stack pointer for the new arguments...
2397 // These operations are automatically eliminated by the prolog/epilog pass
2398 Chain = DAG.getCALLSEQ_START(Chain, isTailCall ? 0 : NumBytes, 0, dl);
2399 }
2400
2401 SDValue StackPtr =
2402 DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
2403
2404 RegsToPassVector RegsToPass;
2405 SmallVector<SDValue, 8> MemOpChains;
2406
2407 // During a tail call, stores to the argument area must happen after all of
2408 // the function's incoming arguments have been loaded because they may alias.
2409 // This is done by folding in a TokenFactor from LowerFormalArguments, but
2410 // there's no point in doing so repeatedly so this tracks whether that's
2411 // happened yet.
2412 bool AfterFormalArgLoads = false;
2413
2414 // Walk the register/memloc assignments, inserting copies/loads. In the case
2415 // of tail call optimization, arguments are handled later.
2416 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2417 i != e;
2418 ++i, ++realArgIdx) {
2419 CCValAssign &VA = ArgLocs[i];
2420 SDValue Arg = OutVals[realArgIdx];
2421 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2422 bool isByVal = Flags.isByVal();
2423
2424 // Promote the value if needed.
2425 switch (VA.getLocInfo()) {
2426 default: llvm_unreachable("Unknown loc info!");
2427 case CCValAssign::Full: break;
2428 case CCValAssign::SExt:
2429 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
2430 break;
2431 case CCValAssign::ZExt:
2432 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
2433 break;
2434 case CCValAssign::AExt:
2435 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
2436 break;
2437 case CCValAssign::BCvt:
2438 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2439 break;
2440 }
2441
2442 if (isTailCall && VA.isMemLoc() && !AfterFormalArgLoads) {
2443 Chain = DAG.getStackArgumentTokenFactor(Chain);
2444 AfterFormalArgLoads = true;
2445 }
2446
2447 // f16 arguments have their size extended to 4 bytes and passed as if they
2448 // had been copied to the LSBs of a 32-bit register.
2449 // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
2450 if (VA.needsCustom() &&
2451 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
2452 Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
2453 } else {
2454 // f16 arguments could have been extended prior to argument lowering.
2455 // Mask the arguments if this is a CMSE nonsecure call.
2456 auto ArgVT = Outs[realArgIdx].ArgVT;
2457 if (isCmseNSCall && (ArgVT == MVT::f16)) {
2458 auto LocBits = VA.getLocVT().getSizeInBits();
2459 auto MaskValue = APInt::getLowBitsSet(LocBits, ArgVT.getSizeInBits());
2460 SDValue Mask =
2461 DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
2462 Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
2463 Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
2464 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2465 }
2466 }
2467
2468 // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
2469 if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {
2470 SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2471 DAG.getConstant(0, dl, MVT::i32));
2472 SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2473 DAG.getConstant(1, dl, MVT::i32));
2474
2475 PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass, VA, ArgLocs[++i],
2476 StackPtr, MemOpChains, isTailCall, SPDiff);
2477
2478 VA = ArgLocs[++i]; // skip ahead to next loc
2479 if (VA.isRegLoc()) {
2480 PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass, VA, ArgLocs[++i],
2481 StackPtr, MemOpChains, isTailCall, SPDiff);
2482 } else {
2483 assert(VA.isMemLoc());
2484 SDValue DstAddr;
2485 MachinePointerInfo DstInfo;
2486 std::tie(DstAddr, DstInfo) =
2487 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2488 MemOpChains.push_back(DAG.getStore(Chain, dl, Op1, DstAddr, DstInfo));
2489 }
2490 } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) {
2491 PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
2492 StackPtr, MemOpChains, isTailCall, SPDiff);
2493 } else if (VA.isRegLoc()) {
2494 if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
2495 Outs[0].VT == MVT::i32) {
2496 assert(VA.getLocVT() == MVT::i32 &&
2497 "unexpected calling convention register assignment");
2498 assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
2499 "unexpected use of 'returned'");
2500 isThisReturn = true;
2501 }
2502 const TargetOptions &Options = DAG.getTarget().Options;
2503 if (Options.EmitCallSiteInfo)
2504 CSInfo.emplace_back(VA.getLocReg(), i);
2505 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
2506 } else if (isByVal) {
2507 assert(VA.isMemLoc());
2508 unsigned offset = 0;
2509
2510 // True if this byval aggregate will be split between registers
2511 // and memory.
2512 unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
2513 unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
2514
2515 if (CurByValIdx < ByValArgsCount) {
2516
2517 unsigned RegBegin, RegEnd;
2518 CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
2519
2520 EVT PtrVT =
2521 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
2522 unsigned int i, j;
2523 for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
2524 SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
2525 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
2526 SDValue Load =
2527 DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo(),
2528 DAG.InferPtrAlign(AddArg));
2529 MemOpChains.push_back(Load.getValue(1));
2530 RegsToPass.push_back(std::make_pair(j, Load));
2531 }
2532
2533 // If the parameter size exceeds the register area, the "offset" value
2534 // helps us calculate the stack slot for the remaining part properly.
2535 offset = RegEnd - RegBegin;
2536
2537 CCInfo.nextInRegsParam();
2538 }
2539
2540 if (Flags.getByValSize() > 4*offset) {
2541 auto PtrVT = getPointerTy(DAG.getDataLayout());
2542 SDValue Dst;
2543 MachinePointerInfo DstInfo;
2544 std::tie(Dst, DstInfo) =
2545 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2546 SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
2547 SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
2548 SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
2549 MVT::i32);
2550 SDValue AlignNode =
2551 DAG.getConstant(Flags.getNonZeroByValAlign().value(), dl, MVT::i32);
2552
2553 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
2554 SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
2555 MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
2556 Ops));
2557 }
2558 } else {
2559 assert(VA.isMemLoc());
2560 SDValue DstAddr;
2561 MachinePointerInfo DstInfo;
2562 std::tie(DstAddr, DstInfo) =
2563 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2564
2565 SDValue Store = DAG.getStore(Chain, dl, Arg, DstAddr, DstInfo);
2566 MemOpChains.push_back(Store);
2567 }
2568 }
2569
2570 if (!MemOpChains.empty())
2571 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2572
2573 // Build a sequence of copy-to-reg nodes chained together with token chain
2574 // and flag operands which copy the outgoing args into the appropriate regs.
2575 SDValue InFlag;
2576 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
2577 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
2578 RegsToPass[i].second, InFlag);
2579 InFlag = Chain.getValue(1);
2580 }
2581
2582 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
2583 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
2584 // node so that legalize doesn't hack it.
2585 bool isDirect = false;
2586
2587 const TargetMachine &TM = getTargetMachine();
2588 const Module *Mod = MF.getFunction().getParent();
2589 const GlobalValue *GV = nullptr;
2590 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
2591 GV = G->getGlobal();
2592 bool isStub =
2593 !TM.shouldAssumeDSOLocal(*Mod, GV) && Subtarget->isTargetMachO();
2594
2595 bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
2596 bool isLocalARMFunc = false;
2597 auto PtrVt = getPointerTy(DAG.getDataLayout());
2598
2599 if (Subtarget->genLongCalls()) {
2600 assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
2601 "long-calls codegen is not position independent!");
2602 // Handle a global address or an external symbol. If it's not one of
2603 // those, the target's already in a register, so we don't need to do
2604 // anything extra.
2605 if (isa<GlobalAddressSDNode>(Callee)) {
2606 // Create a constant pool entry for the callee address
2607 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2608 ARMConstantPoolValue *CPV =
2609 ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
2610
2611 // Get the address of the callee into a register
2612 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2613 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2614 Callee = DAG.getLoad(
2615 PtrVt, dl, DAG.getEntryNode(), CPAddr,
2616 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2617 } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
2618 const char *Sym = S->getSymbol();
2619
2620 // Create a constant pool entry for the callee address
2621 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2622 ARMConstantPoolValue *CPV =
2623 ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2624 ARMPCLabelIndex, 0);
2625 // Get the address of the callee into a register
2626 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2627 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2628 Callee = DAG.getLoad(
2629 PtrVt, dl, DAG.getEntryNode(), CPAddr,
2630 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2631 }
2632 } else if (isa<GlobalAddressSDNode>(Callee)) {
2633 if (!PreferIndirect) {
2634 isDirect = true;
2635 bool isDef = GV->isStrongDefinitionForLinker();
2636
2637 // ARM call to a local ARM function is predicable.
2638 isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
2639 // tBX takes a register source operand.
2640 if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2641 assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
2642 Callee = DAG.getNode(
2643 ARMISD::WrapperPIC, dl, PtrVt,
2644 DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
2645 Callee = DAG.getLoad(
2646 PtrVt, dl, DAG.getEntryNode(), Callee,
2647 MachinePointerInfo::getGOT(DAG.getMachineFunction()), MaybeAlign(),
2648 MachineMemOperand::MODereferenceable |
2649 MachineMemOperand::MOInvariant);
2650 } else if (Subtarget->isTargetCOFF()) {
2651 assert(Subtarget->isTargetWindows() &&
2652 "Windows is the only supported COFF target");
2653 unsigned TargetFlags = ARMII::MO_NO_FLAG;
2654 if (GV->hasDLLImportStorageClass())
2655 TargetFlags = ARMII::MO_DLLIMPORT;
2656 else if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
2657 TargetFlags = ARMII::MO_COFFSTUB;
2658 Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*offset=*/0,
2659 TargetFlags);
2660 if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
2661 Callee =
2662 DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
2663 DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
2664 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
2665 } else {
2666 Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, 0);
2667 }
2668 }
2669 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2670 isDirect = true;
2671 // tBX takes a register source operand.
2672 const char *Sym = S->getSymbol();
2673 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2674 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2675 ARMConstantPoolValue *CPV =
2676 ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2677 ARMPCLabelIndex, 4);
2678 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2679 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2680 Callee = DAG.getLoad(
2681 PtrVt, dl, DAG.getEntryNode(), CPAddr,
2682 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2683 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2684 Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
2685 } else {
2686 Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
2687 }
2688 }
2689
2690 if (isCmseNSCall) {
2691 assert(!isARMFunc && !isDirect &&
2692 "Cannot handle call to ARM function or direct call");
2693 if (NumBytes > 0) {
2694 DiagnosticInfoUnsupported Diag(DAG.getMachineFunction().getFunction(),
2695 "call to non-secure function would "
2696 "require passing arguments on stack",
2697 dl.getDebugLoc());
2698 DAG.getContext()->diagnose(Diag);
2699 }
2700 if (isStructRet) {
2701 DiagnosticInfoUnsupported Diag(
2702 DAG.getMachineFunction().getFunction(),
2703 "call to non-secure function would return value through pointer",
2704 dl.getDebugLoc());
2705 DAG.getContext()->diagnose(Diag);
2706 }
2707 }
2708
2709 // FIXME: handle tail calls differently.
2710 unsigned CallOpc;
2711 if (Subtarget->isThumb()) {
2712 if (isCmseNSCall)
2713 CallOpc = ARMISD::tSECALL;
2714 else if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2715 CallOpc = ARMISD::CALL_NOLINK;
2716 else
2717 CallOpc = ARMISD::CALL;
2718 } else {
2719 if (!isDirect && !Subtarget->hasV5TOps())
2720 CallOpc = ARMISD::CALL_NOLINK;
2721 else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2722 // Emit regular call when code size is the priority
2723 !Subtarget->hasMinSize())
2724 // "mov lr, pc; b _foo" to avoid confusing the RSP
2725 CallOpc = ARMISD::CALL_NOLINK;
2726 else
2727 CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
2728 }
2729
2730 // We don't usually want to end the call-sequence here because we would tidy
2731 // the frame up *after* the call. However, in the ABI-changing tail-call case
2732 // we've carefully laid out the parameters so that when sp is reset they'll be
2733 // in the correct location.
2734 if (isTailCall && !isSibCall) {
2735 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true),
2736 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
2737 InFlag = Chain.getValue(1);
2738 }
2739
2740 std::vector<SDValue> Ops;
2741 Ops.push_back(Chain);
2742 Ops.push_back(Callee);
2743
2744 if (isTailCall) {
2745 Ops.push_back(DAG.getTargetConstant(SPDiff, dl, MVT::i32));
2746 }
2747
2748 // Add argument registers to the end of the list so that they are known live
2749 // into the call.
2750 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2751 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2752 RegsToPass[i].second.getValueType()));
2753
2754 // Add a register mask operand representing the call-preserved registers.
2755 if (!isTailCall) {
2756 const uint32_t *Mask;
2757 const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
2758 if (isThisReturn) {
2759 // For 'this' returns, use the R0-preserving mask if applicable
2760 Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
2761 if (!Mask) {
2762 // Set isThisReturn to false if the calling convention is not one that
2763 // allows 'returned' to be modeled in this way, so LowerCallResult does
2764 // not try to pass 'this' straight through
2765 isThisReturn = false;
2766 Mask = ARI->getCallPreservedMask(MF, CallConv);
2767 }
2768 } else
2769 Mask = ARI->getCallPreservedMask(MF, CallConv);
2770
2771 assert(Mask && "Missing call preserved mask for calling convention");
2772 Ops.push_back(DAG.getRegisterMask(Mask));
2773 }
2774
2775 if (InFlag.getNode())
2776 Ops.push_back(InFlag);
2777
2778 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2779 if (isTailCall) {
2780 MF.getFrameInfo().setHasTailCall();
2781 SDValue Ret = DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
2782 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
2783 return Ret;
2784 }
2785
2786 // Returns a chain and a flag for retval copy to use.
2787 Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
2788 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2789 InFlag = Chain.getValue(1);
2790 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
2791
2792 // If we're guaranteeing tail-calls will be honoured, the callee must
2793 // pop its own argument stack on return. But this call is *not* a tail call so
2794 // we need to undo that after it returns to restore the status-quo.
2795 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
2796 uint64_t CalleePopBytes =
2797 canGuaranteeTCO(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : -1ULL;
2798
2799 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
2800 DAG.getIntPtrConstant(CalleePopBytes, dl, true),
2801 InFlag, dl);
2802 if (!Ins.empty())
2803 InFlag = Chain.getValue(1);
2804
2805 // Handle result values, copying them out of physregs into vregs that we
2806 // return.
2807 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
2808 InVals, isThisReturn,
2809 isThisReturn ? OutVals[0] : SDValue());
2810}
2811
2812/// HandleByVal - Every parameter *after* a byval parameter is passed
2813/// on the stack. Remember the next parameter register to allocate,
2814 /// and then confiscate the rest of the parameter registers to ensure
2815/// this.
2816void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
2817 Align Alignment) const {
2818 // Byval (as with any stack) slots are always at least 4 byte aligned.
2819 Alignment = std::max(Alignment, Align(4));
2820
2821 unsigned Reg = State->AllocateReg(GPRArgRegs);
2822 if (!Reg)
2823 return;
2824
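// If the byval needs more than 4-byte alignment, skip ("waste") argument
// registers until one with suitable alignment is reached; e.g. an 8-byte
// aligned byval whose next free register is r1 wastes r1 and starts at r2.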
2825 unsigned AlignInRegs = Alignment.value() / 4;
2826 unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
2827 for (unsigned i = 0; i < Waste; ++i)
2828 Reg = State->AllocateReg(GPRArgRegs);
2829
2830 if (!Reg)
2831 return;
2832
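// Excess is the number of bytes of the byval that can still be passed in the
// remaining argument registers [Reg, r4).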
2833 unsigned Excess = 4 * (ARM::R4 - Reg);
2834
2835 // Special case: when NSAA != SP and the parameter size is greater than the
2836 // size of all remaining GPR regs, we can't split the parameter; we must
2837 // send it entirely to the stack. We must also set NCRN to R4, wasting all
2838 // remaining registers.
2839 const unsigned NSAAOffset = State->getNextStackOffset();
2840 if (NSAAOffset != 0 && Size > Excess) {
2841 while (State->AllocateReg(GPRArgRegs))
2842 ;
2843 return;
2844 }
2845
2846 // The first register for the byval parameter is the first register that
2847 // wasn't allocated before this method call, i.e. "reg".
2848 // If the parameter is small enough to fit in the range [reg, r4), then the
2849 // end (one past the last) register is reg + param-size-in-regs;
2850 // otherwise the parameter is split between registers and the stack, and
2851 // the end register is r4.
2852 unsigned ByValRegBegin = Reg;
2853 unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
2854 State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
2855 // Note: the first register was already allocated at the beginning of the
2856 // function, so allocate only the remaining registers we need.
2857 for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
2858 State->AllocateReg(GPRArgRegs);
2859 // A byval parameter that is split between registers and memory needs its
2860 // size truncated here.
2861 // In the case where the entire structure fits in registers, we set the
2862 // size in memory to zero.
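// For illustration: a 12-byte byval starting at r1 occupies r1-r3 (Excess == 12),
// so Size becomes 0; a 20-byte byval starting at r2 (with no stack arguments
// allocated yet) keeps 20 - 8 == 12 bytes in memory.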
2863 Size = std::max<int>(Size - Excess, 0);
2864}
2865
2866/// MatchingStackOffset - Return true if the given stack call argument is
2867/// already available in the same position (relatively) of the caller's
2868/// incoming argument stack.
2869static
2870bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
2871 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
2872 const TargetInstrInfo *TII) {
2873 unsigned Bytes = Arg.getValueSizeInBits() / 8;
2874 int FI = std::numeric_limits<int>::max();
2875 if (Arg.getOpcode() == ISD::CopyFromReg) {
2876 unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2877 if (!Register::isVirtualRegister(VR))
2878 return false;
2879 MachineInstr *Def = MRI->getVRegDef(VR);
2880 if (!Def)
2881 return false;
2882 if (!Flags.isByVal()) {
2883 if (!TII->isLoadFromStackSlot(*Def, FI))
2884 return false;
2885 } else {
2886 return false;
2887 }
2888 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2889 if (Flags.isByVal())
2890 // ByVal argument is passed in as a pointer but it's now being
2891 // dereferenced. e.g.
2892 // define @foo(%struct.X* %A) {
2893 // tail call @bar(%struct.X* byval %A)
2894 // }
2895 return false;
2896 SDValue Ptr = Ld->getBasePtr();
2897 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
2898 if (!FINode)
2899 return false;
2900 FI = FINode->getIndex();
2901 } else
2902 return false;
2903
2904 assert(FI != std::numeric_limits<int>::max());
2905 if (!MFI.isFixedObjectIndex(FI))
2906 return false;
2907 return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
2908}
2909
2910/// IsEligibleForTailCallOptimization - Check whether the call is eligible
2911/// for tail call optimization. Targets which want to do tail call
2912/// optimization should implement this function.
2913bool ARMTargetLowering::IsEligibleForTailCallOptimization(
2914 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
2915 bool isCalleeStructRet, bool isCallerStructRet,
2916 const SmallVectorImpl<ISD::OutputArg> &Outs,
2917 const SmallVectorImpl<SDValue> &OutVals,
2918 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG,
2919 const bool isIndirect) const {
2920 MachineFunction &MF = DAG.getMachineFunction();
2921 const Function &CallerF = MF.getFunction();
2922 CallingConv::ID CallerCC = CallerF.getCallingConv();
2923
2924 assert(Subtarget->supportsTailCall());
2925
2926 // Indirect tail calls cannot be optimized for Thumb1 if the args
2927 // to the call take up r0-r3. The reason is that there are no legal registers
2928 // left to hold the pointer to the function to be called.
2929 if (Subtarget->isThumb1Only() && Outs.size() >= 4 &&
2930 (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect))
2931 return false;
2932
2933 // Look for obvious safe cases to perform tail call optimization that do not
2934 // require ABI changes. This is what gcc calls sibcall.
2935
2936 // Exception-handling functions need a special set of instructions to indicate
2937 // a return to the hardware. Tail-calling another function would probably
2938 // break this.
2939 if (CallerF.hasFnAttribute("interrupt"))
2940 return false;
2941
2942 if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt))
2943 return CalleeCC == CallerCC;
2944
2945 // Also avoid sibcall optimization if either caller or callee uses struct
2946 // return semantics.
2947 if (isCalleeStructRet || isCallerStructRet)
2948 return false;
2949
2950 // Externally-defined functions with weak linkage should not be
2951 // tail-called on ARM when the OS does not support dynamic
2952 // pre-emption of symbols, as the AAELF spec requires normal calls
2953 // to undefined weak functions to be replaced with a NOP or jump to the
2954 // next instruction. The behaviour of branch instructions in this
2955 // situation (as used for tail calls) is implementation-defined, so we
2956 // cannot rely on the linker replacing the tail call with a return.
2957 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2958 const GlobalValue *GV = G->getGlobal();
2959 const Triple &TT = getTargetMachine().getTargetTriple();
2960 if (GV->hasExternalWeakLinkage() &&
2961 (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
2962 return false;
2963 }
2964
2965 // Check that the call results are passed in the same way.
2966 LLVMContext &C = *DAG.getContext();
2967 if (!CCState::resultsCompatible(
2968 getEffectiveCallingConv(CalleeCC, isVarArg),
2969 getEffectiveCallingConv(CallerCC, CallerF.isVarArg()), MF, C, Ins,
2970 CCAssignFnForReturn(CalleeCC, isVarArg),
2971 CCAssignFnForReturn(CallerCC, CallerF.isVarArg())))
2972 return false;
2973 // The callee has to preserve all registers the caller needs to preserve.
2974 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2975 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
2976 if (CalleeCC != CallerCC) {
2977 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
2978 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
2979 return false;
2980 }
2981
2982 // If Caller's vararg or byval argument has been split between registers and
2983 // stack, do not perform tail call, since part of the argument is in caller's
2984 // local frame.
2985 const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
2986 if (AFI_Caller->getArgRegsSaveSize())
2987 return false;
2988
2989 // If the callee takes no arguments then go on to check the results of the
2990 // call.
2991 if (!Outs.empty()) {
2992 // Check if stack adjustment is needed. For now, do not do this if any
2993 // argument is passed on the stack.
2994 SmallVector<CCValAssign, 16> ArgLocs;
2995 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
2996 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
2997 if (CCInfo.getNextStackOffset()) {
2998 // Check if the arguments are already laid out in the right way as
2999 // the caller's fixed stack objects.
3000 MachineFrameInfo &MFI = MF.getFrameInfo();
3001 const MachineRegisterInfo *MRI = &MF.getRegInfo();
3002 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3003 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
3004 i != e;
3005 ++i, ++realArgIdx) {
3006 CCValAssign &VA = ArgLocs[i];
3007 EVT RegVT = VA.getLocVT();
3008 SDValue Arg = OutVals[realArgIdx];
3009 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
3010 if (VA.getLocInfo() == CCValAssign::Indirect)
3011 return false;
3012 if (VA.needsCustom() && (RegVT == MVT::f64 || RegVT == MVT::v2f64)) {
3013 // f64 and vector types are split into multiple registers or
3014 // register/stack-slot combinations. The types will not match
3015 // the registers; give up on memory f64 refs until we figure
3016 // out what to do about this.
3017 if (!VA.isRegLoc())
3018 return false;
3019 if (!ArgLocs[++i].isRegLoc())
3020 return false;
3021 if (RegVT == MVT::v2f64) {
3022 if (!ArgLocs[++i].isRegLoc())
3023 return false;
3024 if (!ArgLocs[++i].isRegLoc())
3025 return false;
3026 }
3027 } else if (!VA.isRegLoc()) {
3028 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
3029 MFI, MRI, TII))
3030 return false;
3031 }
3032 }
3033 }
3034
3035 const MachineRegisterInfo &MRI = MF.getRegInfo();
3036 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
3037 return false;
3038 }
3039
3040 return true;
3041}
3042
3043bool
3044ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
3045 MachineFunction &MF, bool isVarArg,
3046 const SmallVectorImpl<ISD::OutputArg> &Outs,
3047 LLVMContext &Context) const {
3048 SmallVector<CCValAssign, 16> RVLocs;
3049 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
3050 return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
3051}
3052
3053static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
3054 const SDLoc &DL, SelectionDAG &DAG) {
3055 const MachineFunction &MF = DAG.getMachineFunction();
3056 const Function &F = MF.getFunction();
3057
3058 StringRef IntKind = F.getFnAttribute("interrupt").getValueAsString();
3059
3060 // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
3061 // version of the "preferred return address". These offsets affect the return
3062 // instruction if this is a return from PL1 without hypervisor extensions.
3063 // IRQ/FIQ: +4 "subs pc, lr, #4"
3064 // SWI: 0 "subs pc, lr, #0"
3065 // ABORT: +4 "subs pc, lr, #4"
3066 // UNDEF: +4/+2 "subs pc, lr, #0"
3067 // UNDEF varies depending on whether the exception came from ARM or Thumb
3068 // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
3069
3070 int64_t LROffset;
3071 if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
3072 IntKind == "ABORT")
3073 LROffset = 4;
3074 else if (IntKind == "SWI" || IntKind == "UNDEF")
3075 LROffset = 0;
3076 else
3077 report_fatal_error("Unsupported interrupt attribute. If present, value "
3078 "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
3079
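// The LR adjustment becomes operand #1 of the interrupt-return node, directly
// after the chain operand.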
3080 RetOps.insert(RetOps.begin() + 1,
3081 DAG.getConstant(LROffset, DL, MVT::i32, false));
3082
3083 return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps);
3084}
3085
3086SDValue
3087ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
3088 bool isVarArg,
3089 const SmallVectorImpl<ISD::OutputArg> &Outs,
3090 const SmallVectorImpl<SDValue> &OutVals,
3091 const SDLoc &dl, SelectionDAG &DAG) const {
3092 // CCValAssign - represent the assignment of the return value to a location.
3093 SmallVector<CCValAssign, 16> RVLocs;
3094
3095 // CCState - Info about the registers and stack slots.
3096 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3097 *DAG.getContext());
3098
3099 // Analyze outgoing return values.
3100 CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
3101
3102 SDValue Flag;
3103 SmallVector<SDValue, 4> RetOps;
3104 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
3105 bool isLittleEndian = Subtarget->isLittle();
3106
3107 MachineFunction &MF = DAG.getMachineFunction();
3108 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3109 AFI->setReturnRegsCount(RVLocs.size());
3110
3111 // Report error if cmse entry function returns structure through first ptr arg.
3112 if (AFI->isCmseNSEntryFunction() && MF.getFunction().hasStructRetAttr()) {
3113 // Note: using an empty SDLoc(), as the first line of the function is a
3114 // better place to report than the last line.
3115 DiagnosticInfoUnsupported Diag(
3116 DAG.getMachineFunction().getFunction(),
3117 "secure entry function would return value through pointer",
3118 SDLoc().getDebugLoc());
3119 DAG.getContext()->diagnose(Diag);
3120 }
3121
3122 // Copy the result values into the output registers.
3123 for (unsigned i = 0, realRVLocIdx = 0;
3124 i != RVLocs.size();
3125 ++i, ++realRVLocIdx) {
3126 CCValAssign &VA = RVLocs[i];
3127 assert(VA.isRegLoc() && "Can only return in registers!");
3128
3129 SDValue Arg = OutVals[realRVLocIdx];
3130 bool ReturnF16 = false;
3131
3132 if (Subtarget->hasFullFP16() && Subtarget->isTargetHardFloat()) {
3133 // Half-precision return values can be returned like this:
3134 //
3135 // t11 f16 = fadd ...
3136 // t12: i16 = bitcast t11
3137 // t13: i32 = zero_extend t12
3138 // t14: f32 = bitcast t13 <~~~~~~~ Arg
3139 //
3140 // to avoid code generation for bitcasts, we simply set Arg to the node
3141 // that produces the f16 value, t11 in this case.
3142 //
3143 if (Arg.getValueType() == MVT::f32 && Arg.getOpcode() == ISD::BITCAST) {
3144 SDValue ZE = Arg.getOperand(0);
3145 if (ZE.getOpcode() == ISD::ZERO_EXTEND && ZE.getValueType() == MVT::i32) {
3146 SDValue BC = ZE.getOperand(0);
3147 if (BC.getOpcode() == ISD::BITCAST && BC.getValueType() == MVT::i16) {
3148 Arg = BC.getOperand(0);
3149 ReturnF16 = true;
3150 }
3151 }
3152 }
3153 }
3154
3155 switch (VA.getLocInfo()) {
3156 default: llvm_unreachable("Unknown loc info!");
3157 case CCValAssign::Full: break;
3158 case CCValAssign::BCvt:
3159 if (!ReturnF16)
3160 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
3161 break;
3162 }
3163
3164 // Mask f16 arguments if this is a CMSE nonsecure entry.
3165 auto RetVT = Outs[realRVLocIdx].ArgVT;
3166 if (AFI->isCmseNSEntryFunction() && (RetVT == MVT::f16)) {
3167 if (VA.needsCustom() && VA.getValVT() == MVT::f16) {
3168 Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
3169 } else {
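// Zero every bit of the wider return location outside the f16 payload so
// that no stale data from the secure state leaks to the nonsecure caller.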
3170 auto LocBits = VA.getLocVT().getSizeInBits();
3171 auto MaskValue = APInt::getLowBitsSet(LocBits, RetVT.getSizeInBits());
3172 SDValue Mask =
3173 DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
3174 Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
3175 Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
3176 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
3177 }
3178 }
3179
3180 if (VA.needsCustom() &&
3181 (VA.getLocVT() == MVT::v2f64 || VA.getLocVT() == MVT::f64)) {
3182 if (VA.getLocVT() == MVT::v2f64) {
3183 // Extract the first half and return it in two registers.
3184 SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
3185 DAG.getConstant(0, dl, MVT::i32));
3186 SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
3187 DAG.getVTList(MVT::i32, MVT::i32), Half);
3188
3189 Chain =
3190 DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3191 HalfGPRs.getValue(isLittleEndian ? 0 : 1), Flag);
3192 Flag = Chain.getValue(1);
3193 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3194 VA = RVLocs[++i]; // skip ahead to next loc
3195 Chain =
3196 DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3197 HalfGPRs.getValue(isLittleEndian ? 1 : 0), Flag);
3198 Flag = Chain.getValue(1);
3199 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3200 VA = RVLocs[++i]; // skip ahead to next loc
3201
3202 // Extract the 2nd half and fall through to handle it as an f64 value.
3203 Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
3204 DAG.getConstant(1, dl, MVT::i32));
3205 }
3206 // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
3207 // available.
3208 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
3209 DAG.getVTList(MVT::i32, MVT::i32), Arg);
3210 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3211 fmrrd.getValue(isLittleEndian ? 0 : 1), Flag);
3212 Flag = Chain.getValue(1);
3213 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3214 VA = RVLocs[++i]; // skip ahead to next loc
3215 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3216 fmrrd.getValue(isLittleEndian ? 1 : 0), Flag);
3217 } else
3218 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
3219
3220 // Guarantee that all emitted copies are glued together,
3221 // so nothing can be scheduled in between them.
3222 Flag = Chain.getValue(1);
3223 RetOps.push_back(DAG.getRegister(
3224 VA.getLocReg(), ReturnF16 ? Arg.getValueType() : VA.getLocVT()));
3225 }
3226 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
3227 const MCPhysReg *I =
3228 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
3229 if (I) {
3230 for (; *I; ++I) {
3231 if (ARM::GPRRegClass.contains(*I))
3232 RetOps.push_back(DAG.getRegister(*I, MVT::i32));
3233 else if (ARM::DPRRegClass.contains(*I))
3234 RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
3235 else
3236 llvm_unreachable("Unexpected register class in CSRsViaCopy!")__builtin_unreachable();
3237 }
3238 }
3239
3240 // Update chain and glue.
3241 RetOps[0] = Chain;
3242 if (Flag.getNode())
3243 RetOps.push_back(Flag);
3244
3245 // CPUs which aren't M-class use a special sequence to return from
3246 // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
3247 // though we use "subs pc, lr, #N").
3248 //
3249 // M-class CPUs actually use a normal return sequence with a special
3250 // (hardware-provided) value in LR, so the normal code path works.
3251 if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt") &&
3252 !Subtarget->isMClass()) {
3253 if (Subtarget->isThumb1Only())
3254 report_fatal_error("interrupt attribute is not supported in Thumb1");
3255 return LowerInterruptReturn(RetOps, dl, DAG);
3256 }
3257
3258 ARMISD::NodeType RetNode = AFI->isCmseNSEntryFunction() ? ARMISD::SERET_FLAG :
3259 ARMISD::RET_FLAG;
3260 return DAG.getNode(RetNode, dl, MVT::Other, RetOps);
3261}
3262
3263bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
3264 if (N->getNumValues() != 1)
3265 return false;
3266 if (!N->hasNUsesOfValue(1, 0))
3267 return false;
3268
3269 SDValue TCChain = Chain;
3270 SDNode *Copy = *N->use_begin();
3271 if (Copy->getOpcode() == ISD::CopyToReg) {
3272 // If the copy has a glue operand, we conservatively assume it isn't safe to
3273 // perform a tail call.
3274 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3275 return false;
3276 TCChain = Copy->getOperand(0);
3277 } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
3278 SDNode *VMov = Copy;
3279 // f64 returned in a pair of GPRs.
3280 SmallPtrSet<SDNode*, 2> Copies;
3281 for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
3282 UI != UE; ++UI) {
3283 if (UI->getOpcode() != ISD::CopyToReg)
3284 return false;
3285 Copies.insert(*UI);
3286 }
3287 if (Copies.size() > 2)
3288 return false;
3289
3290 for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
3291 UI != UE; ++UI) {
3292 SDValue UseChain = UI->getOperand(0);
3293 if (Copies.count(UseChain.getNode()))
3294 // Second CopyToReg
3295 Copy = *UI;
3296 else {
3297 // We are at the top of this chain.
3298 // If the copy has a glue operand, we conservatively assume it
3299 // isn't safe to perform a tail call.
3300 if (UI->getOperand(UI->getNumOperands()-1).getValueType() == MVT::Glue)
3301 return false;
3302 // First CopyToReg
3303 TCChain = UseChain;
3304 }
3305 }
3306 } else if (Copy->getOpcode() == ISD::BITCAST) {
3307 // f32 returned in a single GPR.
3308 if (!Copy->hasOneUse())
3309 return false;
3310 Copy = *Copy->use_begin();
3311 if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
3312 return false;
3313 // If the copy has a glue operand, we conservatively assume it isn't safe to
3314 // perform a tail call.
3315 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3316 return false;
3317 TCChain = Copy->getOperand(0);
3318 } else {
3319 return false;
3320 }
3321
3322 bool HasRet = false;
3323 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
3324 UI != UE; ++UI) {
3325 if (UI->getOpcode() != ARMISD::RET_FLAG &&
3326 UI->getOpcode() != ARMISD::INTRET_FLAG)
3327 return false;
3328 HasRet = true;
3329 }
3330
3331 if (!HasRet)
3332 return false;
3333
3334 Chain = TCChain;
3335 return true;
3336}
3337
3338bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
3339 if (!Subtarget->supportsTailCall())
3340 return false;
3341
3342 if (!CI->isTailCall())
3343 return false;
3344
3345 return true;
3346}
3347
3348 // Writing a 64 bit value, so we need to split it into two 32 bit values first,
3349 // and pass the low and high parts through.
3350static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) {
3351 SDLoc DL(Op);
3352 SDValue WriteValue = Op->getOperand(2);
3353
3354 // This function is only supposed to be called for i64 type argument.
3355 assert(WriteValue.getValueType() == MVT::i64
3356 && "LowerWRITE_REGISTER called for non-i64 type argument.");
3357
3358 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
3359 DAG.getConstant(0, DL, MVT::i32));
3360 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
3361 DAG.getConstant(1, DL, MVT::i32));
3362 SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
3363 return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
3364}
3365
3366// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
3367// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
3368// one of the above mentioned nodes. It has to be wrapped because otherwise
3369// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
3370// be used to form addressing mode. These wrapped nodes will be selected
3371// into MOVi.
3372SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
3373 SelectionDAG &DAG) const {
3374 EVT PtrVT = Op.getValueType();
3375 // FIXME there is no actual debug info here
3376 SDLoc dl(Op);
3377 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3378 SDValue Res;
3379
3380 // When generating execute-only code Constant Pools must be promoted to the
3381 // global data section. It's a bit ugly that we can't share them across basic
3382 // blocks, but this way we guarantee that execute-only behaves correctly with
3383 // position-independent addressing modes.
3384 if (Subtarget->genExecuteOnly()) {
3385 auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
3386 auto T = const_cast<Type*>(CP->getType());
3387 auto C = const_cast<Constant*>(CP->getConstVal());
3388 auto M = const_cast<Module*>(DAG.getMachineFunction().
3389 getFunction().getParent());
3390 auto GV = new GlobalVariable(
3391 *M, T, /*isConstant=*/true, GlobalVariable::InternalLinkage, C,
3392 Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" +
3393 Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" +
3394 Twine(AFI->createPICLabelUId())
3395 );
3396 SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV),
3397 dl, PtrVT);
3398 return LowerGlobalAddress(GA, DAG);
3399 }
3400
3401 if (CP->isMachineConstantPoolEntry())
3402 Res =
3403 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign());
3404 else
3405 Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign());
3406 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
3407}
3408
3409unsigned ARMTargetLowering::getJumpTableEncoding() const {
3410 return MachineJumpTableInfo::EK_Inline;
3411}
3412
3413SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
3414 SelectionDAG &DAG) const {
3415 MachineFunction &MF = DAG.getMachineFunction();
3416 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3417 unsigned ARMPCLabelIndex = 0;
3418 SDLoc DL(Op);
3419 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3420 const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
3421 SDValue CPAddr;
3422 bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
3423 if (!IsPositionIndependent) {
3424 CPAddr = DAG.getTargetConstantPool(BA, PtrVT, Align(4));
3425 } else {
3426 unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
3427 ARMPCLabelIndex = AFI->createPICLabelUId();
3428 ARMConstantPoolValue *CPV =
3429 ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
3430 ARMCP::CPBlockAddress, PCAdj);
3431 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3432 }
3433 CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
3434 SDValue Result = DAG.getLoad(
3435 PtrVT, DL, DAG.getEntryNode(), CPAddr,
3436 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3437 if (!IsPositionIndependent)
3438 return Result;
3439 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
3440 return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
3441}
3442
3443/// Convert a TLS address reference into the correct sequence of loads
3444/// and calls to compute the variable's address for Darwin, and return an
3445/// SDValue containing the final node.
3446
3447/// Darwin only has one TLS scheme which must be capable of dealing with the
3448/// fully general situation, in the worst case. This means:
3449/// + "extern __thread" declaration.
3450/// + Defined in a possibly unknown dynamic library.
3451///
3452/// The general system is that each __thread variable has a [3 x i32] descriptor
3453/// which contains information used by the runtime to calculate the address. The
3454/// only part of this the compiler needs to know about is the first word, which
3455/// contains a function pointer that must be called with the address of the
3456/// entire descriptor in "r0".
3457///
3458/// Since this descriptor may be in a different unit, in general access must
3459/// proceed along the usual ARM rules. A common sequence to produce is:
3460///
3461/// movw rT1, :lower16:_var$non_lazy_ptr
3462/// movt rT1, :upper16:_var$non_lazy_ptr
3463/// ldr r0, [rT1]
3464/// ldr rT2, [r0]
3465/// blx rT2
3466/// [...address now in r0...]
3467SDValue
3468ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
3469 SelectionDAG &DAG) const {
3470 assert(Subtarget->isTargetDarwin() &&
3471 "This function expects a Darwin target");
3472 SDLoc DL(Op);
3473
3474 // The first step is to get the address of the actual global symbol. This is where
3475 // the TLS descriptor lives.
3476 SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
3477
3478 // The first entry in the descriptor is a function pointer that we must call
3479 // to obtain the address of the variable.
3480 SDValue Chain = DAG.getEntryNode();
3481 SDValue FuncTLVGet = DAG.getLoad(
3482 MVT::i32, DL, Chain, DescAddr,
3483 MachinePointerInfo::getGOT(DAG.getMachineFunction()), Align(4),
3484 MachineMemOperand::MONonTemporal | MachineMemOperand::MODereferenceable |
3485 MachineMemOperand::MOInvariant);
3486 Chain = FuncTLVGet.getValue(1);
3487
3488 MachineFunction &F = DAG.getMachineFunction();
3489 MachineFrameInfo &MFI = F.getFrameInfo();
3490 MFI.setAdjustsStack(true);
3491
3492 // TLS calls preserve all registers except those that absolutely must be
3493 // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
3494 // silly).
3495 auto TRI =
3496 getTargetMachine().getSubtargetImpl(F.getFunction())->getRegisterInfo();
3497 auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
3498 const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
3499
3500 // Finally, we can make the call. This is just a degenerate version of a
3501 // normal ARM call node: r0 takes the address of the descriptor, and
3502 // returns the address of the variable in this thread.
3503 Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
3504 Chain =
3505 DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
3506 Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
3507 DAG.getRegisterMask(Mask), Chain.getValue(1));
3508 return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
3509}
3510
3511SDValue
3512ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
3513 SelectionDAG &DAG) const {
3514 assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
3515
3516 SDValue Chain = DAG.getEntryNode();
3517 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3518 SDLoc DL(Op);
3519
3520 // Load the current TEB (thread environment block)
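// (The operands below encode "mrc p15, #0, <Rt>, c13, c0, #2", i.e. a read of
// the CP15 user read/write thread ID register, which Windows uses for the TEB.)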
3521 SDValue Ops[] = {Chain,
3522 DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),
3523 DAG.getTargetConstant(15, DL, MVT::i32),
3524 DAG.getTargetConstant(0, DL, MVT::i32),
3525 DAG.getTargetConstant(13, DL, MVT::i32),
3526 DAG.getTargetConstant(0, DL, MVT::i32),
3527 DAG.getTargetConstant(2, DL, MVT::i32)};
3528 SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
3529 DAG.getVTList(MVT::i32, MVT::Other), Ops);
3530
3531 SDValue TEB = CurrentTEB.getValue(0);
3532 Chain = CurrentTEB.getValue(1);
3533
3534 // Load the ThreadLocalStoragePointer from the TEB
3535 // A pointer to the TLS array is located at offset 0x2c from the TEB.
3536 SDValue TLSArray =
3537 DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));
3538 TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
3539
3540 // The pointer to the thread's TLS data area is at the TLS Index scaled by 4
3541 // offset into the TLSArray.
3542
3543 // Load the TLS index from the C runtime
3544 SDValue TLSIndex =
3545 DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);
3546 TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
3547 TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());
3548
3549 SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
3550 DAG.getConstant(2, DL, MVT::i32));
3551 SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
3552 DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
3553 MachinePointerInfo());
3554
3555 // Get the offset of the start of the .tls section (section base)
3556 const auto *GA = cast<GlobalAddressSDNode>(Op);
3557 auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
3558 SDValue Offset = DAG.getLoad(
3559 PtrVT, DL, Chain,
3560 DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,
3561 DAG.getTargetConstantPool(CPV, PtrVT, Align(4))),
3562 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3563
3564 return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
3565}
3566
3567// Lower ISD::GlobalTLSAddress using the "general dynamic" model
3568SDValue
3569ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
3570 SelectionDAG &DAG) const {
3571 SDLoc dl(GA);
3572 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3573 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3574 MachineFunction &MF = DAG.getMachineFunction();
3575 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3576 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3577 ARMConstantPoolValue *CPV =
3578 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3579 ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
3580 SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3581 Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
3582 Argument = DAG.getLoad(
3583 PtrVT, dl, DAG.getEntryNode(), Argument,
3584 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3585 SDValue Chain = Argument.getValue(1);
3586
3587 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3588 Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
3589
3590 // call __tls_get_addr.
3591 ArgListTy Args;
3592 ArgListEntry Entry;
3593 Entry.Node = Argument;
3594 Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
3595 Args.push_back(Entry);
3596
3597 // FIXME: is there useful debug info available here?
3598 TargetLowering::CallLoweringInfo CLI(DAG);
3599 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3600 CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
3601 DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
3602
3603 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3604 return CallResult.first;
3605}
3606
3607// Lower ISD::GlobalTLSAddress using the "initial exec" or
3608// "local exec" model.
3609SDValue
3610ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
3611 SelectionDAG &DAG,
3612 TLSModel::Model model) const {
3613 const GlobalValue *GV = GA->getGlobal();
3614 SDLoc dl(GA);
3615 SDValue Offset;
3616 SDValue Chain = DAG.getEntryNode();
3617 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3618 // Get the Thread Pointer
3619 SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
3620
3621 if (model == TLSModel::InitialExec) {
3622 MachineFunction &MF = DAG.getMachineFunction();
3623 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3624 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3625 // Initial exec model.
3626 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3627 ARMConstantPoolValue *CPV =
3628 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3629 ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
3630 true);
3631 Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3632 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3633 Offset = DAG.getLoad(
3634 PtrVT, dl, Chain, Offset,
3635 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3636 Chain = Offset.getValue(1);
3637
3638 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3639 Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
3640
3641 Offset = DAG.getLoad(
3642 PtrVT, dl, Chain, Offset,
3643 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3644 } else {
3645 // local exec model
3646 assert(model == TLSModel::LocalExec);
3647 ARMConstantPoolValue *CPV =
3648 ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
3649 Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3650 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3651 Offset = DAG.getLoad(
3652 PtrVT, dl, Chain, Offset,
3653 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3654 }
3655
3656 // The address of the thread local variable is the add of the thread
3657 // pointer with the offset of the variable.
3658 return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
3659}
3660
3661SDValue
3662ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
3663 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3664 if (DAG.getTarget().useEmulatedTLS())
3665 return LowerToTLSEmulatedModel(GA, DAG);
3666
3667 if (Subtarget->isTargetDarwin())
3668 return LowerGlobalTLSAddressDarwin(Op, DAG);
3669
3670 if (Subtarget->isTargetWindows())
3671 return LowerGlobalTLSAddressWindows(Op, DAG);
3672
3673 // TODO: implement the "local dynamic" model
3674 assert(Subtarget->isTargetELF() && "Only ELF implemented here");
3675 TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
3676
3677 switch (model) {
3678 case TLSModel::GeneralDynamic:
3679 case TLSModel::LocalDynamic:
3680 return LowerToTLSGeneralDynamicModel(GA, DAG);
3681 case TLSModel::InitialExec:
3682 case TLSModel::LocalExec:
3683 return LowerToTLSExecModels(GA, DAG, model);
3684 }
3685 llvm_unreachable("bogus TLS model")__builtin_unreachable();
3686}
3687
3688/// Return true if all users of V are within function F, looking through
3689/// ConstantExprs.
3690static bool allUsersAreInFunction(const Value *V, const Function *F) {
3691 SmallVector<const User*,4> Worklist(V->users());
3692 while (!Worklist.empty()) {
3693 auto *U = Worklist.pop_back_val();
3694 if (isa<ConstantExpr>(U)) {
3695 append_range(Worklist, U->users());
3696 continue;
3697 }
3698
3699 auto *I = dyn_cast<Instruction>(U);
3700 if (!I || I->getParent()->getParent() != F)
3701 return false;
3702 }
3703 return true;
3704}
3705
3706static SDValue promoteToConstantPool(const ARMTargetLowering *TLI,
3707 const GlobalValue *GV, SelectionDAG &DAG,
3708 EVT PtrVT, const SDLoc &dl) {
3709 // If we're creating a pool entry for a constant global with unnamed address,
3710 // and the global is small enough, we can emit it inline into the constant pool
3711 // to save ourselves an indirection.
3712 //
3713 // This is a win if the constant is only used in one function (so it doesn't
3714 // need to be duplicated) or duplicating the constant wouldn't increase code
3715 // size (implying the constant is no larger than 4 bytes).
3716 const Function &F = DAG.getMachineFunction().getFunction();
3717
3718 // We rely on this decision to inline being idempotent and unrelated to the
3719 // use-site. We know that if we inline a variable at one use site, we'll
3720 // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
3721 // doesn't know about this optimization, so bail out if it's enabled; otherwise
3722 // we could decide to inline here (and thus never emit the GV) while fast-isel
3723 // generated code still requires the GV.
3724 if (!EnableConstpoolPromotion ||
3725 DAG.getMachineFunction().getTarget().Options.EnableFastISel)
3726 return SDValue();
3727
3728 auto *GVar = dyn_cast<GlobalVariable>(GV);
3729 if (!GVar || !GVar->hasInitializer() ||
3730 !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
3731 !GVar->hasLocalLinkage())
3732 return SDValue();
3733
3734 // If we inline a value that contains relocations, we move the relocations
3735 // from .data to .text. This is not allowed in position-independent code.
3736 auto *Init = GVar->getInitializer();
3737 if ((TLI->isPositionIndependent() || TLI->getSubtarget()->isROPI()) &&
3738 Init->needsDynamicRelocation())
3739 return SDValue();
3740
3741 // The constant islands pass can only really deal with alignment requests
3742 // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote
3743 // any type wanting greater alignment requirements than 4 bytes. We also
3744 // can only promote constants that are multiples of 4 bytes in size or
3745 // are paddable to a multiple of 4. Currently we only try to pad constants
3746 // that are strings for simplicity.
3747 auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
3748 unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
3749 Align PrefAlign = DAG.getDataLayout().getPreferredAlign(GVar);
3750 unsigned RequiredPadding = 4 - (Size % 4);
3751 bool PaddingPossible =
3752 RequiredPadding == 4 || (CDAInit && CDAInit->isString());
3753 if (!PaddingPossible || PrefAlign > 4 || Size > ConstpoolPromotionMaxSize ||
3754 Size == 0)
3755 return SDValue();
3756
3757 unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
3758 MachineFunction &MF = DAG.getMachineFunction();
3759 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3760
3761 // We can't bloat the constant pool too much, else the ConstantIslands pass
3762 // may fail to converge. If we haven't promoted this global yet (it may have
3763 // multiple uses), and promoting it would increase the constant pool size (Sz
3764 // > 4), ensure we have space to do so up to MaxTotal.
3765 if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4)
3766 if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >=
3767 ConstpoolPromotionMaxTotal)
3768 return SDValue();
3769
3770 // This is only valid if all users are in a single function; we can't clone
3771 // the constant in general. The LLVM IR unnamed_addr allows merging
3772 // constants, but not cloning them.
3773 //
3774 // We could potentially allow cloning if we could prove all uses of the
3775 // constant in the current function don't care about the address, like
3776 // printf format strings. But that isn't implemented for now.
3777 if (!allUsersAreInFunction(GVar, &F))
3778 return SDValue();
3779
3780 // We're going to inline this global. Pad it out if needed.
3781 if (RequiredPadding != 4) {
3782 StringRef S = CDAInit->getAsString();
3783
3784 SmallVector<uint8_t,16> V(S.size());
3785 std::copy(S.bytes_begin(), S.bytes_end(), V.begin());
3786 while (RequiredPadding--)
3787 V.push_back(0);
3788 Init = ConstantDataArray::get(*DAG.getContext(), V);
3789 }
3790
3791 auto CPVal = ARMConstantPoolConstant::Create(GVar, Init);
3792 SDValue CPAddr = DAG.getTargetConstantPool(CPVal, PtrVT, Align(4));
3793 if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) {
3794 AFI->markGlobalAsPromotedToConstantPool(GVar);
3795 AFI->setPromotedConstpoolIncrease(AFI->getPromotedConstpoolIncrease() +
3796 PaddedSize - 4);
3797 }
3798 ++NumConstpoolPromoted;
3799 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3800}
3801
3802bool ARMTargetLowering::isReadOnly(const GlobalValue *GV) const {
3803 if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
3804 if (!(GV = GA->getBaseObject()))
3805 return false;
3806 if (const auto *V = dyn_cast<GlobalVariable>(GV))
3807 return V->isConstant();
3808 return isa<Function>(GV);
3809}
3810
3811SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op,
3812 SelectionDAG &DAG) const {
3813 switch (Subtarget->getTargetTriple().getObjectFormat()) {
3814 default: llvm_unreachable("unknown object format")__builtin_unreachable();
3815 case Triple::COFF:
3816 return LowerGlobalAddressWindows(Op, DAG);
3817 case Triple::ELF:
3818 return LowerGlobalAddressELF(Op, DAG);
3819 case Triple::MachO:
3820 return LowerGlobalAddressDarwin(Op, DAG);
3821 }
3822}
3823
3824SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
3825 SelectionDAG &DAG) const {
3826 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3827 SDLoc dl(Op);
3828 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3829 const TargetMachine &TM = getTargetMachine();
3830 bool IsRO = isReadOnly(GV);
3831
3832 // promoteToConstantPool only if not generating XO text section
3833 if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly())
3834 if (SDValue V = promoteToConstantPool(this, GV, DAG, PtrVT, dl))
3835 return V;
3836
3837 if (isPositionIndependent()) {
3838 bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
3839 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3840 UseGOT_PREL ? ARMII::MO_GOT : 0);
3841 SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3842 if (UseGOT_PREL)
3843 Result =
3844 DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3845 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3846 return Result;
3847 } else if (Subtarget->isROPI() && IsRO) {
3848 // PC-relative.
3849 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
3850 SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3851 return Result;
3852 } else if (Subtarget->isRWPI() && !IsRO) {
3853 // SB-relative.
3854 SDValue RelAddr;
3855 if (Subtarget->useMovt()) {
3856 ++NumMovwMovt;
3857 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
3858 RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
3859 } else { // use literal pool for address constant
3860 ARMConstantPoolValue *CPV =
3861 ARMConstantPoolConstant::Create(GV, ARMCP::SBREL);
3862 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3863 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3864 RelAddr = DAG.getLoad(
3865 PtrVT, dl, DAG.getEntryNode(), CPAddr,
3866 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3867 }
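// In RWPI code r9 is the static base (SB); the address of writable data is
// SB plus the SB-relative offset computed above.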
3868 SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT);
3869 SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr);
3870 return Result;
3871 }
3872
3873 // If we have T2 ops, we can materialize the address directly via movt/movw
3874 // pair. This is always cheaper.
3875 if (Subtarget->useMovt()) {
3876 ++NumMovwMovt;
3877 // FIXME: Once remat is capable of dealing with instructions with register
3878 // operands, expand this into two nodes.
3879 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
3880 DAG.getTargetGlobalAddress(GV, dl, PtrVT));
3881 } else {
3882 SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, Align(4));
3883 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3884 return DAG.getLoad(
3885 PtrVT, dl, DAG.getEntryNode(), CPAddr,
3886 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3887 }
3888}
3889
3890SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
3891 SelectionDAG &DAG) const {
3892 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3893 "ROPI/RWPI not currently supported for Darwin");
3894 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3895 SDLoc dl(Op);
3896 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3897
3898 if (Subtarget->useMovt())
3899 ++NumMovwMovt;
3900
3901 // FIXME: Once remat is capable of dealing with instructions with register
3902 // operands, expand this into multiple nodes
3903 unsigned Wrapper =
3904 isPositionIndependent() ? ARMISD::WrapperPIC : ARMISD::Wrapper;
3905
3906 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
3907 SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
3908
3909 if (Subtarget->isGVIndirectSymbol(GV))
3910 Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3911 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3912 return Result;
3913}
3914
3915SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
3916 SelectionDAG &DAG) const {
3917 assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
3918 assert(Subtarget->useMovt() &&
3919 "Windows on ARM expects to use movw/movt");
3920 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3921 "ROPI/RWPI not currently supported for Windows");
3922
3923 const TargetMachine &TM = getTargetMachine();
3924 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3925 ARMII::TOF TargetFlags = ARMII::MO_NO_FLAG;
3926 if (GV->hasDLLImportStorageClass())
3927 TargetFlags = ARMII::MO_DLLIMPORT;
3928 else if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
3929 TargetFlags = ARMII::MO_COFFSTUB;
3930 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3931 SDValue Result;
3932 SDLoc DL(Op);
3933
3934 ++NumMovwMovt;
3935
3936 // FIXME: Once remat is capable of dealing with instructions with register
3937 // operands, expand this into two nodes.
3938 Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
3939 DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*offset=*/0,
3940 TargetFlags));
3941 if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
3942 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3943 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3944 return Result;
3945}
3946
3947SDValue
3948ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
3949 SDLoc dl(Op);
3950 SDValue Val = DAG.getConstant(0, dl, MVT::i32);
3951 return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
3952 DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
3953 Op.getOperand(1), Val);
3954}
3955
3956SDValue
3957ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
3958 SDLoc dl(Op);
3959 return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
3960 Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
3961}
3962
3963SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
3964 SelectionDAG &DAG) const {
3965 SDLoc dl(Op);
3966 return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other,
3967 Op.getOperand(0));
3968}
3969
3970SDValue ARMTargetLowering::LowerINTRINSIC_VOID(
3971 SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const {
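  // The intrinsic ID is operand 0 unless the node carries a chain (a value of
  // type MVT::Other) as operand 0, in which case the ID is operand 1; the
  // boolean index in the expression below selects between the two.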
3972 unsigned IntNo =
3973 cast<ConstantSDNode>(
3974 Op.getOperand(Op.getOperand(0).getValueType() == MVT::Other))
3975 ->getZExtValue();
3976 switch (IntNo) {
3977 default:
3978 return SDValue(); // Don't custom lower most intrinsics.
3979 case Intrinsic::arm_gnu_eabi_mcount: {
3980 MachineFunction &MF = DAG.getMachineFunction();
3981 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3982 SDLoc dl(Op);
3983 SDValue Chain = Op.getOperand(0);
3984 // call "\01__gnu_mcount_nc"
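    // The tBL_PUSHLR/BL_PUSHLR pseudos used below push the return address
    // before the branch-and-link, which is why LR is marked live-in and fed
    // to the node: the __gnu_mcount_nc hook expects its caller's return
    // address on the stack (a summary inferred from the node names and
    // operands below).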
3985 const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
3986 const uint32_t *Mask =
3987 ARI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
3988    assert(Mask && "Missing call preserved mask for calling convention");
3989 // Mark LR an implicit live-in.
3990 unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
3991 SDValue ReturnAddress =
3992 DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, PtrVT);
3993 constexpr EVT ResultTys[] = {MVT::Other, MVT::Glue};
3994 SDValue Callee =
3995 DAG.getTargetExternalSymbol("\01__gnu_mcount_nc", PtrVT, 0);
3996 SDValue RegisterMask = DAG.getRegisterMask(Mask);
3997 if (Subtarget->isThumb())
3998 return SDValue(
3999 DAG.getMachineNode(
4000 ARM::tBL_PUSHLR, dl, ResultTys,
4001 {ReturnAddress, DAG.getTargetConstant(ARMCC::AL, dl, PtrVT),
4002 DAG.getRegister(0, PtrVT), Callee, RegisterMask, Chain}),
4003 0);
4004 return SDValue(
4005 DAG.getMachineNode(ARM::BL_PUSHLR, dl, ResultTys,
4006 {ReturnAddress, Callee, RegisterMask, Chain}),
4007 0);
4008 }
4009 }
4010}
4011
4012SDValue
4013ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
4014 const ARMSubtarget *Subtarget) const {
4015 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
4016 SDLoc dl(Op);
4017 switch (IntNo) {
4018 default: return SDValue(); // Don't custom lower most intrinsics.
4019 case Intrinsic::thread_pointer: {
4020 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4021 return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
4022 }
4023 case Intrinsic::arm_cls: {
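    // The expansion below computes cls(x) as clz(((x ^ (x >> 31)) << 1) | 1).
    // A worked example: for x = 1, sra = 0, xor = 1, shl = 2, or = 3, and
    // clz(3) = 30, matching cls(1) = 30 (thirty leading bits equal to the
    // sign bit, not counting the sign bit itself).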
4024 const SDValue &Operand = Op.getOperand(1);
4025 const EVT VTy = Op.getValueType();
4026 SDValue SRA =
4027 DAG.getNode(ISD::SRA, dl, VTy, Operand, DAG.getConstant(31, dl, VTy));
4028 SDValue XOR = DAG.getNode(ISD::XOR, dl, VTy, SRA, Operand);
4029 SDValue SHL =
4030 DAG.getNode(ISD::SHL, dl, VTy, XOR, DAG.getConstant(1, dl, VTy));
4031 SDValue OR =
4032 DAG.getNode(ISD::OR, dl, VTy, SHL, DAG.getConstant(1, dl, VTy));
4033 SDValue Result = DAG.getNode(ISD::CTLZ, dl, VTy, OR);
4034 return Result;
4035 }
4036 case Intrinsic::arm_cls64: {
4037 // cls(x) = if cls(hi(x)) != 31 then cls(hi(x))
4038 // else 31 + clz(if hi(x) == 0 then lo(x) else not(lo(x)))
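    // For example, for an i64 value x = 1 (hi = 0, lo = 1): cls(hi) = 31, so
    // the result is 31 + clz(lo) = 31 + 31 = 62, i.e. 62 leading bits equal
    // to the sign bit.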
4039 const SDValue &Operand = Op.getOperand(1);
4040 const EVT VTy = Op.getValueType();
4041
4042 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VTy, Operand,
4043 DAG.getConstant(1, dl, VTy));
4044 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VTy, Operand,
4045 DAG.getConstant(0, dl, VTy));
4046 SDValue Constant0 = DAG.getConstant(0, dl, VTy);
4047 SDValue Constant1 = DAG.getConstant(1, dl, VTy);
4048 SDValue Constant31 = DAG.getConstant(31, dl, VTy);
4049 SDValue SRAHi = DAG.getNode(ISD::SRA, dl, VTy, Hi, Constant31);
4050 SDValue XORHi = DAG.getNode(ISD::XOR, dl, VTy, SRAHi, Hi);
4051 SDValue SHLHi = DAG.getNode(ISD::SHL, dl, VTy, XORHi, Constant1);
4052 SDValue ORHi = DAG.getNode(ISD::OR, dl, VTy, SHLHi, Constant1);
4053 SDValue CLSHi = DAG.getNode(ISD::CTLZ, dl, VTy, ORHi);
4054 SDValue CheckLo =
4055 DAG.getSetCC(dl, MVT::i1, CLSHi, Constant31, ISD::CondCode::SETEQ);
4056 SDValue HiIsZero =
4057 DAG.getSetCC(dl, MVT::i1, Hi, Constant0, ISD::CondCode::SETEQ);
4058 SDValue AdjustedLo =
4059 DAG.getSelect(dl, VTy, HiIsZero, Lo, DAG.getNOT(dl, Lo, VTy));
4060 SDValue CLZAdjustedLo = DAG.getNode(ISD::CTLZ, dl, VTy, AdjustedLo);
4061 SDValue Result =
4062 DAG.getSelect(dl, VTy, CheckLo,
4063 DAG.getNode(ISD::ADD, dl, VTy, CLZAdjustedLo, Constant31), CLSHi);
4064 return Result;
4065 }
4066 case Intrinsic::eh_sjlj_lsda: {
4067 MachineFunction &MF = DAG.getMachineFunction();
4068 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
4069 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
4070 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4071 SDValue CPAddr;
4072 bool IsPositionIndependent = isPositionIndependent();
4073 unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
4074 ARMConstantPoolValue *CPV =
4075 ARMConstantPoolConstant::Create(&MF.getFunction(), ARMPCLabelIndex,
4076 ARMCP::CPLSDA, PCAdj);
4077 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
4078 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
4079 SDValue Result = DAG.getLoad(
4080 PtrVT, dl, DAG.getEntryNode(), CPAddr,
4081 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
4082
4083 if (IsPositionIndependent) {
4084 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
4085 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
4086 }
4087 return Result;
4088 }
4089 case Intrinsic::arm_neon_vabs:
4090 return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(),
4091 Op.getOperand(1));
4092 case Intrinsic::arm_neon_vmulls:
4093 case Intrinsic::arm_neon_vmullu: {
4094 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
4095 ? ARMISD::VMULLs : ARMISD::VMULLu;
4096 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4097 Op.getOperand(1), Op.getOperand(2));
4098 }
4099 case Intrinsic::arm_neon_vminnm:
4100 case Intrinsic::arm_neon_vmaxnm: {
4101 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)
4102 ? ISD::FMINNUM : ISD::FMAXNUM;
4103 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4104 Op.getOperand(1), Op.getOperand(2));
4105 }
4106 case Intrinsic::arm_neon_vminu:
4107 case Intrinsic::arm_neon_vmaxu: {
4108 if (Op.getValueType().isFloatingPoint())
4109 return SDValue();
4110 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)
4111 ? ISD::UMIN : ISD::UMAX;
4112 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4113 Op.getOperand(1), Op.getOperand(2));
4114 }
4115 case Intrinsic::arm_neon_vmins:
4116 case Intrinsic::arm_neon_vmaxs: {
4117 // v{min,max}s is overloaded between signed integers and floats.
4118 if (!Op.getValueType().isFloatingPoint()) {
4119 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
4120 ? ISD::SMIN : ISD::SMAX;
4121 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4122 Op.getOperand(1), Op.getOperand(2));
4123 }
4124 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
4125 ? ISD::FMINIMUM : ISD::FMAXIMUM;
4126 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4127 Op.getOperand(1), Op.getOperand(2));
4128 }
4129 case Intrinsic::arm_neon_vtbl1:
4130 return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(),
4131 Op.getOperand(1), Op.getOperand(2));
4132 case Intrinsic::arm_neon_vtbl2:
4133 return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(),
4134 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4135 case Intrinsic::arm_mve_pred_i2v:
4136 case Intrinsic::arm_mve_pred_v2i:
4137 return DAG.getNode(ARMISD::PREDICATE_CAST, SDLoc(Op), Op.getValueType(),
4138 Op.getOperand(1));
4139 case Intrinsic::arm_mve_vreinterpretq:
4140 return DAG.getNode(ARMISD::VECTOR_REG_CAST, SDLoc(Op), Op.getValueType(),
4141 Op.getOperand(1));
4142 case Intrinsic::arm_mve_lsll:
4143 return DAG.getNode(ARMISD::LSLL, SDLoc(Op), Op->getVTList(),
4144 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4145 case Intrinsic::arm_mve_asrl:
4146 return DAG.getNode(ARMISD::ASRL, SDLoc(Op), Op->getVTList(),
4147 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4148 }
4149}
4150
4151static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
4152 const ARMSubtarget *Subtarget) {
4153 SDLoc dl(Op);
4154 ConstantSDNode *SSIDNode = cast<ConstantSDNode>(Op.getOperand(2));
4155 auto SSID = static_cast<SyncScope::ID>(SSIDNode->getZExtValue());
4156 if (SSID == SyncScope::SingleThread)
4157 return Op;
4158
4159 if (!Subtarget->hasDataBarrier()) {
4160 // Some ARMv6 cpus can support data barriers with an mcr instruction.
4161 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
4162 // here.
4163    assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
4164           "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
4165 return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
4166 DAG.getConstant(0, dl, MVT::i32));
4167 }
4168
4169 ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
4170 AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
4171 ARM_MB::MemBOpt Domain = ARM_MB::ISH;
4172 if (Subtarget->isMClass()) {
4173 // Only a full system barrier exists in the M-class architectures.
4174 Domain = ARM_MB::SY;
4175 } else if (Subtarget->preferISHSTBarriers() &&
4176 Ord == AtomicOrdering::Release) {
4177 // Swift happens to implement ISHST barriers in a way that's compatible with
4178 // Release semantics but weaker than ISH so we'd be fools not to use
4179 // it. Beware: other processors probably don't!
4180 Domain = ARM_MB::ISHST;
4181 }
4182
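  // The node below becomes a DMB with the selected option, e.g. "dmb ish" in
  // the common case, "dmb sy" on M-class, or "dmb ishst" when the ISHST
  // shortcut above applies (a summary of the intrinsic emitted below).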
4183 return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
4184 DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32),
4185 DAG.getConstant(Domain, dl, MVT::i32));
4186}
4187
4188static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
4189 const ARMSubtarget *Subtarget) {
4190  // ARM pre-v5TE and Thumb1 do not have preload instructions.
4191 if (!(Subtarget->isThumb2() ||
4192 (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
4193 // Just preserve the chain.
4194 return Op.getOperand(0);
4195
4196 SDLoc dl(Op);
4197 unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
4198 if (!isRead &&
4199 (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
4200 // ARMv7 with MP extension has PLDW.
4201 return Op.getOperand(0);
4202
4203 unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
4204 if (Subtarget->isThumb()) {
4205 // Invert the bits.
4206 isRead = ~isRead & 1;
4207 isData = ~isData & 1;
4208 }
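  // The ARMISD::PRELOAD node carries the (possibly inverted) read/write and
  // data/instruction flags and is ultimately selected to a PLD/PLDW/PLI-style
  // preload (as the names suggest; the exact instruction depends on the
  // subtarget).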
4209
4210 return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
4211 Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32),
4212 DAG.getConstant(isData, dl, MVT::i32));
4213}
4214
4215static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
4216 MachineFunction &MF = DAG.getMachineFunction();
4217 ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
4218
4219 // vastart just stores the address of the VarArgsFrameIndex slot into the
4220 // memory location argument.
4221 SDLoc dl(Op);
4222 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
4223 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4224 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4225 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
4226 MachinePointerInfo(SV));
4227}
4228
4229SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA,
4230 CCValAssign &NextVA,
4231 SDValue &Root,
4232 SelectionDAG &DAG,
4233 const SDLoc &dl) const {
4234 MachineFunction &MF = DAG.getMachineFunction();
4235 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
4236
4237 const TargetRegisterClass *RC;
4238 if (AFI->isThumb1OnlyFunction())
4239 RC = &ARM::tGPRRegClass;
4240 else
4241 RC = &ARM::GPRRegClass;
4242
4243 // Transform the arguments stored in physical registers into virtual ones.
4244 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
4245 SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
4246
4247 SDValue ArgValue2;
4248 if (NextVA.isMemLoc()) {
4249 MachineFrameInfo &MFI = MF.getFrameInfo();
4250 int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true);
4251
4252 // Create load node to retrieve arguments from the stack.
4253 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4254 ArgValue2 = DAG.getLoad(
4255 MVT::i32, dl, Root, FIN,
4256 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
4257 } else {
4258 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
4259 ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
4260 }
4261 if (!Subtarget->isLittle())
4262 std::swap (ArgValue, ArgValue2);
4263 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
4264}
4265
4266// The remaining GPRs hold either the beginning of variable-argument
4267// data, or the beginning of an aggregate passed by value (usually
4268// byval). Either way, we allocate stack slots adjacent to the data
4269// provided by our caller, and store the unallocated registers there.
4270// If this is a variadic function, the va_list pointer will begin with
4271// these values; otherwise, this reassembles a (byval) structure that
4272// was split between registers and memory.
4273// Return: the frame index that the registers were stored into.
4274int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
4275 const SDLoc &dl, SDValue &Chain,
4276 const Value *OrigArg,
4277 unsigned InRegsParamRecordIdx,
4278 int ArgOffset, unsigned ArgSize) const {
4279  // Currently, two use-cases are possible:
4280  // Case #1. Non-varargs function, and we meet the first byval parameter.
4281  //          Set up the first unallocated register as the first byval register;
4282  //          eat all remaining registers
4283  //          (these two actions are performed by the HandleByVal method).
4284  //          Then, here, we initialize the stack frame with
4285  //          "store-reg" instructions.
4286  // Case #2. Varargs function that doesn't contain byval parameters.
4287  //          The same: eat all remaining unallocated registers and
4288  //          initialize the stack frame.
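  // An illustrative example (hypothetical values): if a byval argument was
  // assigned r2 and r3 plus some stack, then RBegin = R2 and REnd = R4 below,
  // the fixed object is created at offset -4 * (R4 - R2) = -8 relative to the
  // incoming stack, and r2/r3 are stored there so the data ends up contiguous
  // with the part the caller already placed on the stack.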
4289
4290 MachineFunction &MF = DAG.getMachineFunction();
4291 MachineFrameInfo &MFI = MF.getFrameInfo();
4292 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
4293 unsigned RBegin, REnd;
4294 if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
4295 CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
4296 } else {
4297 unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
4298 RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];
4299 REnd = ARM::R4;
4300 }
4301
4302 if (REnd != RBegin)
4303 ArgOffset = -4 * (ARM::R4 - RBegin);
4304
4305 auto PtrVT = getPointerTy(DAG.getDataLayout());
4306 int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false);
4307 SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);
4308
4309 SmallVector<SDValue, 4> MemOps;
4310 const TargetRegisterClass *RC =
4311 AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
4312
4313 for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
4314 unsigned VReg = MF.addLiveIn(Reg, RC);
4315 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
4316 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4317 MachinePointerInfo(OrigArg, 4 * i));
4318 MemOps.push_back(Store);
4319 FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));
4320 }
4321
4322 if (!MemOps.empty())
4323 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4324 return FrameIndex;
4325}
4326
4327// Set up the stack frame that the va_list pointer will start from.
4328void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
4329 const SDLoc &dl, SDValue &Chain,
4330 unsigned ArgOffset,
4331 unsigned TotalArgRegsSaveSize,
4332 bool ForceMutable) const {
4333 MachineFunction &MF = DAG.getMachineFunction();
4334 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
4335
4336 // Try to store any remaining integer argument regs
4337 // to their spots on the stack so that they may be loaded by dereferencing
4338 // the result of va_next.
4339  // If there are no regs to be stored, just point the address after the last
4340  // argument passed via the stack.
4341 int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
4342 CCInfo.getInRegsParamsCount(),
4343 CCInfo.getNextStackOffset(),
4344 std::max(4U, TotalArgRegsSaveSize));
4345 AFI->setVarArgsFrameIndex(FrameIndex);
4346}
4347
4348bool ARMTargetLowering::splitValueIntoRegisterParts(
4349 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
4350 unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const {
4351 bool IsABIRegCopy = CC.hasValue();
4352 EVT ValueVT = Val.getValueType();
4353 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
4354 PartVT == MVT::f32) {
4355 unsigned ValueBits = ValueVT.getSizeInBits();
4356 unsigned PartBits = PartVT.getSizeInBits();
4357 Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(ValueBits), Val);
4358 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::getIntegerVT(PartBits), Val);
4359 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
4360 Parts[0] = Val;
4361 return true;
4362 }
4363 return false;
4364}
4365
4366SDValue ARMTargetLowering::joinRegisterPartsIntoValue(
4367 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
4368 MVT PartVT, EVT ValueVT, Optional<CallingConv::ID> CC) const {
4369 bool IsABIRegCopy = CC.hasValue();
4370 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
4371 PartVT == MVT::f32) {
4372 unsigned ValueBits = ValueVT.getSizeInBits();
4373 unsigned PartBits = PartVT.getSizeInBits();
4374 SDValue Val = Parts[0];
4375
4376 Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(PartBits), Val);
4377 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::getIntegerVT(ValueBits), Val);
4378 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
4379 return Val;
4380 }
4381 return SDValue();
4382}
4383
4384SDValue ARMTargetLowering::LowerFormalArguments(
4385 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4386 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4387 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4388 MachineFunction &MF = DAG.getMachineFunction();
4389 MachineFrameInfo &MFI = MF.getFrameInfo();
4390
4391 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
4392
4393 // Assign locations to all of the incoming arguments.
4394 SmallVector<CCValAssign, 16> ArgLocs;
4395 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4396 *DAG.getContext());
4397 CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
4398
4399 SmallVector<SDValue, 16> ArgValues;
4400 SDValue ArgValue;
4401 Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
4402 unsigned CurArgIdx = 0;
4403
4404 // Initially ArgRegsSaveSize is zero.
4405  // Then we increase this value each time we meet a byval parameter.
4406  // We also increase this value in the case of a varargs function.
4407 AFI->setArgRegsSaveSize(0);
4408
4409 // Calculate the amount of stack space that we need to allocate to store
4410 // byval and variadic arguments that are passed in registers.
4411 // We need to know this before we allocate the first byval or variadic
4412 // argument, as they will be allocated a stack slot below the CFA (Canonical
4413 // Frame Address, the stack pointer at entry to the function).
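  // For example (values assumed for illustration), if the first register that
  // needs saving for a byval or variadic argument turns out to be r2, then
  // ArgRegBegin = R2 and the save area computed below is 4 * (R4 - R2) = 8
  // bytes, covering r2 and r3.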
4414 unsigned ArgRegBegin = ARM::R4;
4415 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4416 if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
4417 break;
4418
4419 CCValAssign &VA = ArgLocs[i];
4420 unsigned Index = VA.getValNo();
4421 ISD::ArgFlagsTy Flags = Ins[Index].Flags;
4422 if (!Flags.isByVal())
4423 continue;
4424
4425    assert(VA.isMemLoc() && "unexpected byval pointer in reg");
4426 unsigned RBegin, REnd;
4427 CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
4428 ArgRegBegin = std::min(ArgRegBegin, RBegin);
4429
4430 CCInfo.nextInRegsParam();
4431 }
4432 CCInfo.rewindByValRegsInfo();
4433
4434 int lastInsIndex = -1;
4435 if (isVarArg && MFI.hasVAStart()) {
4436 unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
4437 if (RegIdx != array_lengthof(GPRArgRegs))
4438 ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
4439 }
4440
4441 unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
4442 AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
4443 auto PtrVT = getPointerTy(DAG.getDataLayout());
4444
4445 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4446 CCValAssign &VA = ArgLocs[i];
4447 if (Ins[VA.getValNo()].isOrigArg()) {
4448 std::advance(CurOrigArg,
4449 Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
4450 CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
4451 }
4452 // Arguments stored in registers.
4453 if (VA.isRegLoc()) {
4454 EVT RegVT = VA.getLocVT();
4455
4456 if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {
4457 // f64 and vector types are split up into multiple registers or
4458 // combinations of registers and stack slots.
4459 SDValue ArgValue1 =
4460 GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4461 VA = ArgLocs[++i]; // skip ahead to next loc
4462 SDValue ArgValue2;
4463 if (VA.isMemLoc()) {
4464 int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);
4465 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4466 ArgValue2 = DAG.getLoad(
4467 MVT::f64, dl, Chain, FIN,
4468 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
4469 } else {
4470 ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4471 }
4472 ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
4473 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue,
4474 ArgValue1, DAG.getIntPtrConstant(0, dl));
4475 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue,
4476 ArgValue2, DAG.getIntPtrConstant(1, dl));
4477 } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) {
4478 ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4479 } else {
4480 const TargetRegisterClass *RC;
4481
4482 if (RegVT == MVT::f16 || RegVT == MVT::bf16)
4483 RC = &ARM::HPRRegClass;
4484 else if (RegVT == MVT::f32)
4485 RC = &ARM::SPRRegClass;
4486 else if (RegVT == MVT::f64 || RegVT == MVT::v4f16 ||
4487 RegVT == MVT::v4bf16)
4488 RC = &ARM::DPRRegClass;
4489 else if (RegVT == MVT::v2f64 || RegVT == MVT::v8f16 ||
4490 RegVT == MVT::v8bf16)
4491 RC = &ARM::QPRRegClass;
4492 else if (RegVT == MVT::i32)
4493 RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
4494 : &ARM::GPRRegClass;
4495 else
4496          llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
4497
4498 // Transform the arguments in physical registers into virtual ones.
4499 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
4500 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
4501
4502 // If this value is passed in r0 and has the returned attribute (e.g.
4503 // C++ 'structors), record this fact for later use.
4504 if (VA.getLocReg() == ARM::R0 && Ins[VA.getValNo()].Flags.isReturned()) {
4505 AFI->setPreservesR0();
4506 }
4507 }
4508
4509 // If this is an 8 or 16-bit value, it is really passed promoted
4510 // to 32 bits. Insert an assert[sz]ext to capture this, then
4511 // truncate to the right size.
4512 switch (VA.getLocInfo()) {
4513      default: llvm_unreachable("Unknown loc info!");
4514 case CCValAssign::Full: break;
4515 case CCValAssign::BCvt:
4516 ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
4517 break;
4518 case CCValAssign::SExt:
4519 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
4520 DAG.getValueType(VA.getValVT()));
4521 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
4522 break;
4523 case CCValAssign::ZExt:
4524 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
4525 DAG.getValueType(VA.getValVT()));
4526 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
4527 break;
4528 }
4529
4530      // f16 arguments have their size extended to 4 bytes and are passed as if
4531      // they had been copied to the LSBs of a 32-bit register.
4532      // For that, the value is passed extended to i32 (soft ABI) or to f32 (hard ABI).
4533 if (VA.needsCustom() &&
4534 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
4535 ArgValue = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), ArgValue);
4536
4537 InVals.push_back(ArgValue);
4538 } else { // VA.isRegLoc()
4539 // sanity check
4540      assert(VA.isMemLoc());
4541      assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
4542
4543 int index = VA.getValNo();
4544
4545 // Some Ins[] entries become multiple ArgLoc[] entries.
4546 // Process them only once.
4547 if (index != lastInsIndex)
4548 {
4549 ISD::ArgFlagsTy Flags = Ins[index].Flags;
4550 // FIXME: For now, all byval parameter objects are marked mutable.
4551 // This can be changed with more analysis.
4552          // In case of tail call optimization, mark all arguments mutable,
4553          // since they could be overwritten by the lowering of arguments in
4554          // case of a tail call.
4555 if (Flags.isByVal()) {
4556          assert(Ins[index].isOrigArg() &&
4557                 "Byval arguments cannot be implicit");
4558 unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
4559
4560 int FrameIndex = StoreByValRegs(
4561 CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex,
4562 VA.getLocMemOffset(), Flags.getByValSize());
4563 InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));
4564 CCInfo.nextInRegsParam();
4565 } else {
4566 unsigned FIOffset = VA.getLocMemOffset();
4567 int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
4568 FIOffset, true);
4569
4570 // Create load nodes to retrieve arguments from the stack.
4571 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4572 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
4573 MachinePointerInfo::getFixedStack(
4574 DAG.getMachineFunction(), FI)));
4575 }
4576 lastInsIndex = index;
4577 }
4578 }
4579 }
4580
4581 // varargs
4582 if (isVarArg && MFI.hasVAStart()) {
4583 VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getNextStackOffset(),
4584 TotalArgRegsSaveSize);
4585 if (AFI->isCmseNSEntryFunction()) {
4586 DiagnosticInfoUnsupported Diag(
4587 DAG.getMachineFunction().getFunction(),
4588 "secure entry function must not be variadic", dl.getDebugLoc());
4589 DAG.getContext()->diagnose(Diag);
4590 }
4591 }
4592
4593 unsigned StackArgSize = CCInfo.getNextStackOffset();
4594 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
4595 if (canGuaranteeTCO(CallConv, TailCallOpt)) {
4596 // The only way to guarantee a tail call is if the callee restores its
4597 // argument area, but it must also keep the stack aligned when doing so.
4598 const DataLayout &DL = DAG.getDataLayout();
4599 StackArgSize = alignTo(StackArgSize, DL.getStackAlignment());
4600
4601 AFI->setArgumentStackToRestore(StackArgSize);
4602 }
4603 AFI->setArgumentStackSize(StackArgSize);
4604
4605 if (CCInfo.getNextStackOffset() > 0 && AFI->isCmseNSEntryFunction()) {
4606 DiagnosticInfoUnsupported Diag(
4607 DAG.getMachineFunction().getFunction(),
4608 "secure entry function requires arguments on stack", dl.getDebugLoc());
4609 DAG.getContext()->diagnose(Diag);
4610 }
4611
4612 return Chain;
4613}
4614
4615/// isFloatingPointZero - Return true if this is +0.0.
4616static bool isFloatingPointZero(SDValue Op) {
4617 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
4618 return CFP->getValueAPF().isPosZero();
4619 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
4620 // Maybe this has already been legalized into the constant pool?
4621 if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
4622 SDValue WrapperOp = Op.getOperand(1).getOperand(0);
4623 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
4624 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
4625 return CFP->getValueAPF().isPosZero();
4626 }
4627 } else if (Op->getOpcode() == ISD::BITCAST &&
4628 Op->getValueType(0) == MVT::f64) {
4629 // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
4630 // created by LowerConstantFP().
4631 SDValue BitcastOp = Op->getOperand(0);
4632 if (BitcastOp->getOpcode() == ARMISD::VMOVIMM &&
4633 isNullConstant(BitcastOp->getOperand(0)))
4634 return true;
4635 }
4636 return false;
4637}
4638
4639/// Returns an appropriate ARM CMP (cmp) and the corresponding condition code
4640/// for the given operands.
4641SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
4642 SDValue &ARMcc, SelectionDAG &DAG,
4643 const SDLoc &dl) const {
4644 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
4645 unsigned C = RHSC->getZExtValue();
4646 if (!isLegalICmpImmediate((int32_t)C)) {
4647 // Constant does not fit, try adjusting it by one.
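      // For example, "x < 256" with 256 not encodable as an immediate can be
      // rewritten as "x <= 255" below, since 255 may be encodable (e.g. in a
      // Thumb1 8-bit cmp immediate); the other cases in this switch apply the
      // same adjacent-constant idea.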
4648 switch (CC) {
4649 default: break;
4650 case ISD::SETLT:
4651 case ISD::SETGE:
4652 if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
4653 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
4654 RHS = DAG.getConstant(C - 1, dl, MVT::i32);
4655 }
4656 break;
4657 case ISD::SETULT:
4658 case ISD::SETUGE:
4659 if (C != 0 && isLegalICmpImmediate(C-1)) {
4660 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
4661 RHS = DAG.getConstant(C - 1, dl, MVT::i32);
4662 }
4663 break;
4664 case ISD::SETLE:
4665 case ISD::SETGT:
4666 if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
4667 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
4668 RHS = DAG.getConstant(C + 1, dl, MVT::i32);
4669 }
4670 break;
4671 case ISD::SETULE:
4672 case ISD::SETUGT:
4673 if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
4674 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
4675 RHS = DAG.getConstant(C + 1, dl, MVT::i32);
4676 }
4677 break;
4678 }
4679 }
4680 } else if ((ARM_AM::getShiftOpcForNode(LHS.getOpcode()) != ARM_AM::no_shift) &&
4681 (ARM_AM::getShiftOpcForNode(RHS.getOpcode()) == ARM_AM::no_shift)) {
4682 // In ARM and Thumb-2, the compare instructions can shift their second
4683 // operand.
4684 CC = ISD::getSetCCSwappedOperands(CC);
4685 std::swap(LHS, RHS);
4686 }
4687
4688 // Thumb1 has very limited immediate modes, so turning an "and" into a
4689 // shift can save multiple instructions.
4690 //
4691 // If we have (x & C1), and C1 is an appropriate mask, we can transform it
4692 // into "((x << n) >> n)". But that isn't necessarily profitable on its
4693 // own. If it's the operand to an unsigned comparison with an immediate,
4694 // we can eliminate one of the shifts: we transform
4695 // "((x << n) >> n) == C2" to "(x << n) == (C2 << n)".
4696 //
4697 // We avoid transforming cases which aren't profitable due to encoding
4698 // details:
4699 //
4700 // 1. C2 fits into the immediate field of a cmp, and the transformed version
4701 // would not; in that case, we're essentially trading one immediate load for
4702 // another.
4703 // 2. C1 is 255 or 65535, so we can use uxtb or uxth.
4704 // 3. C2 is zero; we have other code for this special case.
4705 //
4706 // FIXME: Figure out profitability for Thumb2; we usually can't save an
4707 // instruction, since the AND is always one instruction anyway, but we could
4708 // use narrow instructions in some cases.
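  // A worked example: "(x & 0x0FFFFFFF) == 12" has n = countLeadingZeros(0x0FFFFFFF) = 4,
  // so it becomes "(x << 4) == 192"; the shifted constant 192 still fits in a
  // Thumb1 cmp immediate, while the original mask 0x0FFFFFFF would have needed
  // a separate constant load for the AND.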
4709 if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::AND &&
4710 LHS->hasOneUse() && isa<ConstantSDNode>(LHS.getOperand(1)) &&
4711 LHS.getValueType() == MVT::i32 && isa<ConstantSDNode>(RHS) &&
4712 !isSignedIntSetCC(CC)) {
4713 unsigned Mask = cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue();
4714 auto *RHSC = cast<ConstantSDNode>(RHS.getNode());
4715 uint64_t RHSV = RHSC->getZExtValue();
4716 if (isMask_32(Mask) && (RHSV & ~Mask) == 0 && Mask != 255 && Mask != 65535) {
4717 unsigned ShiftBits = countLeadingZeros(Mask);
4718 if (RHSV && (RHSV > 255 || (RHSV << ShiftBits) <= 255)) {
4719 SDValue ShiftAmt = DAG.getConstant(ShiftBits, dl, MVT::i32);
4720 LHS = DAG.getNode(ISD::SHL, dl, MVT::i32, LHS.getOperand(0), ShiftAmt);
4721 RHS = DAG.getConstant(RHSV << ShiftBits, dl, MVT::i32);
4722 }
4723 }
4724 }
4725
4726 // The specific comparison "(x<<c) > 0x80000000U" can be optimized to a
4727 // single "lsls x, c+1". The shift sets the "C" and "Z" flags the same
4728 // way a cmp would.
4729 // FIXME: Add support for ARM/Thumb2; this would need isel patterns, and
4730 // some tweaks to the heuristics for the previous and->shift transform.
4731 // FIXME: Optimize cases where the LHS isn't a shift.
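  // For example, "(x << 2) > 0x80000000U" becomes "lsls tmp, x, #3": the C
  // flag receives bit 31 of (x << 2) and Z is set iff (x << 3) is zero, so
  // the HI condition (C set and Z clear) holds exactly when (x << 2) exceeds
  // 0x80000000.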
4732 if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::SHL &&
4733 isa<ConstantSDNode>(RHS) &&
4734 cast<ConstantSDNode>(RHS)->getZExtValue() == 0x80000000U &&
4735 CC == ISD::SETUGT && isa<ConstantSDNode>(LHS.getOperand(1)) &&
4736 cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() < 31) {
4737 unsigned ShiftAmt =
4738 cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() + 1;
4739 SDValue Shift = DAG.getNode(ARMISD::LSLS, dl,
4740 DAG.getVTList(MVT::i32, MVT::i32),
4741 LHS.getOperand(0),
4742 DAG.getConstant(ShiftAmt, dl, MVT::i32));
4743 SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
4744 Shift.getValue(1), SDValue());
4745 ARMcc = DAG.getConstant(ARMCC::HI, dl, MVT::i32);
4746 return Chain.getValue(1);
4747 }
4748
4749 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
4750
4751 // If the RHS is a constant zero then the V (overflow) flag will never be
4752 // set. This can allow us to simplify GE to PL or LT to MI, which can be
4753 // simpler for other passes (like the peephole optimiser) to deal with.
4754 if (isNullConstant(RHS)) {
4755 switch (CondCode) {
4756 default: break;
4757 case ARMCC::GE:
4758 CondCode = ARMCC::PL;
4759 break;
4760 case ARMCC::LT:
4761 CondCode = ARMCC::MI;
4762 break;
4763 }
4764 }
4765
4766 ARMISD::NodeType CompareType;
4767 switch (CondCode) {
4768 default:
4769 CompareType = ARMISD::CMP;
4770 break;
4771 case ARMCC::EQ:
4772 case ARMCC::NE:
4773 // Uses only Z Flag
4774 CompareType = ARMISD::CMPZ;
4775 break;
4776 }
4777 ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
4778 return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
4779}
4780
4781/// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
4782SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
4783 SelectionDAG &DAG, const SDLoc &dl,
4784 bool Signaling) const {
4785  assert(Subtarget->hasFP64() || RHS.getValueType() != MVT::f64);
4786 SDValue Cmp;
4787 if (!isFloatingPointZero(RHS))
4788 Cmp = DAG.getNode(Signaling ? ARMISD::CMPFPE : ARMISD::CMPFP,
4789 dl, MVT::Glue, LHS, RHS);
4790 else
4791 Cmp = DAG.getNode(Signaling ? ARMISD::CMPFPEw0 : ARMISD::CMPFPw0,
4792 dl, MVT::Glue, LHS);
4793 return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
4794}
4795
4796/// duplicateCmp - Glue values can have only one use, so this function
4797/// duplicates a comparison node.
4798SDValue
4799ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
4800 unsigned Opc = Cmp.getOpcode();
4801 SDLoc DL(Cmp);
4802 if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
4803 return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
4804
4805  assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
4806 Cmp = Cmp.getOperand(0);
4807 Opc = Cmp.getOpcode();
4808 if (Opc == ARMISD::CMPFP)
4809 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
4810 else {
4811    assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
4812 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));
4813 }
4814 return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
4815}
4816
4817// This function returns three things: the arithmetic computation itself
4818// (Value), a comparison (OverflowCmp), and a condition code (ARMcc). The
4819// comparison and the condition code define the case in which the arithmetic
4820// computation *does not* overflow.
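// For instance, in the SADDO case below the comparison is CMP (LHS+RHS), LHS:
// that subtraction recomputes RHS, and its V flag is set exactly when the
// original signed addition overflowed, so the VC ("no overflow") condition
// matches the description above. The UADDO case similarly uses HS, which is
// set when the unsigned addition did not wrap (a brief justification of the
// comparisons emitted below).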
4821std::pair<SDValue, SDValue>
4822ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
4823 SDValue &ARMcc) const {
4824  assert(Op.getValueType() == MVT::i32 && "Unsupported value type");
4825
4826 SDValue Value, OverflowCmp;
4827 SDValue LHS = Op.getOperand(0);
4828 SDValue RHS = Op.getOperand(1);
4829 SDLoc dl(Op);
4830
4831 // FIXME: We are currently always generating CMPs because we don't support
4832 // generating CMN through the backend. This is not as good as the natural
4833 // CMP case because it causes a register dependency and cannot be folded
4834 // later.
4835
4836 switch (Op.getOpcode()) {
4837 default:
4838    llvm_unreachable("Unknown overflow instruction!");
4839 case ISD::SADDO:
4840 ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
4841 Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
4842 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
4843 break;
4844 case ISD::UADDO:
4845 ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
4846 // We use ADDC here to correspond to its use in LowerUnsignedALUO.
4847 // We do not use it in the USUBO case as Value may not be used.
4848 Value = DAG.getNode(ARMISD::ADDC, dl,
4849 DAG.getVTList(Op.getValueType(), MVT::i32), LHS, RHS)
4850 .getValue(0);
4851 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
4852 break;
4853 case ISD::SSUBO:
4854 ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
4855 Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
4856 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
4857 break;
4858 case ISD::USUBO:
4859 ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
4860 Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
4861 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
4862 break;
4863 case ISD::UMULO:
4864 // We generate a UMUL_LOHI and then check if the high word is 0.
4865 ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
4866 Value = DAG.getNode(ISD::UMUL_LOHI, dl,
4867 DAG.getVTList(Op.getValueType(), Op.getValueType()),
4868 LHS, RHS);
4869 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
4870 DAG.getConstant(0, dl, MVT::i32));
4871 Value = Value.getValue(0); // We only want the low 32 bits for the result.
4872 break;
4873 case ISD::SMULO:
4874 // We generate a SMUL_LOHI and then check if all the bits of the high word
4875 // are the same as the sign bit of the low word.
4876 ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
4877 Value = DAG.getNode(ISD::SMUL_LOHI, dl,
4878 DAG.getVTList(Op.getValueType(), Op.getValueType()),
4879 LHS, RHS);
4880 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
4881 DAG.getNode(ISD::SRA, dl, Op.getValueType(),
4882 Value.getValue(0),
4883 DAG.getConstant(31, dl, MVT::i32)));
4884 Value = Value.getValue(0); // We only want the low 32 bits for the result.
4885 break;
4886 } // switch (...)
4887
4888 return std::make_pair(Value, OverflowCmp);
4889}
4890
4891SDValue
4892ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const {
4893 // Let legalize expand this if it isn't a legal type yet.
4894 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
4895 return SDValue();
4896
4897 SDValue Value, OverflowCmp;
4898 SDValue ARMcc;
4899 std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
4900 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4901 SDLoc dl(Op);
4902 // We use 0 and 1 as false and true values.
4903 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
4904 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
4905 EVT VT = Op.getValueType();
4906
4907 SDValue Overflow = DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal,
4908 ARMcc, CCR, OverflowCmp);
4909
4910 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
4911 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
4912}
4913
4914static SDValue ConvertBooleanCarryToCarryFlag(SDValue BoolCarry,
4915 SelectionDAG &DAG) {
4916 SDLoc DL(BoolCarry);
4917 EVT CarryVT = BoolCarry.getValueType();
4918
4919 // This converts the boolean value carry into the carry flag by doing
4920 // ARMISD::SUBC Carry, 1
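  // With BoolCarry known to be 0 or 1, the subtraction "BoolCarry - 1" leaves
  // the ARM carry flag set (no borrow) exactly when BoolCarry was 1 and clear
  // when it was 0, so the flag mirrors the boolean value.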
4921 SDValue Carry = DAG.getNode(ARMISD::SUBC, DL,
4922 DAG.getVTList(CarryVT, MVT::i32),
4923 BoolCarry, DAG.getConstant(1, DL, CarryVT));
4924 return Carry.getValue(1);
4925}
4926
4927static SDValue ConvertCarryFlagToBooleanCarry(SDValue Flags, EVT VT,
4928 SelectionDAG &DAG) {
4929 SDLoc DL(Flags);
4930
4931 // Now convert the carry flag into a boolean carry. We do this
4932  // using ARMISD::ADDE 0, 0, Carry.
4933 return DAG.getNode(ARMISD::ADDE, DL, DAG.getVTList(VT, MVT::i32),
4934 DAG.getConstant(0, DL, MVT::i32),
4935 DAG.getConstant(0, DL, MVT::i32), Flags);
4936}
4937
4938SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op,
4939 SelectionDAG &DAG) const {
4940 // Let legalize expand this if it isn't a legal type yet.
4941 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
4942 return SDValue();
4943
4944 SDValue LHS = Op.getOperand(0);
4945 SDValue RHS = Op.getOperand(1);
4946 SDLoc dl(Op);
4947
4948 EVT VT = Op.getValueType();
4949 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
4950 SDValue Value;
4951 SDValue Overflow;
4952 switch (Op.getOpcode()) {
4953 default:
4954    llvm_unreachable("Unknown overflow instruction!");
4955 case ISD::UADDO:
4956 Value = DAG.getNode(ARMISD::ADDC, dl, VTs, LHS, RHS);
4957 // Convert the carry flag into a boolean value.
4958 Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
4959 break;
4960 case ISD::USUBO: {
4961 Value = DAG.getNode(ARMISD::SUBC, dl, VTs, LHS, RHS);
4962 // Convert the carry flag into a boolean value.
4963 Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
4964    // ARMISD::SUBC returns a carry of 0 when we have to borrow, so convert it
4965    // into an overflow value by computing 1 - C.
4966 Overflow = DAG.getNode(ISD::SUB, dl, MVT::i32,
4967 DAG.getConstant(1, dl, MVT::i32), Overflow);
4968 break;
4969 }
4970 }
4971
4972 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
4973}
4974
4975static SDValue LowerADDSUBSAT(SDValue Op, SelectionDAG &DAG,
4976 const ARMSubtarget *Subtarget) {
4977 EVT VT = Op.getValueType();
4978 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
4979 return SDValue();
4980 if (!VT.isSimple())
4981 return SDValue();
4982
4983 unsigned NewOpcode;
4984 switch (VT.getSimpleVT().SimpleTy) {
4985 default:
4986 return SDValue();
4987 case MVT::i8:
4988 switch (Op->getOpcode()) {
4989 case ISD::UADDSAT:
4990 NewOpcode = ARMISD::UQADD8b;
4991 break;
4992 case ISD::SADDSAT:
4993 NewOpcode = ARMISD::QADD8b;
4994 break;
4995 case ISD::USUBSAT:
4996 NewOpcode = ARMISD::UQSUB8b;
4997 break;
4998 case ISD::SSUBSAT:
4999 NewOpcode = ARMISD::QSUB8b;
5000 break;
5001 }
5002 break;
5003 case MVT::i16:
5004 switch (Op->getOpcode()) {
5005 case ISD::UADDSAT:
5006 NewOpcode = ARMISD::UQADD16b;
5007 break;
5008 case ISD::SADDSAT:
5009 NewOpcode = ARMISD::QADD16b;
5010 break;
5011 case ISD::USUBSAT:
5012 NewOpcode = ARMISD::UQSUB16b;
5013 break;
5014 case ISD::SSUBSAT:
5015 NewOpcode = ARMISD::QSUB16b;
5016 break;
5017 }
5018 break;
5019 }
5020
5021 SDLoc dl(Op);
5022 SDValue Add =
5023 DAG.getNode(NewOpcode, dl, MVT::i32,
5024 DAG.getSExtOrTrunc(Op->getOperand(0), dl, MVT::i32),
5025 DAG.getSExtOrTrunc(Op->getOperand(1), dl, MVT::i32));
5026 return DAG.getNode(ISD::TRUNCATE, dl, VT, Add);
5027}
5028
5029SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
5030 SDValue Cond = Op.getOperand(0);
5031 SDValue SelectTrue = Op.getOperand(1);
5032 SDValue SelectFalse = Op.getOperand(2);
5033 SDLoc dl(Op);
5034 unsigned Opc = Cond.getOpcode();
5035
5036 if (Cond.getResNo() == 1 &&
5037 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
5038 Opc == ISD::USUBO)) {
5039 if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
5040 return SDValue();
5041
5042 SDValue Value, OverflowCmp;
5043 SDValue ARMcc;
5044 std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
5045 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5046 EVT VT = Op.getValueType();
5047
5048 return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, CCR,
5049 OverflowCmp, DAG);
5050 }
5051
5052 // Convert:
5053 //
5054 // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
5055 // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
5056 //
5057 if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
5058 const ConstantSDNode *CMOVTrue =
5059 dyn_cast<ConstantSDNode>(Cond.getOperand(0));
5060 const ConstantSDNode *CMOVFalse =
5061 dyn_cast<ConstantSDNode>(Cond.getOperand(1));
5062
5063 if (CMOVTrue && CMOVFalse) {
5064 unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
5065 unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
5066
5067 SDValue True;
5068 SDValue False;
5069 if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
5070 True = SelectTrue;
5071 False = SelectFalse;
5072 } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
5073 True = SelectFalse;
5074 False = SelectTrue;
5075 }
5076
5077 if (True.getNode() && False.getNode()) {
5078 EVT VT = Op.getValueType();
5079 SDValue ARMcc = Cond.getOperand(2);
5080 SDValue CCR = Cond.getOperand(3);
5081 SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
5082        assert(True.getValueType() == VT);
5083 return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG);
5084 }
5085 }
5086 }
5087
5088 // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
5089 // undefined bits before doing a full-word comparison with zero.
5090 Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
5091 DAG.getConstant(1, dl, Cond.getValueType()));
5092
5093 return DAG.getSelectCC(dl, Cond,
5094 DAG.getConstant(0, dl, Cond.getValueType()),
5095 SelectTrue, SelectFalse, ISD::SETNE);
5096}
5097
5098static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
5099 bool &swpCmpOps, bool &swpVselOps) {
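  // A worked example: for SETULT ("unordered or less than") the steps below
  // pick GT, request a compare-operand swap for 'less', and then, because the
  // opcode is unordered, swap both again, ending with CondCode = GE, unswapped
  // compare operands, and swapped VSEL operands. "vselge" with swapped select
  // operands then yields the original true value exactly when the inputs are
  // unordered or LHS < RHS.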
5100 // Start by selecting the GE condition code for opcodes that return true for
5101 // 'equality'
5102 if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||
5103 CC == ISD::SETULE || CC == ISD::SETGE || CC == ISD::SETLE)
5104 CondCode = ARMCC::GE;
5105
5106 // and GT for opcodes that return false for 'equality'.
5107 else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||
5108 CC == ISD::SETULT || CC == ISD::SETGT || CC == ISD::SETLT)
5109 CondCode = ARMCC::GT;
5110
5111 // Since we are constrained to GE/GT, if the opcode contains 'less', we need
5112 // to swap the compare operands.
5113 if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||
5114 CC == ISD::SETULT || CC == ISD::SETLE || CC == ISD::SETLT)
5115 swpCmpOps = true;
5116
5117 // Both GT and GE are ordered comparisons, and return false for 'unordered'.
5118 // If we have an unordered opcode, we need to swap the operands to the VSEL
5119 // instruction (effectively negating the condition).
5120 //
5121 // This also has the effect of swapping which one of 'less' or 'greater'
5122 // returns true, so we also swap the compare operands. It also switches
5123 // whether we return true for 'equality', so we compensate by picking the
5124 // opposite condition code to our original choice.
5125 if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE ||
5126 CC == ISD::SETUGT) {
5127 swpCmpOps = !swpCmpOps;
5128 swpVselOps = !swpVselOps;
5129 CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT;
5130 }
5131
5132 // 'ordered' is 'anything but unordered', so use the VS condition code and
5133 // swap the VSEL operands.
5134 if (CC == ISD::SETO) {
5135 CondCode = ARMCC::VS;
5136 swpVselOps = true;
5137 }
5138
5139 // 'unordered or not equal' is 'anything but equal', so use the EQ condition
5140 // code and swap the VSEL operands. Also do this if we don't care about the
5141 // unordered case.
5142 if (CC == ISD::SETUNE || CC == ISD::SETNE) {
5143 CondCode = ARMCC::EQ;
5144 swpVselOps = true;
5145 }
5146}
5147
5148SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal,
5149 SDValue TrueVal, SDValue ARMcc, SDValue CCR,
5150 SDValue Cmp, SelectionDAG &DAG) const {
5151 if (!Subtarget->hasFP64() && VT == MVT::f64) {
5152 FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl,
5153 DAG.getVTList(MVT::i32, MVT::i32), FalseVal);
5154 TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl,
5155 DAG.getVTList(MVT::i32, MVT::i32), TrueVal);
5156
5157 SDValue TrueLow = TrueVal.getValue(0);
5158 SDValue TrueHigh = TrueVal.getValue(1);
5159 SDValue FalseLow = FalseVal.getValue(0);
5160 SDValue FalseHigh = FalseVal.getValue(1);
5161
5162 SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow,
5163 ARMcc, CCR, Cmp);
5164 SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh,
5165 ARMcc, CCR, duplicateCmp(Cmp, DAG));
5166
5167 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High);
5168 } else {
5169 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
5170 Cmp);
5171 }
5172}
5173
5174static bool isGTorGE(ISD::CondCode CC) {
5175 return CC == ISD::SETGT || CC == ISD::SETGE;
5176}
5177
5178static bool isLTorLE(ISD::CondCode CC) {
5179 return CC == ISD::SETLT || CC == ISD::SETLE;
5180}
5181
5182// See if a conditional (LHS CC RHS ? TrueVal : FalseVal) is lower-saturating.
5183// All of these conditions (and their <= and >= counterparts) will do:
5184// x < k ? k : x
5185// x > k ? x : k
5186// k < x ? x : k
5187// k > x ? k : x
5188static bool isLowerSaturate(const SDValue LHS, const SDValue RHS,
5189 const SDValue TrueVal, const SDValue FalseVal,
5190 const ISD::CondCode CC, const SDValue K) {
5191 return (isGTorGE(CC) &&
5192 ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))) ||
5193 (isLTorLE(CC) &&
5194 ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal)));
5195}
5196
5197// Check if two chained conditionals could be converted into SSAT or USAT.
5198//
5199// SSAT can replace a set of two conditional selectors that bound a number to an
5200// interval of type [~k, k] when k + 1 is a power of 2. Here are some examples:
5201//
5202// x < -k ? -k : (x > k ? k : x)
5203// x < -k ? -k : (x < k ? x : k)
5204// x > -k ? (x > k ? k : x) : -k
5205// x < k ? (x < -k ? -k : x) : k
5206// etc.
5207//
5208// LLVM canonicalizes these to either a min(max()) or a max(min())
5209// pattern. This function tries to match one of these and will return an SSAT
5210// node if successful.
5211//
5212// USAT works similarly to SSAT, but bounds the value to the interval [0, k],
5213// where k + 1 is a power of 2.
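// For example, a pair of selects clamping x to [-128, 127] uses the constants
// 127 and -128 == ~127 with 127 + 1 a power of two, so it collapses to a
// single ARMISD::SSAT of x; clamping to [0, 255] likewise collapses to
// ARMISD::USAT.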
5214static SDValue LowerSaturatingConditional(SDValue Op, SelectionDAG &DAG) {
5215 EVT VT = Op.getValueType();
5216 SDValue V1 = Op.getOperand(0);
5217 SDValue K1 = Op.getOperand(1);
5218 SDValue TrueVal1 = Op.getOperand(2);
5219 SDValue FalseVal1 = Op.getOperand(3);
5220 ISD::CondCode CC1 = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5221
5222 const SDValue Op2 = isa<ConstantSDNode>(TrueVal1) ? FalseVal1 : TrueVal1;
5223 if (Op2.getOpcode() != ISD::SELECT_CC)
5224 return SDValue();
5225
5226 SDValue V2 = Op2.getOperand(0);
5227 SDValue K2 = Op2.getOperand(1);
5228 SDValue TrueVal2 = Op2.getOperand(2);
5229 SDValue FalseVal2 = Op2.getOperand(3);
5230 ISD::CondCode CC2 = cast<CondCodeSDNode>(Op2.getOperand(4))->get();
5231
5232 SDValue V1Tmp = V1;
5233 SDValue V2Tmp = V2;
5234
5235 // Check that the registers and the constants match a max(min()) or min(max())
5236 // pattern
5237 if (V1Tmp != TrueVal1 || V2Tmp != TrueVal2 || K1 != FalseVal1 ||
5238 K2 != FalseVal2 ||
5239 !((isGTorGE(CC1) && isLTorLE(CC2)) || (isLTorLE(CC1) && isGTorGE(CC2))))
5240 return SDValue();
5241
5242 // Check that the constant in the lower-bound check is
5243 // the opposite of the constant in the upper-bound check
5244 // in 1's complement.
5245 if (!isa<ConstantSDNode>(K1) || !isa<ConstantSDNode>(K2))
5246 return SDValue();
5247
5248 int64_t Val1 = cast<ConstantSDNode>(K1)->getSExtValue();
5249 int64_t Val2 = cast<ConstantSDNode>(K2)->getSExtValue();
5250 int64_t PosVal = std::max(Val1, Val2);
5251 int64_t NegVal = std::min(Val1, Val2);
5252
5253 if (!((Val1 > Val2 && isLTorLE(CC1)) || (Val1 < Val2 && isLTorLE(CC2))) ||
5254 !isPowerOf2_64(PosVal + 1))
5255 return SDValue();
5256
5257 // Handle the difference between USAT (unsigned) and SSAT (signed)
5258 // saturation
5259 // At this point, PosVal is guaranteed to be positive
5260 uint64_t K = PosVal;
5261 SDLoc dl(Op);
5262 if (Val1 == ~Val2)
5263 return DAG.getNode(ARMISD::SSAT, dl, VT, V2Tmp,
5264 DAG.getConstant(countTrailingOnes(K), dl, VT));
5265 if (NegVal == 0)
5266 return DAG.getNode(ARMISD::USAT, dl, VT, V2Tmp,
5267 DAG.getConstant(countTrailingOnes(K), dl, VT));
5268
5269 return SDValue();
5270}
5271
5272// Check if a condition of the type x < k ? k : x can be converted into a
5273// bit operation instead of conditional moves.
5274// Currently this is allowed given:
5275// - The conditions and values match up
5276// - k is 0 or -1 (all ones)
5277// This function will not check the last condition; that's up to the caller.
5278// It returns true if the transformation can be made, and in such case
5279// returns x in V, and k in SatK.
5280static bool isLowerSaturatingConditional(const SDValue &Op, SDValue &V,
5281 SDValue &SatK)
5282{
5283 SDValue LHS = Op.getOperand(0);
5284 SDValue RHS = Op.getOperand(1);
5285 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5286 SDValue TrueVal = Op.getOperand(2);
5287 SDValue FalseVal = Op.getOperand(3);
5288
5289 SDValue *K = isa<ConstantSDNode>(LHS) ? &LHS : isa<ConstantSDNode>(RHS)
5290 ? &RHS
5291 : nullptr;
5292
5293  // No constant operand in the comparison; early out.
5294 if (!K)
5295 return false;
5296
5297 SDValue KTmp = isa<ConstantSDNode>(TrueVal) ? TrueVal : FalseVal;
5298 V = (KTmp == TrueVal) ? FalseVal : TrueVal;
5299 SDValue VTmp = (K && *K == LHS) ? RHS : LHS;
5300
5301  // If the constant in the comparison does not match the constant in the
5302  // select, or the variable does not match, early out.
5303 if (*K != KTmp || V != VTmp)
5304 return false;
5305
5306 if (isLowerSaturate(LHS, RHS, TrueVal, FalseVal, CC, *K)) {
5307 SatK = *K;
5308 return true;
5309 }
5310
5311 return false;
5312}
5313
5314bool ARMTargetLowering::isUnsupportedFloatingType(EVT VT) const {
5315 if (VT == MVT::f32)
5316 return !Subtarget->hasVFP2Base();
5317 if (VT == MVT::f64)
5318 return !Subtarget->hasFP64();
5319 if (VT == MVT::f16)
5320 return !Subtarget->hasFullFP16();
5321 return false;
5322}
5323
5324SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
5325 EVT VT = Op.getValueType();
5326 SDLoc dl(Op);
5327
5328 // Try to convert two saturating conditional selects into a single SSAT
5329 if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2())
5330 if (SDValue SatValue = LowerSaturatingConditional(Op, DAG))
5331 return SatValue;
5332
5333 // Try to convert expressions of the form x < k ? k : x (and similar forms)
5334  // into more efficient bit operations, which is possible when k is 0 or -1.
5335  // On ARM and Thumb-2, which have a flexible second operand, this will result
5336  // in a single instruction. On Thumb the shift and the bit operation will be
5337  // two instructions.
5338  // Only allow this transformation on full-width (32-bit) operations.
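  // Concretely, for k = 0 the select max(x, 0) becomes "x & ~(x >> 31)" and
  // for k = -1 it becomes "x | (x >> 31)", using an arithmetic shift of the
  // sign bit as the mask (this matches the AND/OR paths in the code below).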
5339 SDValue LowerSatConstant;
5340 SDValue SatValue;
5341 if (VT == MVT::i32 &&
5342 isLowerSaturatingConditional(Op, SatValue, LowerSatConstant)) {
5343 SDValue ShiftV = DAG.getNode(ISD::SRA, dl, VT, SatValue,
5344 DAG.getConstant(31, dl, VT));
5345 if (isNullConstant(LowerSatConstant)) {
5346 SDValue NotShiftV = DAG.getNode(ISD::XOR, dl, VT, ShiftV,
5347 DAG.getAllOnesConstant(dl, VT));
5348 return DAG.getNode(ISD::AND, dl, VT, SatValue, NotShiftV);
5349 } else if (isAllOnesConstant(LowerSatConstant))
5350 return DAG.getNode(ISD::OR, dl, VT, SatValue, ShiftV);
5351 }
5352
5353 SDValue LHS = Op.getOperand(0);
5354 SDValue RHS = Op.getOperand(1);
5355 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5356 SDValue TrueVal = Op.getOperand(2);
5357 SDValue FalseVal = Op.getOperand(3);
5358 ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FalseVal);
5359 ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TrueVal);
5360
5361 if (Subtarget->hasV8_1MMainlineOps() && CFVal && CTVal &&
5362 LHS.getValueType() == MVT::i32 && RHS.getValueType() == MVT::i32) {
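    // With v8.1-M conditional instructions, a select between two constants
    // where one is the bitwise NOT, the negation, or the increment of the
    // other can be lowered to CSINV, CSNEG, or CSINC respectively, so only a
    // single constant has to be materialised (see the opcode selection below).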
5363 unsigned TVal = CTVal->getZExtValue();
5364 unsigned FVal = CFVal->getZExtValue();
5365 unsigned Opcode = 0;
5366
5367 if (TVal == ~FVal) {
5368 Opcode = ARMISD::CSINV;
5369 } else if (TVal == ~FVal + 1) {
5370 Opcode = ARMISD::CSNEG;
5371 } else if (TVal + 1 == FVal) {
5372 Opcode = ARMISD::CSINC;
5373 } else if (TVal == FVal + 1) {
5374 Opcode = ARMISD::CSINC;
5375 std::swap(TrueVal, FalseVal);
5376 std::swap(TVal, FVal);
5377 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5378 }
5379
5380 if (Opcode) {
5381 // If one of the constants is cheaper than another, materialise the
5382 // cheaper one and let the csel generate the other.
5383 if (Opcode != ARMISD::CSINC &&
5384 HasLowerConstantMaterializationCost(FVal, TVal, Subtarget)) {
5385 std::swap(TrueVal, FalseVal);
5386 std::swap(TVal, FVal);
5387 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5388 }
5389
5390 // Attempt to use ZR by checking whether TVal is 0, possibly inverting the
5391 // condition to get there. CSINC is not invertible like the other two
5392 // (~(~a) == a and -(-a) == a, but (a+1)+1 != a).
5393 if (FVal == 0 && Opcode != ARMISD::CSINC) {
5394 std::swap(TrueVal, FalseVal);
5395 std::swap(TVal, FVal);
5396 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5397 }
5398
5399 // Drop FalseVal since we can recover it from TVal by inverting, negating, or incrementing.
5400 FalseVal = TrueVal;
5401
5402 SDValue ARMcc;
5403 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5404 EVT VT = TrueVal.getValueType();
5405 return DAG.getNode(Opcode, dl, VT, TrueVal, FalseVal, ARMcc, Cmp);
5406 }
5407 }
5408
5409 if (isUnsupportedFloatingType(LHS.getValueType())) {
5410 DAG.getTargetLoweringInfo().softenSetCCOperands(
5411 DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS);
5412
5413 // If softenSetCCOperands only returned one value, we should compare it to
5414 // zero.
5415 if (!RHS.getNode()) {
5416 RHS = DAG.getConstant(0, dl, LHS.getValueType());
5417 CC = ISD::SETNE;
5418 }
5419 }
5420
5421 if (LHS.getValueType() == MVT::i32) {
5422 // Try to generate VSEL on ARMv8.
5423 // The VSEL instruction can't use all the usual ARM condition
5424 // codes: it only has two bits to select the condition code, so it's
5425 // constrained to use only GE, GT, VS and EQ.
5426 //
5427 // To implement all the various ISD::SETXXX opcodes, we sometimes need to
5428 // swap the operands of the previous compare instruction (effectively
5429 // inverting the compare condition, swapping 'less' and 'greater') and
5430 // sometimes need to swap the operands to the VSEL (which inverts the
5431 // condition in the sense of firing whenever the previous condition didn't)
5432 if (Subtarget->hasFPARMv8Base() && (TrueVal.getValueType() == MVT::f16 ||
5433 TrueVal.getValueType() == MVT::f32 ||
5434 TrueVal.getValueType() == MVT::f64)) {
5435 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
5436 if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
5437 CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
5438 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5439 std::swap(TrueVal, FalseVal);
5440 }
5441 }
5442
5443 SDValue ARMcc;
5444 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5445 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5446 // Choose GE over PL, which vsel does not support
5447 if (cast<ConstantSDNode>(ARMcc)->getZExtValue() == ARMCC::PL)
5448 ARMcc = DAG.getConstant(ARMCC::GE, dl, MVT::i32);
5449 return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
5450 }
5451
5452 ARMCC::CondCodes CondCode, CondCode2;
5453 FPCCToARMCC(CC, CondCode, CondCode2);
5454
5455 // Normalize the fp compare. If RHS is zero we prefer to keep it there so we
5456 // match CMPFPw0 instead of CMPFP, though we don't do this for f16 because we
5457 // must use VSEL (limited condition codes), due to not having conditional f16
5458 // moves.
5459 if (Subtarget->hasFPARMv8Base() &&
5460 !(isFloatingPointZero(RHS) && TrueVal.getValueType() != MVT::f16) &&
5461 (TrueVal.getValueType() == MVT::f16 ||
5462 TrueVal.getValueType() == MVT::f32 ||
5463 TrueVal.getValueType() == MVT::f64)) {
5464 bool swpCmpOps = false;
5465 bool swpVselOps = false;
5466 checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
5467
5468 if (CondCode == ARMCC::GT || CondCode == ARMCC::GE ||
5469 CondCode == ARMCC::VS || CondCode == ARMCC::EQ) {
5470 if (swpCmpOps)
5471 std::swap(LHS, RHS);
5472 if (swpVselOps)
5473 std::swap(TrueVal, FalseVal);
5474 }
5475 }
5476
5477 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5478 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
5479 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5480 SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
5481 if (CondCode2 != ARMCC::AL) {
5482 SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32);
5483 // FIXME: Needs another CMP because flag can have but one use.
5484 SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
5485 Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG);
5486 }
5487 return Result;
5488}
5489
5490/// canChangeToInt - Given the fp compare operand, return true if it is suitable
5491/// to morph to an integer compare sequence.
5492static bool canChangeToInt(SDValue Op, bool &SeenZero,
5493 const ARMSubtarget *Subtarget) {
5494 SDNode *N = Op.getNode();
5495 if (!N->hasOneUse())
5496 // Otherwise it requires moving the value from fp to integer registers.
5497 return false;
5498 if (!N->getNumValues())
5499 return false;
5500 EVT VT = Op.getValueType();
5501 if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
5502 // f32 case is generally profitable. f64 case only makes sense when vcmpe +
5503 // vmrs are very slow, e.g. cortex-a8.
5504 return false;
5505
5506 if (isFloatingPointZero(Op)) {
5507 SeenZero = true;
5508 return true;
5509 }
5510 return ISD::isNormalLoad(N);
5511}
5512
5513static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
5514 if (isFloatingPointZero(Op))
5515 return DAG.getConstant(0, SDLoc(Op), MVT::i32);
5516
5517 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
5518 return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(),
5519 Ld->getPointerInfo(), Ld->getAlignment(),
5520 Ld->getMemOperand()->getFlags());
5521
5522 llvm_unreachable("Unknown VFP cmp argument!");
5523}
5524
5525static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
5526 SDValue &RetVal1, SDValue &RetVal2) {
5527 SDLoc dl(Op);
5528
5529 if (isFloatingPointZero(Op)) {
5530 RetVal1 = DAG.getConstant(0, dl, MVT::i32);
5531 RetVal2 = DAG.getConstant(0, dl, MVT::i32);
5532 return;
5533 }
5534
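     // Otherwise split the f64 load into two i32 loads: one from the original
     // address and one from the original address plus 4 bytes.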
5535 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
5536 SDValue Ptr = Ld->getBasePtr();
5537 RetVal1 =
5538 DAG.getLoad(MVT::i32, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
5539 Ld->getAlignment(), Ld->getMemOperand()->getFlags());
5540
5541 EVT PtrType = Ptr.getValueType();
5542 unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
5543 SDValue NewPtr = DAG.getNode(ISD::ADD, dl,
5544 PtrType, Ptr, DAG.getConstant(4, dl, PtrType));
5545 RetVal2 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), NewPtr,
5546 Ld->getPointerInfo().getWithOffset(4), NewAlign,
5547 Ld->getMemOperand()->getFlags());
5548 return;
5549 }
5550
5551 llvm_unreachable("Unknown VFP cmp argument!");
5552}
5553
5554/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
5555/// f32 and even f64 comparisons to integer ones.
5556SDValue
5557ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
5558 SDValue Chain = Op.getOperand(0);
5559 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
5560 SDValue LHS = Op.getOperand(2);
5561 SDValue RHS = Op.getOperand(3);
5562 SDValue Dest = Op.getOperand(4);
5563 SDLoc dl(Op);
5564
5565 bool LHSSeenZero = false;
5566 bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
5567 bool RHSSeenZero = false;
5568 bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);
5569 if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
5570 // If unsafe fp math optimization is enabled and there are no other uses of
5571 // the CMP operands, and the condition code is EQ or NE, we can optimize it
5572 // to an integer comparison.
5573 if (CC == ISD::SETOEQ)
5574 CC = ISD::SETEQ;
5575 else if (CC == ISD::SETUNE)
5576 CC = ISD::SETNE;
5577
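     // Masking with 0x7fffffff clears the sign bit, so +0.0 and -0.0 still
     // compare equal once the operands are treated as integers.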
5578 SDValue Mask = DAG.getConstant(0x7fffffff, dl, MVT::i32);
5579 SDValue ARMcc;
5580 if (LHS.getValueType() == MVT::f32) {
5581 LHS = DAG.getNode(ISD::AND, dl, MVT::i32,
5582 bitcastf32Toi32(LHS, DAG), Mask);
5583 RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
5584 bitcastf32Toi32(RHS, DAG), Mask);
5585 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5586 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5587 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
5588 Chain, Dest, ARMcc, CCR, Cmp);
5589 }
5590
5591 SDValue LHS1, LHS2;
5592 SDValue RHS1, RHS2;
5593 expandf64Toi32(LHS, DAG, LHS1, LHS2);
5594 expandf64Toi32(RHS, DAG, RHS1, RHS2);
5595 LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);
5596 RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
5597 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
5598 ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5599 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
5600 SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
5601 return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops);
5602 }
5603
5604 return SDValue();
5605}
5606
5607SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
5608 SDValue Chain = Op.getOperand(0);
5609 SDValue Cond = Op.getOperand(1);
5610 SDValue Dest = Op.getOperand(2);
5611 SDLoc dl(Op);
5612
5613 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
5614 // instruction.
5615 unsigned Opc = Cond.getOpcode();
5616 bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) &&
5617 !Subtarget->isThumb1Only();
5618 if (Cond.getResNo() == 1 &&
5619 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
5620 Opc == ISD::USUBO || OptimizeMul)) {
5621 // Only lower legal XALUO ops.
5622 if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
5623 return SDValue();
5624
5625 // The actual operation with overflow check.
5626 SDValue Value, OverflowCmp;
5627 SDValue ARMcc;
5628 std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
5629
5630 // Reverse the condition code.
5631 ARMCC::CondCodes CondCode =
5632 (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
5633 CondCode = ARMCC::getOppositeCondition(CondCode);
5634 ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
5635 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5636
5637 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR,
5638 OverflowCmp);
5639 }
5640
5641 return SDValue();
5642}
5643
5644SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
5645 SDValue Chain = Op.getOperand(0);
5646 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
5647 SDValue LHS = Op.getOperand(2);
5648 SDValue RHS = Op.getOperand(3);
5649 SDValue Dest = Op.getOperand(4);
5650 SDLoc dl(Op);
5651
5652 if (isUnsupportedFloatingType(LHS.getValueType())) {
5653 DAG.getTargetLoweringInfo().softenSetCCOperands(
5654 DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS);
5655
5656 // If softenSetCCOperands only returned one value, we should compare it to
5657 // zero.
5658 if (!RHS.getNode()) {
5659 RHS = DAG.getConstant(0, dl, LHS.getValueType());
5660 CC = ISD::SETNE;
5661 }
5662 }
5663
5664 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
5665 // instruction.
5666 unsigned Opc = LHS.getOpcode();
5667 bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) &&
5668 !Subtarget->isThumb1Only();
5669 if (LHS.getResNo() == 1 && (isOneConstant(RHS) || isNullConstant(RHS)) &&
5670 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
5671 Opc == ISD::USUBO || OptimizeMul) &&
5672 (CC == ISD::SETEQ || CC == ISD::SETNE)) {
5673 // Only lower legal XALUO ops.
5674 if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
5675 return SDValue();
5676
5677 // The actual operation with overflow check.
5678 SDValue Value, OverflowCmp;
5679 SDValue ARMcc;
5680 std::tie(Value, OverflowCmp) = getARMXALUOOp(LHS.getValue(0), DAG, ARMcc);
5681
5682 if ((CC == ISD::SETNE) != isOneConstant(RHS)) {
5683 // Reverse the condition code.
5684 ARMCC::CondCodes CondCode =
5685 (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
5686 CondCode = ARMCC::getOppositeCondition(CondCode);
5687 ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
5688 }
5689 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5690
5691 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR,
5692 OverflowCmp);
5693 }
5694
5695 if (LHS.getValueType() == MVT::i32) {
5696 SDValue ARMcc;
5697 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5698 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5699 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
5700 Chain, Dest, ARMcc, CCR, Cmp);
5701 }
5702
5703 if (getTargetMachine().Options.UnsafeFPMath &&
5704 (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
5705 CC == ISD::SETNE || CC == ISD::SETUNE)) {
5706 if (SDValue Result = OptimizeVFPBrcond(Op, DAG))
5707 return Result;
5708 }
5709
5710 ARMCC::CondCodes CondCode, CondCode2;
5711 FPCCToARMCC(CC, CondCode, CondCode2);
5712
5713 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5714 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
5715 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5716 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
5717 SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
5718 SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
5719 if (CondCode2 != ARMCC::AL) {
5720 ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
5721 SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
5722 Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
5723 }
5724 return Res;
5725}
5726
5727SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
5728 SDValue Chain = Op.getOperand(0);
5729 SDValue Table = Op.getOperand(1);
5730 SDValue Index = Op.getOperand(2);
5731 SDLoc dl(Op);
5732
5733 EVT PTy = getPointerTy(DAG.getDataLayout());
5734 JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
5735 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
5736 Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI);
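     // Each jump table entry is 4 bytes wide, so scale the index by 4 before
     // adding it to the table base.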
5737 Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, dl, PTy));
5738 SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Index);
5739 if (Subtarget->isThumb2() || (Subtarget->hasV8MBaselineOps() && Subtarget->isThumb())) {
5740 // Thumb2 and ARMv8-M use a two-level jump. That is, the code jumps into the
5741 // jump table, which does another jump to the destination. This also makes it
5742 // easier to translate to TBB / TBH later (Thumb2 only).
5743 // FIXME: This might not work if the function is extremely large.
5744 return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
5745 Addr, Op.getOperand(2), JTI);
5746 }
5747 if (isPositionIndependent() || Subtarget->isROPI()) {
5748 Addr =
5749 DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
5750 MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
5751 Chain = Addr.getValue(1);
5752 Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Addr);
5753 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
5754 } else {
5755 Addr =
5756 DAG.getLoad(PTy, dl, Chain, Addr,
5757 MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
5758 Chain = Addr.getValue(1);
5759 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
5760 }
5761}
5762
5763static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
5764 EVT VT = Op.getValueType();
5765 SDLoc dl(Op);
5766
5767 if (Op.getValueType().getVectorElementType() == MVT::i32) {
5768 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
5769 return Op;
5770 return DAG.UnrollVectorOp(Op.getNode());
5771 }
5772
5773 const bool HasFullFP16 =
5774 static_cast<const ARMSubtarget&>(DAG.getSubtarget()).hasFullFP16();
5775
5776 EVT NewTy;
5777 const EVT OpTy = Op.getOperand(0).getValueType();
5778 if (OpTy == MVT::v4f32)
5779 NewTy = MVT::v4i32;
5780 else if (OpTy == MVT::v4f16 && HasFullFP16)
5781 NewTy = MVT::v4i16;
5782 else if (OpTy == MVT::v8f16 && HasFullFP16)
5783 NewTy = MVT::v8i16;
5784 else
5785 llvm_unreachable("Invalid type for custom lowering!");
5786
5787 if (VT != MVT::v4i16 && VT != MVT::v8i16)
5788 return DAG.UnrollVectorOp(Op.getNode());
5789
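     // Perform the conversion in NewTy, then truncate the result down to the
     // requested element type.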
5790 Op = DAG.getNode(Op.getOpcode(), dl, NewTy, Op.getOperand(0));
5791 return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
5792}
5793
5794SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
5795 EVT VT = Op.getValueType();
5796 if (VT.isVector())
5797 return LowerVectorFP_TO_INT(Op, DAG);
5798
5799 bool IsStrict = Op->isStrictFPOpcode();
5800 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
5801
5802 if (isUnsupportedFloatingType(SrcVal.getValueType())) {
5803 RTLIB::Libcall LC;
5804 if (Op.getOpcode() == ISD::FP_TO_SINT ||
5805 Op.getOpcode() == ISD::STRICT_FP_TO_SINT)
5806 LC = RTLIB::getFPTOSINT(SrcVal.getValueType(),
5807 Op.getValueType());
5808 else
5809 LC = RTLIB::getFPTOUINT(SrcVal.getValueType(),
5810 Op.getValueType());
5811 SDLoc Loc(Op);
5812 MakeLibCallOptions CallOptions;
5813 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
5814 SDValue Result;
5815 std::tie(Result, Chain) = makeLibCall(DAG, LC, Op.getValueType(), SrcVal,
5816 CallOptions, Loc, Chain);
5817 return IsStrict ? DAG.getMergeValues({Result, Chain}, Loc) : Result;
5818 }
5819
5820 // FIXME: Remove this when we have strict fp instruction selection patterns
5821 if (IsStrict) {
5822 SDLoc Loc(Op);
5823 SDValue Result =
5824 DAG.getNode(Op.getOpcode() == ISD::STRICT_FP_TO_SINT ? ISD::FP_TO_SINT
5825 : ISD::FP_TO_UINT,
5826 Loc, Op.getValueType(), SrcVal);
5827 return DAG.getMergeValues({Result, Op.getOperand(0)}, Loc);
5828 }
5829
5830 return Op;
5831}
5832
5833static SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
5834 const ARMSubtarget *Subtarget) {
5835 EVT VT = Op.getValueType();
5836 EVT ToVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
5837 EVT FromVT = Op.getOperand(0).getValueType();
5838
5839 if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f32)
5840 return Op;
5841 if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f64 &&
5842 Subtarget->hasFP64())
5843 return Op;
5844 if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f16 &&
5845 Subtarget->hasFullFP16())
5846 return Op;
5847 if (VT == MVT::v4i32 && ToVT == MVT::i32 && FromVT == MVT::v4f32 &&
5848 Subtarget->hasMVEFloatOps())
5849 return Op;
5850 if (VT == MVT::v8i16 && ToVT == MVT::i16 && FromVT == MVT::v8f16 &&
5851 Subtarget->hasMVEFloatOps())
5852 return Op;
5853
5854 if (FromVT != MVT::v4f32 && FromVT != MVT::v8f16)
5855 return SDValue();
5856
5857 SDLoc DL(Op);
5858 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
5859 unsigned BW = ToVT.getScalarSizeInBits() - IsSigned;
5860 SDValue CVT = DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
5861 DAG.getValueType(VT.getScalarType()));
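     // Clamp the converted value by hand to the range of the requested
     // saturation type: [0, 2^BW - 1] for unsigned, [-2^BW, 2^BW - 1] for
     // signed (BW already excludes the sign bit).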
5862 SDValue Max = DAG.getNode(IsSigned ? ISD::SMIN : ISD::UMIN, DL, VT, CVT,
5863 DAG.getConstant((1 << BW) - 1, DL, VT));
5864 if (IsSigned)
5865 Max = DAG.getNode(ISD::SMAX, DL, VT, Max,
5866 DAG.getConstant(-(1 << BW), DL, VT));
5867 return Max;
5868}
5869
5870static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
5871 EVT VT = Op.getValueType();
5872 SDLoc dl(Op);
5873
5874 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
5875 if (VT.getVectorElementType() == MVT::f32)
5876 return Op;
5877 return DAG.UnrollVectorOp(Op.getNode());
5878 }
5879
5880 assert((Op.getOperand(0).getValueType() == MVT::v4i16 ||
5881         Op.getOperand(0).getValueType() == MVT::v8i16) &&
5882        "Invalid type for custom lowering!");
5883
5884 const bool HasFullFP16 =
5885 static_cast<const ARMSubtarget&>(DAG.getSubtarget()).hasFullFP16();
5886
5887 EVT DestVecType;
5888 if (VT == MVT::v4f32)
5889 DestVecType = MVT::v4i32;
5890 else if (VT == MVT::v4f16 && HasFullFP16)
5891 DestVecType = MVT::v4i16;
5892 else if (VT == MVT::v8f16 && HasFullFP16)
5893 DestVecType = MVT::v8i16;
5894 else
5895 return DAG.UnrollVectorOp(Op.getNode());
5896
5897 unsigned CastOpc;
5898 unsigned Opc;
5899 switch (Op.getOpcode()) {
5900 default: llvm_unreachable("Invalid opcode!");
5901 case ISD::SINT_TO_FP:
5902 CastOpc = ISD::SIGN_EXTEND;
5903 Opc = ISD::SINT_TO_FP;
5904 break;
5905 case ISD::UINT_TO_FP:
5906 CastOpc = ISD::ZERO_EXTEND;
5907 Opc = ISD::UINT_TO_FP;
5908 break;
5909 }
5910
5911 Op = DAG.getNode(CastOpc, dl, DestVecType, Op.getOperand(0));
5912 return DAG.getNode(Opc, dl, VT, Op);
5913}
5914
5915SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
5916 EVT VT = Op.getValueType();
5917 if (VT.isVector())
5918 return LowerVectorINT_TO_FP(Op, DAG);
5919 if (isUnsupportedFloatingType(VT)) {
5920 RTLIB::Libcall LC;
5921 if (Op.getOpcode() == ISD::SINT_TO_FP)
5922 LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(),
5923 Op.getValueType());
5924 else
5925 LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(),
5926 Op.getValueType());
5927 MakeLibCallOptions CallOptions;
5928 return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
5929 CallOptions, SDLoc(Op)).first;
5930 }
5931
5932 return Op;
5933}
5934
5935SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
5936 // Implement fcopysign by copying the sign bit of operand 1 into operand 0.
5937 SDValue Tmp0 = Op.getOperand(0);
5938 SDValue Tmp1 = Op.getOperand(1);
5939 SDLoc dl(Op);
5940 EVT VT = Op.getValueType();
5941 EVT SrcVT = Tmp1.getValueType();
5942 bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
5943 Tmp0.getOpcode() == ARMISD::VMOVDRR;
5944 bool UseNEON = !InGPR && Subtarget->hasNEON();
5945
5946 if (UseNEON) {
5947 // Use VBSL to copy the sign bit.
5948 unsigned EncodedVal = ARM_AM::createVMOVModImm(0x6, 0x80);
5949 SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
5950 DAG.getTargetConstant(EncodedVal, dl, MVT::i32));
5951 EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
5952 if (VT == MVT::f64)
5953 Mask = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT,
5954 DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
5955 DAG.getConstant(32, dl, MVT::i32));
5956 else /*if (VT == MVT::f32)*/
5957 Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
5958 if (SrcVT == MVT::f32) {
5959 Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
5960 if (VT == MVT::f64)
5961 Tmp1 = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT,
5962 DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
5963 DAG.getConstant(32, dl, MVT::i32));
5964 } else if (VT == MVT::f32)
5965 Tmp1 = DAG.getNode(ARMISD::VSHRuIMM, dl, MVT::v1i64,
5966 DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
5967 DAG.getConstant(32, dl, MVT::i32));
5968 Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
5969 Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
5970
5971 SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0xff),
5972 dl, MVT::i32);
5973 AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
5974 SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
5975 DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
5976
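     // Combine as (Tmp1 & Mask) | (Tmp0 & ~Mask): the sign bit comes from Tmp1
     // and all remaining bits from Tmp0. The NEON patterns can select this as
     // VBSL.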
5977 SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
5978 DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
5979 DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
5980 if (VT == MVT::f32) {
5981 Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
5982 Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
5983 DAG.getConstant(0, dl, MVT::i32));
5984 } else {
5985 Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
5986 }
5987
5988 return Res;
5989 }
5990
5991 // Bitcast operand 1 to i32.
5992 if (SrcVT == MVT::f64)
5993 Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
5994 Tmp1).getValue(1);
5995 Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
5996
5997 // Or in the signbit with integer operations.
5998 SDValue Mask1 = DAG.getConstant(0x80000000, dl, MVT::i32);
5999 SDValue Mask2 = DAG.getConstant(0x7fffffff, dl, MVT::i32);
6000 Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
6001 if (VT == MVT::f32) {
6002 Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
6003 DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
6004 return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
6005 DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
6006 }
6007
6008 // f64: Or the high part with signbit and then combine two parts.
6009 Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
6010 Tmp0);
6011 SDValue Lo = Tmp0.getValue(0);
6012 SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
6013 Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
6014 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
6015}
6016
6017SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
6018 MachineFunction &MF = DAG.getMachineFunction();
6019 MachineFrameInfo &MFI = MF.getFrameInfo();
6020 MFI.setReturnAddressIsTaken(true);
6021
6022 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
6023 return SDValue();
6024
6025 EVT VT = Op.getValueType();
6026 SDLoc dl(Op);
6027 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
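     // For a non-zero depth, walk up via the frame address and load the return
     // address that is saved 4 bytes above it.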
6028 if (Depth) {
6029 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
6030 SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
6031 return DAG.getLoad(VT, dl, DAG.getEntryNode(),
6032 DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
6033 MachinePointerInfo());
6034 }
6035
6036 // Return LR, which contains the return address. Mark it an implicit live-in.
6037 unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
6038 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
6039}
6040
6041SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
6042 const ARMBaseRegisterInfo &ARI =
6043 *static_cast<const ARMBaseRegisterInfo*>(RegInfo);
6044 MachineFunction &MF = DAG.getMachineFunction();
6045 MachineFrameInfo &MFI = MF.getFrameInfo();
6046 MFI.setFrameAddressIsTaken(true);
6047
6048 EVT VT = Op.getValueType();
6049 SDLoc dl(Op); // FIXME probably not meaningful
6050 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
6051 Register FrameReg = ARI.getFrameRegister(MF);
6052 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
6053 while (Depth--)
6054 FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
6055 MachinePointerInfo());
6056 return FrameAddr;
6057}
6058
6059// FIXME? Maybe this could be a TableGen attribute on some registers and
6060// this table could be generated automatically from RegInfo.
6061Register ARMTargetLowering::getRegisterByName(const char* RegName, LLT VT,
6062 const MachineFunction &MF) const {
6063 Register Reg = StringSwitch<unsigned>(RegName)
6064 .Case("sp", ARM::SP)
6065 .Default(0);
6066 if (Reg)
6067 return Reg;
6068 report_fatal_error(Twine("Invalid register name \""
6069 + StringRef(RegName) + "\"."));
6070}
6071
6072 // The result is a 64-bit value, so split it into two 32-bit values and
6073 // return them as a pair of values.
6074static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl<SDValue> &Results,
6075 SelectionDAG &DAG) {
6076 SDLoc DL(N);
6077
6078 // This function is only supposed to be called for an i64 destination type.
6079 assert(N->getValueType(0) == MVT::i64 &&
6080        "ExpandREAD_REGISTER called for non-i64 type result.");
6081
6082 SDValue Read = DAG.getNode(ISD::READ_REGISTER, DL,
6083 DAG.getVTList(MVT::i32, MVT::i32, MVT::Other),
6084 N->getOperand(0),
6085 N->getOperand(1));
6086
6087 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Read.getValue(0),
6088 Read.getValue(1)));
6089 Results.push_back(Read.getOperand(0));
6090}
6091
6092/// \p BC is a bitcast that is about to be turned into a VMOVDRR.
6093/// When \p DstVT, the destination type of \p BC, is on the vector
6094/// register bank and the source of bitcast, \p Op, operates on the same bank,
6095/// it might be possible to combine them, such that everything stays on the
6096/// vector register bank.
6097 /// \return The node that would replace \p BC, if the combine
6098/// is possible.
6099static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC,
6100 SelectionDAG &DAG) {
6101 SDValue Op = BC->getOperand(0);
6102 EVT DstVT = BC->getValueType(0);
6103
6104 // The only vector instruction that can produce a scalar (remember,
6105 // since the bitcast was about to be turned into VMOVDRR, the source
6106 // type is i64) from a vector is EXTRACT_VECTOR_ELT.
6107 // Moreover, we can do this combine only if there is one use.
6108 // Finally, if the destination type is not a vector, there is not
6109 // much point on forcing everything on the vector bank.
6110 if (!DstVT.isVector() || Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
6111 !Op.hasOneUse())
6112 return SDValue();
6113
6114 // If the index is not constant, we will introduce an additional
6115 // multiply that will stick.
6116 // Give up in that case.
6117 ConstantSDNode *Index = dyn_cast<ConstantSDNode>(Op.getOperand(1));
6118 if (!Index)
6119 return SDValue();
6120 unsigned DstNumElt = DstVT.getVectorNumElements();
6121
6122 // Compute the new index.
6123 const APInt &APIntIndex = Index->getAPIntValue();
6124 APInt NewIndex(APIntIndex.getBitWidth(), DstNumElt);
6125 NewIndex *= APIntIndex;
6126 // Check if the new constant index fits into i32.
6127 if (NewIndex.getBitWidth() > 32)
6128 return SDValue();
6129
6130 // vMTy bitcast(i64 extractelt vNi64 src, i32 index) ->
6131 // vMTy extractsubvector vNxMTy (bitcast vNi64 src), i32 index*M)
6132 SDLoc dl(Op);
6133 SDValue ExtractSrc = Op.getOperand(0);
6134 EVT VecVT = EVT::getVectorVT(
6135 *DAG.getContext(), DstVT.getScalarType(),
6136 ExtractSrc.getValueType().getVectorNumElements() * DstNumElt);
6137 SDValue BitCast = DAG.getNode(ISD::BITCAST, dl, VecVT, ExtractSrc);
6138 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, BitCast,
6139 DAG.getConstant(NewIndex.getZExtValue(), dl, MVT::i32));
6140}
6141
6142/// ExpandBITCAST - If the target supports VFP, this function is called to
6143/// expand a bit convert where either the source or destination type is i64 to
6144/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
6145/// operand type is illegal (e.g., v2f32 for a target that doesn't support
6146/// vectors), since the legalizer won't know what to do with that.
6147SDValue ARMTargetLowering::ExpandBITCAST(SDNode *N, SelectionDAG &DAG,
6148 const ARMSubtarget *Subtarget) const {
6149 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6150 SDLoc dl(N);
6151 SDValue Op = N->getOperand(0);
6152
6153 // This function is only supposed to be called for i16 and i64 types, either
6154 // as the source or destination of the bit convert.
6155 EVT SrcVT = Op.getValueType();
6156 EVT DstVT = N->getValueType(0);
6157
6158 if ((SrcVT == MVT::i16 || SrcVT == MVT::i32) &&
6159 (DstVT == MVT::f16 || DstVT == MVT::bf16))
6160 return MoveToHPR(SDLoc(N), DAG, MVT::i32, DstVT.getSimpleVT(),
6161 DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), MVT::i32, Op));
6162
6163 if ((DstVT == MVT::i16 || DstVT == MVT::i32) &&
6164 (SrcVT == MVT::f16 || SrcVT == MVT::bf16))
6165 return DAG.getNode(
6166 ISD::TRUNCATE, SDLoc(N), DstVT,
6167 MoveFromHPR(SDLoc(N), DAG, MVT::i32, SrcVT.getSimpleVT(), Op));
6168
6169 if (!(SrcVT == MVT::i64 || DstVT == MVT::i64))
6170 return SDValue();
6171
6172 // Turn i64->f64 into VMOVDRR.
6173