File: build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/lib/Target/ARM/ARMISelLowering.cpp
Warning: line 2667, column 20: Called C++ object pointer is null
1 | //===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===//
2 | //
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 | // See https://llvm.org/LICENSE.txt for license information.
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 | //
7 | //===----------------------------------------------------------------------===//
8 | //
9 | // This file defines the interfaces that ARM uses to lower LLVM code into a
10 | // selection DAG.
11 | //
12 | //===----------------------------------------------------------------------===//
13 |
14 | #include "ARMISelLowering.h"
15 | #include "ARMBaseInstrInfo.h"
16 | #include "ARMBaseRegisterInfo.h"
17 | #include "ARMCallingConv.h"
18 | #include "ARMConstantPoolValue.h"
19 | #include "ARMMachineFunctionInfo.h"
20 | #include "ARMPerfectShuffle.h"
21 | #include "ARMRegisterInfo.h"
22 | #include "ARMSelectionDAGInfo.h"
23 | #include "ARMSubtarget.h"
24 | #include "ARMTargetTransformInfo.h"
25 | #include "MCTargetDesc/ARMAddressingModes.h"
26 | #include "MCTargetDesc/ARMBaseInfo.h"
27 | #include "Utils/ARMBaseInfo.h"
28 | #include "llvm/ADT/APFloat.h"
29 | #include "llvm/ADT/APInt.h"
30 | #include "llvm/ADT/ArrayRef.h"
31 | #include "llvm/ADT/BitVector.h"
32 | #include "llvm/ADT/DenseMap.h"
33 | #include "llvm/ADT/STLExtras.h"
34 | #include "llvm/ADT/SmallPtrSet.h"
35 | #include "llvm/ADT/SmallVector.h"
36 | #include "llvm/ADT/Statistic.h"
37 | #include "llvm/ADT/StringExtras.h"
38 | #include "llvm/ADT/StringRef.h"
39 | #include "llvm/ADT/StringSwitch.h"
40 | #include "llvm/ADT/Triple.h"
41 | #include "llvm/ADT/Twine.h"
42 | #include "llvm/Analysis/VectorUtils.h"
43 | #include "llvm/CodeGen/CallingConvLower.h"
44 | #include "llvm/CodeGen/ISDOpcodes.h"
45 | #include "llvm/CodeGen/IntrinsicLowering.h"
46 | #include "llvm/CodeGen/MachineBasicBlock.h"
47 | #include "llvm/CodeGen/MachineConstantPool.h"
48 | #include "llvm/CodeGen/MachineFrameInfo.h"
49 | #include "llvm/CodeGen/MachineFunction.h"
50 | #include "llvm/CodeGen/MachineInstr.h"
51 | #include "llvm/CodeGen/MachineInstrBuilder.h"
52 | #include "llvm/CodeGen/MachineJumpTableInfo.h"
53 | #include "llvm/CodeGen/MachineMemOperand.h"
54 | #include "llvm/CodeGen/MachineOperand.h"
55 | #include "llvm/CodeGen/MachineRegisterInfo.h"
56 | #include "llvm/CodeGen/RuntimeLibcalls.h"
57 | #include "llvm/CodeGen/SelectionDAG.h"
58 | #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
59 | #include "llvm/CodeGen/SelectionDAGNodes.h"
60 | #include "llvm/CodeGen/TargetInstrInfo.h"
61 | #include "llvm/CodeGen/TargetLowering.h"
62 | #include "llvm/CodeGen/TargetOpcodes.h"
63 | #include "llvm/CodeGen/TargetRegisterInfo.h"
64 | #include "llvm/CodeGen/TargetSubtargetInfo.h"
65 | #include "llvm/CodeGen/ValueTypes.h"
66 | #include "llvm/IR/Attributes.h"
67 | #include "llvm/IR/CallingConv.h"
68 | #include "llvm/IR/Constant.h"
69 | #include "llvm/IR/Constants.h"
70 | #include "llvm/IR/DataLayout.h"
71 | #include "llvm/IR/DebugLoc.h"
72 | #include "llvm/IR/DerivedTypes.h"
73 | #include "llvm/IR/Function.h"
74 | #include "llvm/IR/GlobalAlias.h"
75 | #include "llvm/IR/GlobalValue.h"
76 | #include "llvm/IR/GlobalVariable.h"
77 | #include "llvm/IR/IRBuilder.h"
78 | #include "llvm/IR/InlineAsm.h"
79 | #include "llvm/IR/Instruction.h"
80 | #include "llvm/IR/Instructions.h"
81 | #include "llvm/IR/IntrinsicInst.h"
82 | #include "llvm/IR/Intrinsics.h"
83 | #include "llvm/IR/IntrinsicsARM.h"
84 | #include "llvm/IR/Module.h"
85 | #include "llvm/IR/PatternMatch.h"
86 | #include "llvm/IR/Type.h"
87 | #include "llvm/IR/User.h"
88 | #include "llvm/IR/Value.h"
89 | #include "llvm/MC/MCInstrDesc.h"
90 | #include "llvm/MC/MCInstrItineraries.h"
91 | #include "llvm/MC/MCRegisterInfo.h"
92 | #include "llvm/MC/MCSchedule.h"
93 | #include "llvm/Support/AtomicOrdering.h"
94 | #include "llvm/Support/BranchProbability.h"
95 | #include "llvm/Support/Casting.h"
96 | #include "llvm/Support/CodeGen.h"
97 | #include "llvm/Support/CommandLine.h"
98 | #include "llvm/Support/Compiler.h"
99 | #include "llvm/Support/Debug.h"
100 | #include "llvm/Support/ErrorHandling.h"
101 | #include "llvm/Support/KnownBits.h"
102 | #include "llvm/Support/MachineValueType.h"
103 | #include "llvm/Support/MathExtras.h"
104 | #include "llvm/Support/raw_ostream.h"
105 | #include "llvm/Target/TargetMachine.h"
106 | #include "llvm/Target/TargetOptions.h"
107 | #include <algorithm>
108 | #include <cassert>
109 | #include <cstdint>
110 | #include <cstdlib>
111 | #include <iterator>
112 | #include <limits>
113 | #include <string>
114 | #include <tuple>
115 | #include <utility>
116 | #include <vector>
117 |
118 | using namespace llvm;
119 | using namespace llvm::PatternMatch;
120 |
121 | #define DEBUG_TYPE "arm-isel"
122 |
123 | STATISTIC(NumTailCalls, "Number of tail calls");
124 | STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
125 | STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
126 | STATISTIC(NumConstpoolPromoted,
127 |   "Number of constants with their storage promoted into constant pools");
128 |
129 | static cl::opt<bool>
130 | ARMInterworking("arm-interworking", cl::Hidden,
131 |   cl::desc("Enable / disable ARM interworking (for debugging only)"),
132 |   cl::init(true));
133 |
134 | static cl::opt<bool> EnableConstpoolPromotion(
135 |     "arm-promote-constant", cl::Hidden,
136 |     cl::desc("Enable / disable promotion of unnamed_addr constants into "
137 |              "constant pools"),
138 |     cl::init(false)); // FIXME: set to true by default once PR32780 is fixed
139 | static cl::opt<unsigned> ConstpoolPromotionMaxSize(
140 |     "arm-promote-constant-max-size", cl::Hidden,
141 |     cl::desc("Maximum size of constant to promote into a constant pool"),
142 |     cl::init(64));
143 | static cl::opt<unsigned> ConstpoolPromotionMaxTotal(
144 |     "arm-promote-constant-max-total", cl::Hidden,
145 |     cl::desc("Maximum size of ALL constants to promote into a constant pool"),
146 |     cl::init(128));
147 |
148 | cl::opt<unsigned>
149 | MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
150 |   cl::desc("Maximum interleave factor for MVE VLDn to generate."),
151 |   cl::init(2));
152 |
153 | // The APCS parameter registers.
154 | static const MCPhysReg GPRArgRegs[] = {
155 |   ARM::R0, ARM::R1, ARM::R2, ARM::R3
156 | };
157 |
158 | void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT) {
159 |   if (VT != PromotedLdStVT) {
160 |     setOperationAction(ISD::LOAD, VT, Promote);
161 |     AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
162 |
163 |     setOperationAction(ISD::STORE, VT, Promote);
164 |     AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
165 |   }
166 |
167 |   MVT ElemTy = VT.getVectorElementType();
168 |   if (ElemTy != MVT::f64)
169 |     setOperationAction(ISD::SETCC, VT, Custom);
170 |   setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
171 |   setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
172 |   if (ElemTy == MVT::i32) {
173 |     setOperationAction(ISD::SINT_TO_FP, VT, Custom);
174 |     setOperationAction(ISD::UINT_TO_FP, VT, Custom);
175 |     setOperationAction(ISD::FP_TO_SINT, VT, Custom);
176 |     setOperationAction(ISD::FP_TO_UINT, VT, Custom);
177 |   } else {
178 |     setOperationAction(ISD::SINT_TO_FP, VT, Expand);
179 |     setOperationAction(ISD::UINT_TO_FP, VT, Expand);
180 |     setOperationAction(ISD::FP_TO_SINT, VT, Expand);
181 |     setOperationAction(ISD::FP_TO_UINT, VT, Expand);
182 |   }
183 |   setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
184 |   setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
185 |   setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
186 |   setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
187 |   setOperationAction(ISD::SELECT, VT, Expand);
188 |   setOperationAction(ISD::SELECT_CC, VT, Expand);
189 |   setOperationAction(ISD::VSELECT, VT, Expand);
190 |   setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
191 |   if (VT.isInteger()) {
192 |     setOperationAction(ISD::SHL, VT, Custom);
193 |     setOperationAction(ISD::SRA, VT, Custom);
194 |     setOperationAction(ISD::SRL, VT, Custom);
195 |   }
196 |
197 |   // Neon does not support vector divide/remainder operations.
198 |   setOperationAction(ISD::SDIV, VT, Expand);
199 |   setOperationAction(ISD::UDIV, VT, Expand);
200 |   setOperationAction(ISD::FDIV, VT, Expand);
201 |   setOperationAction(ISD::SREM, VT, Expand);
202 |   setOperationAction(ISD::UREM, VT, Expand);
203 |   setOperationAction(ISD::FREM, VT, Expand);
204 |   setOperationAction(ISD::SDIVREM, VT, Expand);
205 |   setOperationAction(ISD::UDIVREM, VT, Expand);
206 |
207 |   if (!VT.isFloatingPoint() &&
208 |       VT != MVT::v2i64 && VT != MVT::v1i64)
209 |     for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
210 |       setOperationAction(Opcode, VT, Legal);
211 |   if (!VT.isFloatingPoint())
212 |     for (auto Opcode : {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT})
213 |       setOperationAction(Opcode, VT, Legal);
214 | }
215 |
216 | void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
217 |   addRegisterClass(VT, &ARM::DPRRegClass);
218 |   addTypeForNEON(VT, MVT::f64);
219 | }
220 |
221 | void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
222 |   addRegisterClass(VT, &ARM::DPairRegClass);
223 |   addTypeForNEON(VT, MVT::v2f64);
224 | }
225 |
226 | void ARMTargetLowering::setAllExpand(MVT VT) {
227 |   for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
228 |     setOperationAction(Opc, VT, Expand);
229 |
230 |   // We support these really simple operations even on types where all
231 |   // the actual arithmetic has to be broken down into simpler
232 |   // operations or turned into library calls.
233 |   setOperationAction(ISD::BITCAST, VT, Legal);
234 |   setOperationAction(ISD::LOAD, VT, Legal);
235 |   setOperationAction(ISD::STORE, VT, Legal);
236 |   setOperationAction(ISD::UNDEF, VT, Legal);
237 | }
238 |
239 | void ARMTargetLowering::addAllExtLoads(const MVT From, const MVT To,
240 |                                        LegalizeAction Action) {
241 |   setLoadExtAction(ISD::EXTLOAD, From, To, Action);
242 |   setLoadExtAction(ISD::ZEXTLOAD, From, To, Action);
243 |   setLoadExtAction(ISD::SEXTLOAD, From, To, Action);
244 | }
245 |
246 | void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
247 |   const MVT IntTypes[] = { MVT::v16i8, MVT::v8i16, MVT::v4i32 };
248 |
249 |   for (auto VT : IntTypes) {
250 |     addRegisterClass(VT, &ARM::MQPRRegClass);
251 |     setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
252 |     setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
253 |     setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
254 |     setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
255 |     setOperationAction(ISD::SHL, VT, Custom);
256 |     setOperationAction(ISD::SRA, VT, Custom);
257 |     setOperationAction(ISD::SRL, VT, Custom);
258 |     setOperationAction(ISD::SMIN, VT, Legal);
259 |     setOperationAction(ISD::SMAX, VT, Legal);
260 |     setOperationAction(ISD::UMIN, VT, Legal);
261 |     setOperationAction(ISD::UMAX, VT, Legal);
262 |     setOperationAction(ISD::ABS, VT, Legal);
263 |     setOperationAction(ISD::SETCC, VT, Custom);
264 |     setOperationAction(ISD::MLOAD, VT, Custom);
265 |     setOperationAction(ISD::MSTORE, VT, Legal);
266 |     setOperationAction(ISD::CTLZ, VT, Legal);
267 |     setOperationAction(ISD::CTTZ, VT, Custom);
268 |     setOperationAction(ISD::BITREVERSE, VT, Legal);
269 |     setOperationAction(ISD::BSWAP, VT, Legal);
270 |     setOperationAction(ISD::SADDSAT, VT, Legal);
271 |     setOperationAction(ISD::UADDSAT, VT, Legal);
272 |     setOperationAction(ISD::SSUBSAT, VT, Legal);
273 |     setOperationAction(ISD::USUBSAT, VT, Legal);
274 |     setOperationAction(ISD::ABDS, VT, Legal);
275 |     setOperationAction(ISD::ABDU, VT, Legal);
276 |     setOperationAction(ISD::AVGFLOORS, VT, Legal);
277 |     setOperationAction(ISD::AVGFLOORU, VT, Legal);
278 |     setOperationAction(ISD::AVGCEILS, VT, Legal);
279 |     setOperationAction(ISD::AVGCEILU, VT, Legal);
280 |
281 |     // No native support for these.
282 |     setOperationAction(ISD::UDIV, VT, Expand);
283 |     setOperationAction(ISD::SDIV, VT, Expand);
284 |     setOperationAction(ISD::UREM, VT, Expand);
285 |     setOperationAction(ISD::SREM, VT, Expand);
286 |     setOperationAction(ISD::UDIVREM, VT, Expand);
287 |     setOperationAction(ISD::SDIVREM, VT, Expand);
288 |     setOperationAction(ISD::CTPOP, VT, Expand);
289 |     setOperationAction(ISD::SELECT, VT, Expand);
290 |     setOperationAction(ISD::SELECT_CC, VT, Expand);
291 |
292 |     // Vector reductions
293 |     setOperationAction(ISD::VECREDUCE_ADD, VT, Legal);
294 |     setOperationAction(ISD::VECREDUCE_SMAX, VT, Legal);
295 |     setOperationAction(ISD::VECREDUCE_UMAX, VT, Legal);
296 |     setOperationAction(ISD::VECREDUCE_SMIN, VT, Legal);
297 |     setOperationAction(ISD::VECREDUCE_UMIN, VT, Legal);
298 |     setOperationAction(ISD::VECREDUCE_MUL, VT, Custom);
299 |     setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
300 |     setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
301 |     setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
302 |
303 |     if (!HasMVEFP) {
304 |       setOperationAction(ISD::SINT_TO_FP, VT, Expand);
305 |       setOperationAction(ISD::UINT_TO_FP, VT, Expand);
306 |       setOperationAction(ISD::FP_TO_SINT, VT, Expand);
307 |       setOperationAction(ISD::FP_TO_UINT, VT, Expand);
308 |     } else {
309 |       setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
310 |       setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
311 |     }
312 |
313 |     // Pre and Post inc are supported on loads and stores
314 |     for (unsigned im = (unsigned)ISD::PRE_INC;
315 |          im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
316 |       setIndexedLoadAction(im, VT, Legal);
317 |       setIndexedStoreAction(im, VT, Legal);
318 |       setIndexedMaskedLoadAction(im, VT, Legal);
319 |       setIndexedMaskedStoreAction(im, VT, Legal);
320 |     }
321 |   }
322 |
323 |   const MVT FloatTypes[] = { MVT::v8f16, MVT::v4f32 };
324 |   for (auto VT : FloatTypes) {
325 |     addRegisterClass(VT, &ARM::MQPRRegClass);
326 |     if (!HasMVEFP)
327 |       setAllExpand(VT);
328 |
329 |     // These are legal or custom whether we have MVE.fp or not
330 |     setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
331 |     setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
332 |     setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getVectorElementType(), Custom);
333 |     setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
334 |     setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
335 |     setOperationAction(ISD::BUILD_VECTOR, VT.getVectorElementType(), Custom);
336 |     setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
337 |     setOperationAction(ISD::SETCC, VT, Custom);
338 |     setOperationAction(ISD::MLOAD, VT, Custom);
339 |     setOperationAction(ISD::MSTORE, VT, Legal);
340 |     setOperationAction(ISD::SELECT, VT, Expand);
341 |     setOperationAction(ISD::SELECT_CC, VT, Expand);
342 |
343 |     // Pre and Post inc are supported on loads and stores
344 |     for (unsigned im = (unsigned)ISD::PRE_INC;
345 |          im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
346 |       setIndexedLoadAction(im, VT, Legal);
347 |       setIndexedStoreAction(im, VT, Legal);
348 |       setIndexedMaskedLoadAction(im, VT, Legal);
349 |       setIndexedMaskedStoreAction(im, VT, Legal);
350 |     }
351 |
352 |     if (HasMVEFP) {
353 |       setOperationAction(ISD::FMINNUM, VT, Legal);
354 |       setOperationAction(ISD::FMAXNUM, VT, Legal);
355 |       setOperationAction(ISD::FROUND, VT, Legal);
356 |       setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
357 |       setOperationAction(ISD::VECREDUCE_FMUL, VT, Custom);
358 |       setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
359 |       setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
360 |
361 |       // No native support for these.
362 |       setOperationAction(ISD::FDIV, VT, Expand);
363 |       setOperationAction(ISD::FREM, VT, Expand);
364 |       setOperationAction(ISD::FSQRT, VT, Expand);
365 |       setOperationAction(ISD::FSIN, VT, Expand);
366 |       setOperationAction(ISD::FCOS, VT, Expand);
367 |       setOperationAction(ISD::FPOW, VT, Expand);
368 |       setOperationAction(ISD::FLOG, VT, Expand);
369 |       setOperationAction(ISD::FLOG2, VT, Expand);
370 |       setOperationAction(ISD::FLOG10, VT, Expand);
371 |       setOperationAction(ISD::FEXP, VT, Expand);
372 |       setOperationAction(ISD::FEXP2, VT, Expand);
373 |       setOperationAction(ISD::FNEARBYINT, VT, Expand);
374 |     }
375 |   }
376 |
377 |   // Custom Expand smaller than legal vector reductions to prevent false zero
378 |   // items being added.
379 |   setOperationAction(ISD::VECREDUCE_FADD, MVT::v4f16, Custom);
380 |   setOperationAction(ISD::VECREDUCE_FMUL, MVT::v4f16, Custom);
381 |   setOperationAction(ISD::VECREDUCE_FMIN, MVT::v4f16, Custom);
382 |   setOperationAction(ISD::VECREDUCE_FMAX, MVT::v4f16, Custom);
383 |   setOperationAction(ISD::VECREDUCE_FADD, MVT::v2f16, Custom);
384 |   setOperationAction(ISD::VECREDUCE_FMUL, MVT::v2f16, Custom);
385 |   setOperationAction(ISD::VECREDUCE_FMIN, MVT::v2f16, Custom);
386 |   setOperationAction(ISD::VECREDUCE_FMAX, MVT::v2f16, Custom);
387 |
388 |   // We 'support' these types up to bitcast/load/store level, regardless of
389 |   // MVE integer-only / float support. Only doing FP data processing on the FP
390 |   // vector types is inhibited at integer-only level.
391 |   const MVT LongTypes[] = { MVT::v2i64, MVT::v2f64 };
392 |   for (auto VT : LongTypes) {
393 |     addRegisterClass(VT, &ARM::MQPRRegClass);
394 |     setAllExpand(VT);
395 |     setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
396 |     setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
397 |     setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
398 |     setOperationAction(ISD::VSELECT, VT, Legal);
399 |     setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
400 |   }
401 |   setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
402 |
403 |   // We can do bitwise operations on v2i64 vectors
404 |   setOperationAction(ISD::AND, MVT::v2i64, Legal);
405 |   setOperationAction(ISD::OR, MVT::v2i64, Legal);
406 |   setOperationAction(ISD::XOR, MVT::v2i64, Legal);
407 |
408 |   // It is legal to extload from v4i8 to v4i16 or v4i32.
409 |   addAllExtLoads(MVT::v8i16, MVT::v8i8, Legal);
410 |   addAllExtLoads(MVT::v4i32, MVT::v4i16, Legal);
411 |   addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal);
412 |
413 |   // It is legal to sign extend from v4i8/v4i16 to v4i32 or v8i8 to v8i16.
414 |   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Legal);
415 |   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);
416 |   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);
417 |   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i8, Legal);
418 |   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i16, Legal);
419 |
420 |   // Some truncating stores are legal too.
421 |   setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
422 |   setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
423 |   setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
424 |
425 |   // Pre and Post inc on these are legal, given the correct extends
426 |   for (unsigned im = (unsigned)ISD::PRE_INC;
427 |        im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
428 |     for (auto VT : {MVT::v8i8, MVT::v4i8, MVT::v4i16}) {
429 |       setIndexedLoadAction(im, VT, Legal);
430 |       setIndexedStoreAction(im, VT, Legal);
431 |       setIndexedMaskedLoadAction(im, VT, Legal);
432 |       setIndexedMaskedStoreAction(im, VT, Legal);
433 |     }
434 |   }
435 |
436 |   // Predicate types
437 |   const MVT pTypes[] = {MVT::v16i1, MVT::v8i1, MVT::v4i1, MVT::v2i1};
438 |   for (auto VT : pTypes) {
439 |     addRegisterClass(VT, &ARM::VCCRRegClass);
440 |     setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
441 |     setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
442 |     setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
443 |     setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
444 |     setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
445 |     setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
446 |     setOperationAction(ISD::SETCC, VT, Custom);
447 |     setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
448 |     setOperationAction(ISD::LOAD, VT, Custom);
449 |     setOperationAction(ISD::STORE, VT, Custom);
450 |     setOperationAction(ISD::TRUNCATE, VT, Custom);
451 |     setOperationAction(ISD::VSELECT, VT, Expand);
452 |     setOperationAction(ISD::SELECT, VT, Expand);
453 |   }
454 |   setOperationAction(ISD::SETCC, MVT::v2i1, Expand);
455 |   setOperationAction(ISD::TRUNCATE, MVT::v2i1, Expand);
456 |   setOperationAction(ISD::AND, MVT::v2i1, Expand);
457 |   setOperationAction(ISD::OR, MVT::v2i1, Expand);
458 |   setOperationAction(ISD::XOR, MVT::v2i1, Expand);
459 |   setOperationAction(ISD::SINT_TO_FP, MVT::v2i1, Expand);
460 |   setOperationAction(ISD::UINT_TO_FP, MVT::v2i1, Expand);
461 |   setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Expand);
462 |   setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Expand);
463 |
464 |   setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
465 |   setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
466 |   setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
467 |   setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
468 |   setOperationAction(ISD::ZERO_EXTEND, MVT::v16i16, Custom);
469 |   setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
470 |   setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
471 |   setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
472 | }
473 |
474 | ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
475 |                                      const ARMSubtarget &STI)
476 |     : TargetLowering(TM), Subtarget(&STI) {
477 |   RegInfo = Subtarget->getRegisterInfo();
478 |   Itins = Subtarget->getInstrItineraryData();
479 |
480 |   setBooleanContents(ZeroOrOneBooleanContent);
481 |   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
482 |
483 |   if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
484 |       !Subtarget->isTargetWatchOS() && !Subtarget->isTargetDriverKit()) {
485 |     bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard;
486 |     for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
487 |       setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
488 |                             IsHFTarget ? CallingConv::ARM_AAPCS_VFP
489 |                                        : CallingConv::ARM_AAPCS);
490 |   }
491 |
492 |   if (Subtarget->isTargetMachO()) {
493 |     // Uses VFP for Thumb libfuncs if available.
494 |     if (Subtarget->isThumb() && Subtarget->hasVFP2Base() &&
495 |         Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
496 |       static const struct {
497 |         const RTLIB::Libcall Op;
498 |         const char * const Name;
499 |         const ISD::CondCode Cond;
500 |       } LibraryCalls[] = {
501 |         // Single-precision floating-point arithmetic.
502 |         { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
503 |         { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
504 |         { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
505 |         { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
506 |
507 |         // Double-precision floating-point arithmetic.
508 |         { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
509 |         { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
510 |         { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
511 |         { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
512 |
513 |         // Single-precision comparisons.
514 |         { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
515 |         { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
516 |         { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
517 |         { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
518 |         { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
519 |         { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
520 |         { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
521 |
522 |         // Double-precision comparisons.
523 |         { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
524 |         { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
525 |         { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
526 |         { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
527 |         { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
528 |         { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
529 |         { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
530 |
531 |         // Floating-point to integer conversions.
532 |         // i64 conversions are done via library routines even when generating VFP
533 |         // instructions, so use the same ones.
534 |         { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
535 |         { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
536 |         { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
537 |         { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
538 |
539 |         // Conversions between floating types.
540 |         { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
541 |         { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
542 |
543 |         // Integer to floating-point conversions.
544 |         // i64 conversions are done via library routines even when generating VFP
545 |         // instructions, so use the same ones.
546 |         // FIXME: There appears to be some naming inconsistency in ARM libgcc:
547 |         // e.g., __floatunsidf vs. __floatunssidfvfp.
548 |         { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
549 |         { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
550 |         { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
551 |         { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
552 |       };
553 |
554 |       for (const auto &LC : LibraryCalls) {
555 |         setLibcallName(LC.Op, LC.Name);
556 |         if (LC.Cond != ISD::SETCC_INVALID)
557 |           setCmpLibcallCC(LC.Op, LC.Cond);
558 |       }
559 |     }
560 |   }
561 |
562 |   // These libcalls are not available in 32-bit.
563 |   setLibcallName(RTLIB::SHL_I128, nullptr);
564 |   setLibcallName(RTLIB::SRL_I128, nullptr);
565 |   setLibcallName(RTLIB::SRA_I128, nullptr);
566 |   setLibcallName(RTLIB::MUL_I128, nullptr);
567 |   setLibcallName(RTLIB::MULO_I64, nullptr);
568 |   setLibcallName(RTLIB::MULO_I128, nullptr);
569 |
570 |   // RTLIB
571 |   if (Subtarget->isAAPCS_ABI() &&
572 |       (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
573 |        Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
574 |     static const struct {
575 |       const RTLIB::Libcall Op;
576 |       const char * const Name;
577 |       const CallingConv::ID CC;
578 |       const ISD::CondCode Cond;
579 |     } LibraryCalls[] = {
580 |       // Double-precision floating-point arithmetic helper functions
581 |       // RTABI chapter 4.1.2, Table 2
582 |       { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
583 |       { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
584 |       { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
585 |       { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
586 |
587 |       // Double-precision floating-point comparison helper functions
588 |       // RTABI chapter 4.1.2, Table 3
589 |       { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
590 |       { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
591 |       { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
592 |       { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
593 |       { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
594 |       { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
595 |       { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
596 |
597 |       // Single-precision floating-point arithmetic helper functions
598 |       // RTABI chapter 4.1.2, Table 4
599 |       { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
600 |       { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
601 |       { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
602 |       { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
603 |
604 |       // Single-precision floating-point comparison helper functions
605 |       // RTABI chapter 4.1.2, Table 5
606 |       { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
607 |       { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
608 |       { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
609 |       { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
610 |       { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
611 |       { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
612 |       { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
613 |
614 |       // Floating-point to integer conversions.
615 |       // RTABI chapter 4.1.2, Table 6
616 |       { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
617 |       { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
618 |       { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
619 |       { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
620 |       { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
621 |       { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
622 |       { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
623 |       { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
624 |
625 |       // Conversions between floating types.
626 |       // RTABI chapter 4.1.2, Table 7
627 |       { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
628 |       { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
629 |       { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
630 |
631 |       // Integer to floating-point conversions.
632 |       // RTABI chapter 4.1.2, Table 8
633 |       { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
634 |       { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
635 |       { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
636 |       { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
637 |       { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
638 |       { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
639 |       { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
640 |       { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
641 |
642 |       // Long long helper functions
643 |       // RTABI chapter 4.2, Table 9
644 |       { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
645 |       { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
646 |       { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
647 |       { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
648 |
649 |       // Integer division functions
650 |       // RTABI chapter 4.3.1
651 |       { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
652 |       { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
653 |       { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
654 |       { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
655 |       { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
656 |       { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
657 |       { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
658 |       { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
659 |     };
660 |
661 |     for (const auto &LC : LibraryCalls) {
662 |       setLibcallName(LC.Op, LC.Name);
663 |       setLibcallCallingConv(LC.Op, LC.CC);
664 |       if (LC.Cond != ISD::SETCC_INVALID)
665 |         setCmpLibcallCC(LC.Op, LC.Cond);
666 |     }
667 |
668 |     // EABI dependent RTLIB
669 |     if (TM.Options.EABIVersion == EABI::EABI4 ||
670 |         TM.Options.EABIVersion == EABI::EABI5) {
671 |       static const struct {
672 |         const RTLIB::Libcall Op;
673 |         const char *const Name;
674 |         const CallingConv::ID CC;
675 |         const ISD::CondCode Cond;
676 |       } MemOpsLibraryCalls[] = {
677 |         // Memory operations
678 |         // RTABI chapter 4.3.4
679 |         { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
680 |         { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
681 |         { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
682 |       };
683 |
684 |       for (const auto &LC : MemOpsLibraryCalls) {
685 |         setLibcallName(LC.Op, LC.Name);
686 |         setLibcallCallingConv(LC.Op, LC.CC);
687 |         if (LC.Cond != ISD::SETCC_INVALID)
688 |           setCmpLibcallCC(LC.Op, LC.Cond);
689 |       }
690 |     }
691 |   }
692 |
693 |   if (Subtarget->isTargetWindows()) {
694 |     static const struct {
695 |       const RTLIB::Libcall Op;
696 |       const char * const Name;
697 |       const CallingConv::ID CC;
698 |     } LibraryCalls[] = {
699 |       { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
700 |       { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
701 |       { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
702 |       { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
703 |       { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
704 |       { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
705 |       { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
706 |       { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
707 |     };
708 |
709 |     for (const auto &LC : LibraryCalls) {
710 |       setLibcallName(LC.Op, LC.Name);
711 |       setLibcallCallingConv(LC.Op, LC.CC);
712 |     }
713 |   }
714 |
715 |   // Use divmod compiler-rt calls for iOS 5.0 and later.
716 |   if (Subtarget->isTargetMachO() &&
717 |       !(Subtarget->isTargetIOS() &&
718 |         Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
719 |     setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
720 |     setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
721 |   }
722 |
723 |   // The half <-> float conversion functions are always soft-float on
724 |   // non-watchos platforms, but are needed for some targets which use a
725 |   // hard-float calling convention by default.
726 |   if (!Subtarget->isTargetWatchABI()) {
727 |     if (Subtarget->isAAPCS_ABI()) {
728 |       setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
729 |       setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
730 |       setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
731 |     } else {
732 |       setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
733 |       setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
734 |       setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
735 |     }
736 |   }
737 |
738 |   // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
739 |   // a __gnu_ prefix (which is the default).
740 |   if (Subtarget->isTargetAEABI()) {
741 |     static const struct {
742 |       const RTLIB::Libcall Op;
743 |       const char * const Name;
744 |       const CallingConv::ID CC;
745 |     } LibraryCalls[] = {
746 |       { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS },
747 |       { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS },
748 |       { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS },
749 |     };
750 |
751 |     for (const auto &LC : LibraryCalls) {
752 |       setLibcallName(LC.Op, LC.Name);
753 |       setLibcallCallingConv(LC.Op, LC.CC);
754 |     }
755 |   }
756 |
757 |   if (Subtarget->isThumb1Only())
758 |     addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
759 |   else
760 |     addRegisterClass(MVT::i32, &ARM::GPRRegClass);
761 |
762 |   if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only() &&
763 |       Subtarget->hasFPRegs()) {
764 |     addRegisterClass(MVT::f32, &ARM::SPRRegClass);
765 |     addRegisterClass(MVT::f64, &ARM::DPRRegClass);
766 |
767 |     setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i32, Custom);
768 |     setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i32, Custom);
769 |     setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);
770 |     setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
771 |
772 |     if (!Subtarget->hasVFP2Base())
773 |       setAllExpand(MVT::f32);
774 |     if (!Subtarget->hasFP64())
775 |       setAllExpand(MVT::f64);
776 |   }
777 |
778 |   if (Subtarget->hasFullFP16()) {
779 |     addRegisterClass(MVT::f16, &ARM::HPRRegClass);
780 |     setOperationAction(ISD::BITCAST, MVT::i16, Custom);
781 |     setOperationAction(ISD::BITCAST, MVT::f16, Custom);
782 |
783 |     setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
784 |     setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
785 |   }
786 |
787 |   if (Subtarget->hasBF16()) {
788 |     addRegisterClass(MVT::bf16, &ARM::HPRRegClass);
789 |     setAllExpand(MVT::bf16);
790 |     if (!Subtarget->hasFullFP16())
791 |       setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
792 |   }
793 |
794 |   for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
795 |     for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
796 |       setTruncStoreAction(VT, InnerVT, Expand);
797 |       addAllExtLoads(VT, InnerVT, Expand);
798 |     }
799 |
800 |     setOperationAction(ISD::SMUL_LOHI, VT, Expand);
801 |     setOperationAction(ISD::UMUL_LOHI, VT, Expand);
802 |
803 |     setOperationAction(ISD::BSWAP, VT, Expand);
804 |   }
805 |
806 |   setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
807 |   setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
808 |
809 |   setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
810 |   setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
811 |
812 |   if (Subtarget->hasMVEIntegerOps())
813 |     addMVEVectorTypes(Subtarget->hasMVEFloatOps());
814 |
815 |   // Combine low-overhead loop intrinsics so that we can lower i1 types.
816 |   if (Subtarget->hasLOB()) {
817 |     setTargetDAGCombine({ISD::BRCOND, ISD::BR_CC});
818 |   }
819 |
820 |   if (Subtarget->hasNEON()) {
821 |     addDRTypeForNEON(MVT::v2f32);
822 |     addDRTypeForNEON(MVT::v8i8);
823 |     addDRTypeForNEON(MVT::v4i16);
824 |     addDRTypeForNEON(MVT::v2i32);
825 |     addDRTypeForNEON(MVT::v1i64);
826 |
827 |     addQRTypeForNEON(MVT::v4f32);
828 |     addQRTypeForNEON(MVT::v2f64);
829 |     addQRTypeForNEON(MVT::v16i8);
830 |     addQRTypeForNEON(MVT::v8i16);
831 |     addQRTypeForNEON(MVT::v4i32);
832 |     addQRTypeForNEON(MVT::v2i64);
833 |
834 |     if (Subtarget->hasFullFP16()) {
835 |       addQRTypeForNEON(MVT::v8f16);
836 |       addDRTypeForNEON(MVT::v4f16);
837 |     }
838 |
839 |     if (Subtarget->hasBF16()) {
840 |       addQRTypeForNEON(MVT::v8bf16);
841 |       addDRTypeForNEON(MVT::v4bf16);
842 |     }
843 |   }
844 |
845 |   if (Subtarget->hasMVEIntegerOps() || Subtarget->hasNEON()) {
846 |     // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
847 |     // none of Neon, MVE or VFP supports any arithmetic operations on it.
848 |     setOperationAction(ISD::FADD, MVT::v2f64, Expand);
849 |     setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
850 |     setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
851 |     // FIXME: Code duplication: FDIV and FREM are expanded always, see
852 |     // ARMTargetLowering::addTypeForNEON method for details.
853 |     setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
854 |     setOperationAction(ISD::FREM, MVT::v2f64, Expand);
855 |     // FIXME: Create unittest.
856 |     // In other words, find a case where "copysign" appears in a DAG with
857 |     // vector operands.
858 |     setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
859 |     // FIXME: Code duplication: SETCC has custom operation action, see
860 |     // ARMTargetLowering::addTypeForNEON method for details.
861 |     setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
862 |     // FIXME: Create unittest for FNEG and for FABS.
863 |     setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
864 |     setOperationAction(ISD::FABS, MVT::v2f64, Expand);
865 |     setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
866 |     setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
867 |     setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
868 |     setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
869 |     setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
870 |     setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
871 |     setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
872 |     setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
873 |     setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
874 |     // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
875 |     setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
876 |     setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
877 |     setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
878 |     setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
879 |     setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
880 |     setOperationAction(ISD::FMA, MVT::v2f64, Expand);
881 |   }
882 |
883 |   if (Subtarget->hasNEON()) {
884 |     // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively
885 |     // supported for v4f32.
886 |     setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
887 |     setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
888 |     setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
889 |     setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
890 |     setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
891 |     setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
892 |     setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
893 |     setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
894 |     setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
895 |     setOperationAction(ISD::FCEIL, MVT::v4f32, Expand);
896 |     setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand);
897 |     setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
898 |     setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
899 |     setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand);
900 |
901 |     // Mark v2f32 intrinsics.
902 |     setOperationAction(ISD::FSQRT, MVT::v2f32, Expand);
903 |     setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
904 |     setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
905 |     setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
906 |     setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
907 |     setOperationAction(ISD::FLOG2, MVT::v2f32, Expand);
908 |     setOperationAction(ISD::FLOG10, MVT::v2f32, Expand);
909 |     setOperationAction(ISD::FEXP, MVT::v2f32, Expand);
910 |     setOperationAction(ISD::FEXP2, MVT::v2f32, Expand);
911 |     setOperationAction(ISD::FCEIL, MVT::v2f32, Expand);
912 |     setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand);
913 |     setOperationAction(ISD::FRINT, MVT::v2f32, Expand);
914 |     setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand);
915 |     setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand);
916 |
917 |     // Neon does not support some operations on v1i64 and v2i64 types.
918 |     setOperationAction(ISD::MUL, MVT::v1i64, Expand);
919 |     // Custom handling for some quad-vector types to detect VMULL.
920 |     setOperationAction(ISD::MUL, MVT::v8i16, Custom);
921 |     setOperationAction(ISD::MUL, MVT::v4i32, Custom);
922 |     setOperationAction(ISD::MUL, MVT::v2i64, Custom);
923 |     // Custom handling for some vector types to avoid expensive expansions
924 |     setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
925 |     setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
926 |     setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
927 |     setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
928 |     // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
929 |     // a destination type that is wider than the source, nor does
930 |     // it have a FP_TO_[SU]INT instruction with a narrower destination than
931 |     // source.
932 |     setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
933 |     setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom);
934 |     setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
935 |     setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
936 |     setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
937 |     setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Custom);
938 |     setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
939 |     setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
940 |
941 |     setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand);
942 |     setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
943 |
944 |     // NEON does not have single instruction CTPOP for vectors with element
945 |     // types wider than 8-bits. However, custom lowering can leverage the
946 |     // v8i8/v16i8 vcnt instruction.
947 |     setOperationAction(ISD::CTPOP, MVT::v2i32, Custom);
948 |     setOperationAction(ISD::CTPOP, MVT::v4i32, Custom);
949 |     setOperationAction(ISD::CTPOP, MVT::v4i16, Custom);
950 |     setOperationAction(ISD::CTPOP, MVT::v8i16, Custom);
951 |     setOperationAction(ISD::CTPOP, MVT::v1i64, Custom);
952 |     setOperationAction(ISD::CTPOP, MVT::v2i64, Custom);
953 |
954 |     setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
955 |     setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
956 |
957 |     // NEON does not have single instruction CTTZ for vectors.
958 |     setOperationAction(ISD::CTTZ, MVT::v8i8, Custom);
959 |     setOperationAction(ISD::CTTZ, MVT::v4i16, Custom);
960 |     setOperationAction(ISD::CTTZ, MVT::v2i32, Custom);
961 |     setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
962 |
963 |     setOperationAction(ISD::CTTZ, MVT::v16i8, Custom);
964 |     setOperationAction(ISD::CTTZ, MVT::v8i16, Custom);
965 |     setOperationAction(ISD::CTTZ, MVT::v4i32, Custom);
966 |     setOperationAction(ISD::CTTZ, MVT::v2i64, Custom);
967 |
968 |     setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i8, Custom);
969 |     setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i16, Custom);
970 |     setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i32, Custom);
971 |     setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v1i64, Custom);
972 |
973 |     setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i8, Custom);
974 |     setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i16, Custom);
975 |     setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom);
976 |     setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom);
977 |
978 |     for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
979 |       setOperationAction(ISD::MULHS, VT, Expand);
980 |       setOperationAction(ISD::MULHU, VT, Expand);
981 |     }
982 |
983 |     // NEON only has FMA instructions as of VFP4.
984 |     if (!Subtarget->hasVFP4Base()) {
985 |       setOperationAction(ISD::FMA, MVT::v2f32, Expand);
986 |       setOperationAction(ISD::FMA, MVT::v4f32, Expand);
987 |     }
988 |
989 |     setTargetDAGCombine({ISD::SHL, ISD::SRL, ISD::SRA, ISD::FP_TO_SINT,
990 |                          ISD::FP_TO_UINT, ISD::FDIV, ISD::LOAD});
991 |
992 |     // It is legal to extload from v4i8 to v4i16 or v4i32.
993 |     for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16,
994 |                    MVT::v2i32}) {
995 |       for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
996 |         setLoadExtAction(ISD::EXTLOAD, VT, Ty, Legal);
997 |         setLoadExtAction(ISD::ZEXTLOAD, VT, Ty, Legal);
998 |         setLoadExtAction(ISD::SEXTLOAD, VT, Ty, Legal);
999 |       }
1000 |     }
1001 |   }
1002 |
1003 |   if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
1004 |     setTargetDAGCombine(
1005 |         {ISD::BUILD_VECTOR, ISD::VECTOR_SHUFFLE, ISD::INSERT_SUBVECTOR,
1006 |          ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT,
1007 |          ISD::SIGN_EXTEND_INREG, ISD::STORE, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND,
1008 |          ISD::ANY_EXTEND, ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN,
1009 |          ISD::INTRINSIC_VOID, ISD::VECREDUCE_ADD, ISD::ADD, ISD::BITCAST});
1010 |   }
1011 |   if (Subtarget->hasMVEIntegerOps()) {
1012 |     setTargetDAGCombine({ISD::SMIN, ISD::UMIN, ISD::SMAX, ISD::UMAX,
1013 |                          ISD::FP_EXTEND, ISD::SELECT, ISD::SELECT_CC,
1014 |                          ISD::SETCC});
1015 |   }
1016 |   if (Subtarget->hasMVEFloatOps()) {
1017 |     setTargetDAGCombine(ISD::FADD);
1018 |   }
1019 |
1020 |   if (!Subtarget->hasFP64()) {
1021 |     // When targeting a floating-point unit with only single-precision
1022 |     // operations, f64 is legal for the few double-precision instructions which
1023 |     // are present. However, no double-precision operations other than moves,
1024 | // loads and stores are provided by the hardware. | ||||
1025 | setOperationAction(ISD::FADD, MVT::f64, Expand); | ||||
1026 | setOperationAction(ISD::FSUB, MVT::f64, Expand); | ||||
1027 | setOperationAction(ISD::FMUL, MVT::f64, Expand); | ||||
1028 | setOperationAction(ISD::FMA, MVT::f64, Expand); | ||||
1029 | setOperationAction(ISD::FDIV, MVT::f64, Expand); | ||||
1030 | setOperationAction(ISD::FREM, MVT::f64, Expand); | ||||
1031 | setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); | ||||
1032 | setOperationAction(ISD::FGETSIGN, MVT::f64, Expand); | ||||
1033 | setOperationAction(ISD::FNEG, MVT::f64, Expand); | ||||
1034 | setOperationAction(ISD::FABS, MVT::f64, Expand); | ||||
1035 | setOperationAction(ISD::FSQRT, MVT::f64, Expand); | ||||
1036 | setOperationAction(ISD::FSIN, MVT::f64, Expand); | ||||
1037 | setOperationAction(ISD::FCOS, MVT::f64, Expand); | ||||
1038 | setOperationAction(ISD::FPOW, MVT::f64, Expand); | ||||
1039 | setOperationAction(ISD::FLOG, MVT::f64, Expand); | ||||
1040 | setOperationAction(ISD::FLOG2, MVT::f64, Expand); | ||||
1041 | setOperationAction(ISD::FLOG10, MVT::f64, Expand); | ||||
1042 | setOperationAction(ISD::FEXP, MVT::f64, Expand); | ||||
1043 | setOperationAction(ISD::FEXP2, MVT::f64, Expand); | ||||
1044 | setOperationAction(ISD::FCEIL, MVT::f64, Expand); | ||||
1045 | setOperationAction(ISD::FTRUNC, MVT::f64, Expand); | ||||
1046 | setOperationAction(ISD::FRINT, MVT::f64, Expand); | ||||
1047 | setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand); | ||||
1048 | setOperationAction(ISD::FFLOOR, MVT::f64, Expand); | ||||
1049 | setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); | ||||
1050 | setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); | ||||
1051 | setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); | ||||
1052 | setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); | ||||
1053 | setOperationAction(ISD::FP_TO_SINT, MVT::f64, Custom); | ||||
1054 | setOperationAction(ISD::FP_TO_UINT, MVT::f64, Custom); | ||||
1055 | setOperationAction(ISD::FP_ROUND, MVT::f32, Custom); | ||||
1056 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom); | ||||
1057 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom); | ||||
1058 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::f64, Custom); | ||||
1059 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::f64, Custom); | ||||
1060 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom); | ||||
1061 | } | ||||
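// Under this configuration an f64 add, for example, becomes a call to the
// __aeabi_dadd / __adddf3 runtime routine, even though f64 values can still
// be moved, loaded and stored in hardware as described above.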
1062 | |||||
1063 | if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) { | ||||
1064 | setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom); | ||||
1065 | setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Custom); | ||||
1066 | if (Subtarget->hasFullFP16()) { | ||||
1067 | setOperationAction(ISD::FP_ROUND, MVT::f16, Custom); | ||||
1068 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom); | ||||
1069 | } | ||||
1070 | } | ||||
1071 | |||||
1072 | if (!Subtarget->hasFP16()) { | ||||
1073 | setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom); | ||||
1074 | setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Custom); | ||||
1075 | } | ||||
1076 | |||||
1077 | computeRegisterProperties(Subtarget->getRegisterInfo()); | ||||
1078 | |||||
1079 | // ARM does not have floating-point extending loads. | ||||
1080 | for (MVT VT : MVT::fp_valuetypes()) { | ||||
1081 | setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand); | ||||
1082 | setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand); | ||||
1083 | } | ||||
1084 | |||||
1085 | // ... or truncating stores | ||||
1086 | setTruncStoreAction(MVT::f64, MVT::f32, Expand); | ||||
1087 | setTruncStoreAction(MVT::f32, MVT::f16, Expand); | ||||
1088 | setTruncStoreAction(MVT::f64, MVT::f16, Expand); | ||||
1089 | |||||
1090 | // ARM does not have i1 sign extending load. | ||||
1091 | for (MVT VT : MVT::integer_valuetypes()) | ||||
1092 | setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); | ||||
1093 | |||||
1094 | // ARM supports all 4 flavors of integer indexed load / store. | ||||
1095 | if (!Subtarget->isThumb1Only()) { | ||||
1096 | for (unsigned im = (unsigned)ISD::PRE_INC; | ||||
1097 | im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { | ||||
1098 | setIndexedLoadAction(im, MVT::i1, Legal); | ||||
1099 | setIndexedLoadAction(im, MVT::i8, Legal); | ||||
1100 | setIndexedLoadAction(im, MVT::i16, Legal); | ||||
1101 | setIndexedLoadAction(im, MVT::i32, Legal); | ||||
1102 | setIndexedStoreAction(im, MVT::i1, Legal); | ||||
1103 | setIndexedStoreAction(im, MVT::i8, Legal); | ||||
1104 | setIndexedStoreAction(im, MVT::i16, Legal); | ||||
1105 | setIndexedStoreAction(im, MVT::i32, Legal); | ||||
1106 | } | ||||
1107 | } else { | ||||
1108 | // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}. | ||||
1109 | setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal); | ||||
1110 | setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal); | ||||
1111 | } | ||||
1112 | |||||
1113 | setOperationAction(ISD::SADDO, MVT::i32, Custom); | ||||
1114 | setOperationAction(ISD::UADDO, MVT::i32, Custom); | ||||
1115 | setOperationAction(ISD::SSUBO, MVT::i32, Custom); | ||||
1116 | setOperationAction(ISD::USUBO, MVT::i32, Custom); | ||||
1117 | |||||
1118 | setOperationAction(ISD::ADDCARRY, MVT::i32, Custom); | ||||
1119 | setOperationAction(ISD::SUBCARRY, MVT::i32, Custom); | ||||
1120 | if (Subtarget->hasDSP()) { | ||||
1121 | setOperationAction(ISD::SADDSAT, MVT::i8, Custom); | ||||
1122 | setOperationAction(ISD::SSUBSAT, MVT::i8, Custom); | ||||
1123 | setOperationAction(ISD::SADDSAT, MVT::i16, Custom); | ||||
1124 | setOperationAction(ISD::SSUBSAT, MVT::i16, Custom); | ||||
1125 | setOperationAction(ISD::UADDSAT, MVT::i8, Custom); | ||||
1126 | setOperationAction(ISD::USUBSAT, MVT::i8, Custom); | ||||
1127 | setOperationAction(ISD::UADDSAT, MVT::i16, Custom); | ||||
1128 | setOperationAction(ISD::USUBSAT, MVT::i16, Custom); | ||||
1129 | } | ||||
1130 | if (Subtarget->hasBaseDSP()) { | ||||
1131 | setOperationAction(ISD::SADDSAT, MVT::i32, Legal); | ||||
1132 | setOperationAction(ISD::SSUBSAT, MVT::i32, Legal); | ||||
1133 | } | ||||
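// Illustration: with the base DSP instructions available, a saturating add
//   %r = call i32 @llvm.sadd.sat.i32(i32 %a, i32 %b)
// can select directly to QADD, which is why the i32 forms are Legal here,
// while the i8/i16 forms above go through Custom lowering (to the
// QADD8b/QADD16b style nodes defined later in this file).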
1134 | |||||
1135 | // i64 operation support. | ||||
1136 | setOperationAction(ISD::MUL, MVT::i64, Expand); | ||||
1137 | setOperationAction(ISD::MULHU, MVT::i32, Expand); | ||||
1138 | if (Subtarget->isThumb1Only()) { | ||||
1139 | setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); | ||||
1140 | setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); | ||||
1141 | } | ||||
1142 | if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops() | ||||
1143 | || (Subtarget->isThumb2() && !Subtarget->hasDSP())) | ||||
1144 | setOperationAction(ISD::MULHS, MVT::i32, Expand); | ||||
1145 | |||||
1146 | setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); | ||||
1147 | setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); | ||||
1148 | setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); | ||||
1149 | setOperationAction(ISD::SRL, MVT::i64, Custom); | ||||
1150 | setOperationAction(ISD::SRA, MVT::i64, Custom); | ||||
1151 | setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); | ||||
1152 | setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); | ||||
1153 | setOperationAction(ISD::LOAD, MVT::i64, Custom); | ||||
1154 | setOperationAction(ISD::STORE, MVT::i64, Custom); | ||||
1155 | |||||
1156 | // MVE lowers 64-bit shifts to lsll and lsrl, | ||||
1157 | // assuming that ISD::SRL and SRA of i64 are already marked custom. | ||||
1158 | if (Subtarget->hasMVEIntegerOps()) | ||||
1159 | setOperationAction(ISD::SHL, MVT::i64, Custom); | ||||
1160 | |||||
1161 | // Expand to __aeabi_l{lsl,lsr,asr} calls for Thumb1. | ||||
1162 | if (Subtarget->isThumb1Only()) { | ||||
1163 | setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand); | ||||
1164 | setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand); | ||||
1165 | setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand); | ||||
1166 | } | ||||
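// So on Thumb1 a 64-bit shift written in C, e.g.
//   unsigned long long f(unsigned long long x, int n) { return x << n; }
// ends up as a call to __aeabi_llsl instead of an inline SHL_PARTS sequence.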
1167 | |||||
1168 | if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) | ||||
1169 | setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); | ||||
1170 | |||||
1171 | // ARM does not have ROTL. | ||||
1172 | setOperationAction(ISD::ROTL, MVT::i32, Expand); | ||||
1173 | for (MVT VT : MVT::fixedlen_vector_valuetypes()) { | ||||
1174 | setOperationAction(ISD::ROTL, VT, Expand); | ||||
1175 | setOperationAction(ISD::ROTR, VT, Expand); | ||||
1176 | } | ||||
1177 | setOperationAction(ISD::CTTZ, MVT::i32, Custom); | ||||
1178 | setOperationAction(ISD::CTPOP, MVT::i32, Expand); | ||||
1179 | if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) { | ||||
1180 | setOperationAction(ISD::CTLZ, MVT::i32, Expand); | ||||
1181 | setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, LibCall); | ||||
1182 | } | ||||
1183 | |||||
1184 | // @llvm.readcyclecounter requires the Performance Monitors extension. | ||||
1185 | // Default to the 0 expansion on unsupported platforms. | ||||
1186 | // FIXME: Technically there are older ARM CPUs that have | ||||
1187 | // implementation-specific ways of obtaining this information. | ||||
1188 | if (Subtarget->hasPerfMon()) | ||||
1189 | setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom); | ||||
1190 | |||||
1191 | // Only ARMv6 and up have BSWAP (REV). | ||||
1192 | if (!Subtarget->hasV6Ops()) | ||||
1193 | setOperationAction(ISD::BSWAP, MVT::i32, Expand); | ||||
1194 | |||||
1195 | bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode() | ||||
1196 | : Subtarget->hasDivideInARMMode(); | ||||
1197 | if (!hasDivide) { | ||||
1198 | // These are expanded into libcalls if the CPU doesn't have a HW divider. | ||||
1199 | setOperationAction(ISD::SDIV, MVT::i32, LibCall); | ||||
1200 | setOperationAction(ISD::UDIV, MVT::i32, LibCall); | ||||
1201 | } | ||||
1202 | |||||
1203 | if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) { | ||||
1204 | setOperationAction(ISD::SDIV, MVT::i32, Custom); | ||||
1205 | setOperationAction(ISD::UDIV, MVT::i32, Custom); | ||||
1206 | |||||
1207 | setOperationAction(ISD::SDIV, MVT::i64, Custom); | ||||
1208 | setOperationAction(ISD::UDIV, MVT::i64, Custom); | ||||
1209 | } | ||||
1210 | |||||
1211 | setOperationAction(ISD::SREM, MVT::i32, Expand); | ||||
1212 | setOperationAction(ISD::UREM, MVT::i32, Expand); | ||||
1213 | |||||
1214 | // Register based DivRem for AEABI (RTABI 4.2) | ||||
1215 | if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() || | ||||
1216 | Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() || | ||||
1217 | Subtarget->isTargetWindows()) { | ||||
1218 | setOperationAction(ISD::SREM, MVT::i64, Custom); | ||||
1219 | setOperationAction(ISD::UREM, MVT::i64, Custom); | ||||
1220 | HasStandaloneRem = false; | ||||
1221 | |||||
1222 | if (Subtarget->isTargetWindows()) { | ||||
1223 | const struct { | ||||
1224 | const RTLIB::Libcall Op; | ||||
1225 | const char * const Name; | ||||
1226 | const CallingConv::ID CC; | ||||
1227 | } LibraryCalls[] = { | ||||
1228 | { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS }, | ||||
1229 | { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS }, | ||||
1230 | { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS }, | ||||
1231 | { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS }, | ||||
1232 | |||||
1233 | { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS }, | ||||
1234 | { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS }, | ||||
1235 | { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS }, | ||||
1236 | { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS }, | ||||
1237 | }; | ||||
1238 | |||||
1239 | for (const auto &LC : LibraryCalls) { | ||||
1240 | setLibcallName(LC.Op, LC.Name); | ||||
1241 | setLibcallCallingConv(LC.Op, LC.CC); | ||||
1242 | } | ||||
1243 | } else { | ||||
1244 | const struct { | ||||
1245 | const RTLIB::Libcall Op; | ||||
1246 | const char * const Name; | ||||
1247 | const CallingConv::ID CC; | ||||
1248 | } LibraryCalls[] = { | ||||
1249 | { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS }, | ||||
1250 | { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS }, | ||||
1251 | { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS }, | ||||
1252 | { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS }, | ||||
1253 | |||||
1254 | { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS }, | ||||
1255 | { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS }, | ||||
1256 | { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS }, | ||||
1257 | { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS }, | ||||
1258 | }; | ||||
1259 | |||||
1260 | for (const auto &LC : LibraryCalls) { | ||||
1261 | setLibcallName(LC.Op, LC.Name); | ||||
1262 | setLibcallCallingConv(LC.Op, LC.CC); | ||||
1263 | } | ||||
1264 | } | ||||
1265 | |||||
1266 | setOperationAction(ISD::SDIVREM, MVT::i32, Custom); | ||||
1267 | setOperationAction(ISD::UDIVREM, MVT::i32, Custom); | ||||
1268 | setOperationAction(ISD::SDIVREM, MVT::i64, Custom); | ||||
1269 | setOperationAction(ISD::UDIVREM, MVT::i64, Custom); | ||||
1270 | } else { | ||||
1271 | setOperationAction(ISD::SDIVREM, MVT::i32, Expand); | ||||
1272 | setOperationAction(ISD::UDIVREM, MVT::i32, Expand); | ||||
1273 | } | ||||
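// With the Custom DIVREM handling above, a division and a remainder of the
// same operands fold into one __aeabi_idivmod call, which returns the
// quotient in r0 and the remainder in r1, rather than two separate libcalls.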
1274 | |||||
1275 | if (Subtarget->getTargetTriple().isOSMSVCRT()) { | ||||
1276 | // MSVCRT doesn't have powi; fall back to pow | ||||
1277 | setLibcallName(RTLIB::POWI_F32, nullptr); | ||||
1278 | setLibcallName(RTLIB::POWI_F64, nullptr); | ||||
1279 | } | ||||
1280 | |||||
1281 | setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); | ||||
1282 | setOperationAction(ISD::ConstantPool, MVT::i32, Custom); | ||||
1283 | setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); | ||||
1284 | setOperationAction(ISD::BlockAddress, MVT::i32, Custom); | ||||
1285 | |||||
1286 | setOperationAction(ISD::TRAP, MVT::Other, Legal); | ||||
1287 | setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); | ||||
1288 | |||||
1289 | // Use the default implementation. | ||||
1290 | setOperationAction(ISD::VASTART, MVT::Other, Custom); | ||||
1291 | setOperationAction(ISD::VAARG, MVT::Other, Expand); | ||||
1292 | setOperationAction(ISD::VACOPY, MVT::Other, Expand); | ||||
1293 | setOperationAction(ISD::VAEND, MVT::Other, Expand); | ||||
1294 | setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); | ||||
1295 | setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); | ||||
1296 | |||||
1297 | if (Subtarget->isTargetWindows()) | ||||
1298 | setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); | ||||
1299 | else | ||||
1300 | setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); | ||||
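// Windows mandates stack probing for large or dynamic allocations, so the
// Custom lowering emits a __chkstk call (see ARMISD::WIN__CHKSTK below);
// other targets can use the generic Expand sequence that just adjusts SP.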
1301 | |||||
1302 | // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use | ||||
1303 | // the default expansion. | ||||
1304 | InsertFencesForAtomic = false; | ||||
1305 | if (Subtarget->hasAnyDataBarrier() && | ||||
1306 | (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) { | ||||
1307 | // ATOMIC_FENCE needs custom lowering; the others should have been expanded | ||||
1308 | // to ldrex/strex loops already. | ||||
1309 | setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); | ||||
1310 | if (!Subtarget->isThumb() || !Subtarget->isMClass()) | ||||
1311 | setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom); | ||||
1312 | |||||
1313 | // On v8, we have particularly efficient implementations of atomic fences | ||||
1314 | // if they can be combined with nearby atomic loads and stores. | ||||
1315 | if (!Subtarget->hasAcquireRelease() || | ||||
1316 | getTargetMachine().getOptLevel() == 0) { | ||||
1317 | // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc. | ||||
1318 | InsertFencesForAtomic = true; | ||||
1319 | } | ||||
1320 | } else { | ||||
1321 | // If there's anything we can use as a barrier, go through custom lowering | ||||
1322 | // for ATOMIC_FENCE. | ||||
1323 | // If the target has DMB in Thumb mode, fences can be inserted. | ||||
1324 | if (Subtarget->hasDataBarrier()) | ||||
1325 | InsertFencesForAtomic = true; | ||||
1326 | |||||
1327 | setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, | ||||
1328 | Subtarget->hasAnyDataBarrier() ? Custom : Expand); | ||||
1329 | |||||
1330 | // Set them all for expansion, which will force libcalls. | ||||
1331 | setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand); | ||||
1332 | setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand); | ||||
1333 | setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand); | ||||
1334 | setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand); | ||||
1335 | setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand); | ||||
1336 | setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand); | ||||
1337 | setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand); | ||||
1338 | setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand); | ||||
1339 | setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand); | ||||
1340 | setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand); | ||||
1341 | setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand); | ||||
1342 | setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand); | ||||
1343 | // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the | ||||
1344 | // Unordered/Monotonic case. | ||||
1345 | if (!InsertFencesForAtomic) { | ||||
1346 | setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom); | ||||
1347 | setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom); | ||||
1348 | } | ||||
1349 | } | ||||
1350 | |||||
1351 | // Compute supported atomic widths. | ||||
1352 | if (Subtarget->isTargetLinux() || | ||||
1353 | (!Subtarget->isMClass() && Subtarget->hasV6Ops())) { | ||||
1354 | // For targets where __sync_* routines are reliably available, we use them | ||||
1355 | // if necessary. | ||||
1356 | // | ||||
1357 | // ARM Linux always supports 64-bit atomics through kernel-assisted atomic | ||||
1358 | // routines (kernel 3.1 or later). FIXME: Not with compiler-rt? | ||||
1359 | // | ||||
1360 | // ARMv6 targets have native instructions in ARM mode. For Thumb mode, | ||||
1361 | // such targets should provide __sync_* routines, which use the ARM mode | ||||
1362 | // instructions. (ARMv6 doesn't have dmb, but it has an equivalent | ||||
1363 | // encoding; see ARMISD::MEMBARRIER_MCR.) | ||||
1364 | setMaxAtomicSizeInBitsSupported(64); | ||||
1365 | } else if (Subtarget->isMClass() && Subtarget->hasV8MBaselineOps()) { | ||||
1366 | // Cortex-M cores (other than Cortex-M0) have 32-bit atomics. | ||||
1367 | setMaxAtomicSizeInBitsSupported(32); | ||||
1368 | } else { | ||||
1369 | // We can't assume anything about other targets; just use libatomic | ||||
1370 | // routines. | ||||
1371 | setMaxAtomicSizeInBitsSupported(0); | ||||
1372 | } | ||||
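// The width set here is a guarantee, not a hint: the AtomicExpand pass
// rewrites any atomic wider than this into an __atomic_* libcall, so with a
// maximum of 32 bits an i64 cmpxchg becomes, e.g., a call to
// __atomic_compare_exchange_8.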
1373 | |||||
1374 | setOperationAction(ISD::PREFETCH, MVT::Other, Custom); | ||||
1375 | |||||
1376 | // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes. | ||||
1377 | if (!Subtarget->hasV6Ops()) { | ||||
1378 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); | ||||
1379 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); | ||||
1380 | } | ||||
1381 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); | ||||
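// With the Expand action, e.g. (sign_extend_inreg x, i16) is lowered to the
// classic shift pair (sra (shl x, 16), 16) instead of a single SXTH.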
1382 | |||||
1383 | if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() && | ||||
1384 | !Subtarget->isThumb1Only()) { | ||||
1385 | // Turn f64 -> i64 into VMOVRRD and i64 -> f64 into VMOVDRR, | ||||
1386 | // iff the target supports VFP2. | ||||
1387 | setOperationAction(ISD::BITCAST, MVT::i64, Custom); | ||||
1388 | setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); | ||||
1389 | setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom); | ||||
1390 | } | ||||
1391 | |||||
1392 | // We want to custom lower some of our intrinsics. | ||||
1393 | setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); | ||||
1394 | setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); | ||||
1395 | setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); | ||||
1396 | setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom); | ||||
1397 | if (Subtarget->useSjLjEH()) | ||||
1398 | setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume"); | ||||
1399 | |||||
1400 | setOperationAction(ISD::SETCC, MVT::i32, Expand); | ||||
1401 | setOperationAction(ISD::SETCC, MVT::f32, Expand); | ||||
1402 | setOperationAction(ISD::SETCC, MVT::f64, Expand); | ||||
1403 | setOperationAction(ISD::SELECT, MVT::i32, Custom); | ||||
1404 | setOperationAction(ISD::SELECT, MVT::f32, Custom); | ||||
1405 | setOperationAction(ISD::SELECT, MVT::f64, Custom); | ||||
1406 | setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); | ||||
1407 | setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); | ||||
1408 | setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); | ||||
1409 | if (Subtarget->hasFullFP16()) { | ||||
1410 | setOperationAction(ISD::SETCC, MVT::f16, Expand); | ||||
1411 | setOperationAction(ISD::SELECT, MVT::f16, Custom); | ||||
1412 | setOperationAction(ISD::SELECT_CC, MVT::f16, Custom); | ||||
1413 | } | ||||
1414 | |||||
1415 | setOperationAction(ISD::SETCCCARRY, MVT::i32, Custom); | ||||
1416 | |||||
1417 | setOperationAction(ISD::BRCOND, MVT::Other, Custom); | ||||
1418 | setOperationAction(ISD::BR_CC, MVT::i32, Custom); | ||||
1419 | if (Subtarget->hasFullFP16()) | ||||
1420 | setOperationAction(ISD::BR_CC, MVT::f16, Custom); | ||||
1421 | setOperationAction(ISD::BR_CC, MVT::f32, Custom); | ||||
1422 | setOperationAction(ISD::BR_CC, MVT::f64, Custom); | ||||
1423 | setOperationAction(ISD::BR_JT, MVT::Other, Custom); | ||||
1424 | |||||
1425 | // We don't support sin/cos/fmod/copysign/pow | ||||
1426 | setOperationAction(ISD::FSIN, MVT::f64, Expand); | ||||
1427 | setOperationAction(ISD::FSIN, MVT::f32, Expand); | ||||
1428 | setOperationAction(ISD::FCOS, MVT::f32, Expand); | ||||
1429 | setOperationAction(ISD::FCOS, MVT::f64, Expand); | ||||
1430 | setOperationAction(ISD::FSINCOS, MVT::f64, Expand); | ||||
1431 | setOperationAction(ISD::FSINCOS, MVT::f32, Expand); | ||||
1432 | setOperationAction(ISD::FREM, MVT::f64, Expand); | ||||
1433 | setOperationAction(ISD::FREM, MVT::f32, Expand); | ||||
1434 | if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() && | ||||
1435 | !Subtarget->isThumb1Only()) { | ||||
1436 | setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); | ||||
1437 | setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); | ||||
1438 | } | ||||
1439 | setOperationAction(ISD::FPOW, MVT::f64, Expand); | ||||
1440 | setOperationAction(ISD::FPOW, MVT::f32, Expand); | ||||
1441 | |||||
1442 | if (!Subtarget->hasVFP4Base()) { | ||||
1443 | setOperationAction(ISD::FMA, MVT::f64, Expand); | ||||
1444 | setOperationAction(ISD::FMA, MVT::f32, Expand); | ||||
1445 | } | ||||
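// Fused multiply-add arrived with VFPv4 (VFMA/VFMS). Without it, FMA cannot
// be split into a separate multiply and add, since that would round twice,
// so the Expand action falls back to the fma()/fmaf() library routines.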
1446 | |||||
1447 | // Various VFP goodness | ||||
1448 | if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) { | ||||
1449 | // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded. | ||||
1450 | if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) { | ||||
1451 | setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); | ||||
1452 | setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand); | ||||
1453 | } | ||||
1454 | |||||
1455 | // fp16 is a special v7 extension that adds f16 <-> f32 conversions. | ||||
1456 | if (!Subtarget->hasFP16()) { | ||||
1457 | setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand); | ||||
1458 | setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand); | ||||
1459 | } | ||||
1460 | |||||
1461 | // Strict floating-point comparisons need custom lowering. | ||||
1462 | setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Custom); | ||||
1463 | setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom); | ||||
1464 | setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Custom); | ||||
1465 | setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Custom); | ||||
1466 | setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Custom); | ||||
1467 | setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Custom); | ||||
1468 | } | ||||
1469 | |||||
1470 | // Use __sincos_stret if available. | ||||
1471 | if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr && | ||||
1472 | getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) { | ||||
1473 | setOperationAction(ISD::FSINCOS, MVT::f64, Custom); | ||||
1474 | setOperationAction(ISD::FSINCOS, MVT::f32, Custom); | ||||
1475 | } | ||||
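// __sincos_stret computes sine and cosine in a single call and returns the
// pair together; where the C library provides it (Darwin-style targets),
// the Custom lowering folds an FSINCOS node into that one call instead of
// separate calls to sinf and cosf.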
1476 | |||||
1477 | // FP-ARMv8 implements a lot of rounding-like FP operations. | ||||
1478 | if (Subtarget->hasFPARMv8Base()) { | ||||
1479 | setOperationAction(ISD::FFLOOR, MVT::f32, Legal); | ||||
1480 | setOperationAction(ISD::FCEIL, MVT::f32, Legal); | ||||
1481 | setOperationAction(ISD::FROUND, MVT::f32, Legal); | ||||
1482 | setOperationAction(ISD::FTRUNC, MVT::f32, Legal); | ||||
1483 | setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); | ||||
1484 | setOperationAction(ISD::FRINT, MVT::f32, Legal); | ||||
1485 | setOperationAction(ISD::FMINNUM, MVT::f32, Legal); | ||||
1486 | setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); | ||||
1487 | if (Subtarget->hasNEON()) { | ||||
1488 | setOperationAction(ISD::FMINNUM, MVT::v2f32, Legal); | ||||
1489 | setOperationAction(ISD::FMAXNUM, MVT::v2f32, Legal); | ||||
1490 | setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal); | ||||
1491 | setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal); | ||||
1492 | } | ||||
1493 | |||||
1494 | if (Subtarget->hasFP64()) { | ||||
1495 | setOperationAction(ISD::FFLOOR, MVT::f64, Legal); | ||||
1496 | setOperationAction(ISD::FCEIL, MVT::f64, Legal); | ||||
1497 | setOperationAction(ISD::FROUND, MVT::f64, Legal); | ||||
1498 | setOperationAction(ISD::FTRUNC, MVT::f64, Legal); | ||||
1499 | setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); | ||||
1500 | setOperationAction(ISD::FRINT, MVT::f64, Legal); | ||||
1501 | setOperationAction(ISD::FMINNUM, MVT::f64, Legal); | ||||
1502 | setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); | ||||
1503 | } | ||||
1504 | } | ||||
1505 | |||||
1506 | // FP16 operations often need to be promoted to f32 to call library functions. | ||||
1507 | if (Subtarget->hasFullFP16()) { | ||||
1508 | setOperationAction(ISD::FREM, MVT::f16, Promote); | ||||
1509 | setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand); | ||||
1510 | setOperationAction(ISD::FSIN, MVT::f16, Promote); | ||||
1511 | setOperationAction(ISD::FCOS, MVT::f16, Promote); | ||||
1512 | setOperationAction(ISD::FSINCOS, MVT::f16, Promote); | ||||
1513 | setOperationAction(ISD::FPOWI, MVT::f16, Promote); | ||||
1514 | setOperationAction(ISD::FPOW, MVT::f16, Promote); | ||||
1515 | setOperationAction(ISD::FEXP, MVT::f16, Promote); | ||||
1516 | setOperationAction(ISD::FEXP2, MVT::f16, Promote); | ||||
1517 | setOperationAction(ISD::FLOG, MVT::f16, Promote); | ||||
1518 | setOperationAction(ISD::FLOG10, MVT::f16, Promote); | ||||
1519 | setOperationAction(ISD::FLOG2, MVT::f16, Promote); | ||||
1520 | |||||
1521 | setOperationAction(ISD::FROUND, MVT::f16, Legal); | ||||
1522 | } | ||||
1523 | |||||
1524 | if (Subtarget->hasNEON()) { | ||||
1525 | // vmin and vmax aren't available in a scalar form, so we can use | ||||
1526 | // a NEON instruction with an undef lane instead. This has a performance | ||||
1527 | // penalty on some cores, so we don't do this unless we have been | ||||
1528 | // asked to by the core tuning model. | ||||
1529 | if (Subtarget->useNEONForSinglePrecisionFP()) { | ||||
1530 | setOperationAction(ISD::FMINIMUM, MVT::f32, Legal); | ||||
1531 | setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal); | ||||
1532 | setOperationAction(ISD::FMINIMUM, MVT::f16, Legal); | ||||
1533 | setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal); | ||||
1534 | } | ||||
1535 | setOperationAction(ISD::FMINIMUM, MVT::v2f32, Legal); | ||||
1536 | setOperationAction(ISD::FMAXIMUM, MVT::v2f32, Legal); | ||||
1537 | setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal); | ||||
1538 | setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal); | ||||
1539 | |||||
1540 | if (Subtarget->hasFullFP16()) { | ||||
1541 | setOperationAction(ISD::FMINNUM, MVT::v4f16, Legal); | ||||
1542 | setOperationAction(ISD::FMAXNUM, MVT::v4f16, Legal); | ||||
1543 | setOperationAction(ISD::FMINNUM, MVT::v8f16, Legal); | ||||
1544 | setOperationAction(ISD::FMAXNUM, MVT::v8f16, Legal); | ||||
1545 | |||||
1546 | setOperationAction(ISD::FMINIMUM, MVT::v4f16, Legal); | ||||
1547 | setOperationAction(ISD::FMAXIMUM, MVT::v4f16, Legal); | ||||
1548 | setOperationAction(ISD::FMINIMUM, MVT::v8f16, Legal); | ||||
1549 | setOperationAction(ISD::FMAXIMUM, MVT::v8f16, Legal); | ||||
1550 | } | ||||
1551 | } | ||||
1552 | |||||
1553 | // We have target-specific dag combine patterns for the following nodes: | ||||
1554 | // ARMISD::VMOVRRD - No need to call setTargetDAGCombine | ||||
1555 | setTargetDAGCombine( | ||||
1556 | {ISD::ADD, ISD::SUB, ISD::MUL, ISD::AND, ISD::OR, ISD::XOR}); | ||||
1557 | |||||
1558 | if (Subtarget->hasMVEIntegerOps()) | ||||
1559 | setTargetDAGCombine(ISD::VSELECT); | ||||
1560 | |||||
1561 | if (Subtarget->hasV6Ops()) | ||||
1562 | setTargetDAGCombine(ISD::SRL); | ||||
1563 | if (Subtarget->isThumb1Only()) | ||||
1564 | setTargetDAGCombine(ISD::SHL); | ||||
1565 | // Attempt to lower smin/smax to ssat/usat | ||||
1566 | if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || | ||||
1567 | Subtarget->isThumb2()) { | ||||
1568 | setTargetDAGCombine({ISD::SMIN, ISD::SMAX}); | ||||
1569 | } | ||||
1570 | |||||
1571 | setStackPointerRegisterToSaveRestore(ARM::SP); | ||||
1572 | |||||
1573 | if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() || | ||||
1574 | !Subtarget->hasVFP2Base() || Subtarget->hasMinSize()) | ||||
1575 | setSchedulingPreference(Sched::RegPressure); | ||||
1576 | else | ||||
1577 | setSchedulingPreference(Sched::Hybrid); | ||||
1578 | |||||
1579 | //// temporary - rewrite interface to use type | ||||
1580 | MaxStoresPerMemset = 8; | ||||
1581 | MaxStoresPerMemsetOptSize = 4; | ||||
1582 | MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores | ||||
1583 | MaxStoresPerMemcpyOptSize = 2; | ||||
1584 | MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores | ||||
1585 | MaxStoresPerMemmoveOptSize = 2; | ||||
1586 | |||||
1587 | // On ARM arguments smaller than 4 bytes are extended, so all arguments | ||||
1588 | // are at least 4 bytes aligned. | ||||
1589 | setMinStackArgumentAlignment(Align(4)); | ||||
1590 | |||||
1591 | // Prefer likely predicted branches to selects on out-of-order cores. | ||||
1592 | PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder(); | ||||
1593 | |||||
1594 | setPrefLoopAlignment(Align(1ULL << Subtarget->getPrefLoopLogAlignment())); | ||||
1595 | |||||
1596 | setMinFunctionAlignment(Subtarget->isThumb() ? Align(2) : Align(4)); | ||||
1597 | |||||
1598 | if (Subtarget->isThumb() || Subtarget->isThumb2()) | ||||
1599 | setTargetDAGCombine(ISD::ABS); | ||||
1600 | } | ||||
1601 | |||||
1602 | bool ARMTargetLowering::useSoftFloat() const { | ||||
1603 | return Subtarget->useSoftFloat(); | ||||
1604 | } | ||||
1605 | |||||
1606 | // FIXME: It might make sense to define the representative register class as the | ||||
1607 | // nearest super-register that has a non-null superset. For example, DPR_VFP2 is | ||||
1608 | // a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently, | ||||
1609 | // SPR's representative would be DPR_VFP2. This would work well if register | ||||
1610 | // pressure tracking were modified such that a register use would increment the | ||||
1611 | // pressure of the register class's representative and all of its super | ||||
1612 | // classes' representatives transitively. We have not implemented this because | ||||
1613 | // of the difficulty prior to coalescing of modeling operand register classes | ||||
1614 | // due to the common occurrence of cross class copies and subregister insertions | ||||
1615 | // and extractions. | ||||
1616 | std::pair<const TargetRegisterClass *, uint8_t> | ||||
1617 | ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI, | ||||
1618 | MVT VT) const { | ||||
1619 | const TargetRegisterClass *RRC = nullptr; | ||||
1620 | uint8_t Cost = 1; | ||||
1621 | switch (VT.SimpleTy) { | ||||
1622 | default: | ||||
1623 | return TargetLowering::findRepresentativeClass(TRI, VT); | ||||
1624 | // Use DPR as representative register class for all floating point | ||||
1625 | // and vector types. Since there are 32 SPR registers and 32 DPR registers, | ||||
1626 | // the cost is 1 for both f32 and f64. | ||||
1627 | case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16: | ||||
1628 | case MVT::v2i32: case MVT::v1i64: case MVT::v2f32: | ||||
1629 | RRC = &ARM::DPRRegClass; | ||||
1630 | // When NEON is used for SP, only half of the register file is available | ||||
1631 | // because operations that define both SP and DP results will be constrained | ||||
1632 | // to the VFP2 class (D0-D15). We currently model this constraint prior to | ||||
1633 | // coalescing by double-counting the SP regs. See the FIXME above. | ||||
1634 | if (Subtarget->useNEONForSinglePrecisionFP()) | ||||
1635 | Cost = 2; | ||||
1636 | break; | ||||
1637 | case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64: | ||||
1638 | case MVT::v4f32: case MVT::v2f64: | ||||
1639 | RRC = &ARM::DPRRegClass; | ||||
1640 | Cost = 2; | ||||
1641 | break; | ||||
1642 | case MVT::v4i64: | ||||
1643 | RRC = &ARM::DPRRegClass; | ||||
1644 | Cost = 4; | ||||
1645 | break; | ||||
1646 | case MVT::v8i64: | ||||
1647 | RRC = &ARM::DPRRegClass; | ||||
1648 | Cost = 8; | ||||
1649 | break; | ||||
1650 | } | ||||
1651 | return std::make_pair(RRC, Cost); | ||||
1652 | } | ||||
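// So a live v4f32, for instance, is reported as (DPR, cost 2): one Q
// register occupies two D registers' worth of pressure, and v8i64 scales
// the same scheme up to cost 8.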
1653 | |||||
1654 | const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { | ||||
1655 | #define MAKE_CASE(V) \ | ||||
1656 | case V: \ | ||||
1657 | return #V; | ||||
1658 | switch ((ARMISD::NodeType)Opcode) { | ||||
1659 | case ARMISD::FIRST_NUMBER: | ||||
1660 | break; | ||||
1661 | MAKE_CASE(ARMISD::Wrapper) | ||||
1662 | MAKE_CASE(ARMISD::WrapperPIC) | ||||
1663 | MAKE_CASE(ARMISD::WrapperJT) | ||||
1664 | MAKE_CASE(ARMISD::COPY_STRUCT_BYVAL) | ||||
1665 | MAKE_CASE(ARMISD::CALL) | ||||
1666 | MAKE_CASE(ARMISD::CALL_PRED) | ||||
1667 | MAKE_CASE(ARMISD::CALL_NOLINK) | ||||
1668 | MAKE_CASE(ARMISD::tSECALL) | ||||
1669 | MAKE_CASE(ARMISD::t2CALL_BTI) | ||||
1670 | MAKE_CASE(ARMISD::BRCOND) | ||||
1671 | MAKE_CASE(ARMISD::BR_JT) | ||||
1672 | MAKE_CASE(ARMISD::BR2_JT) | ||||
1673 | MAKE_CASE(ARMISD::RET_FLAG) | ||||
1674 | MAKE_CASE(ARMISD::SERET_FLAG) | ||||
1675 | MAKE_CASE(ARMISD::INTRET_FLAG) | ||||
1676 | MAKE_CASE(ARMISD::PIC_ADD) | ||||
1677 | MAKE_CASE(ARMISD::CMP) | ||||
1678 | MAKE_CASE(ARMISD::CMN) | ||||
1679 | MAKE_CASE(ARMISD::CMPZ) | ||||
1680 | MAKE_CASE(ARMISD::CMPFP) | ||||
1681 | MAKE_CASE(ARMISD::CMPFPE) | ||||
1682 | MAKE_CASE(ARMISD::CMPFPw0) | ||||
1683 | MAKE_CASE(ARMISD::CMPFPEw0) | ||||
1684 | MAKE_CASE(ARMISD::BCC_i64) | ||||
1685 | MAKE_CASE(ARMISD::FMSTAT) | ||||
1686 | MAKE_CASE(ARMISD::CMOV) | ||||
1687 | MAKE_CASE(ARMISD::SUBS) | ||||
1688 | MAKE_CASE(ARMISD::SSAT) | ||||
1689 | MAKE_CASE(ARMISD::USAT) | ||||
1690 | MAKE_CASE(ARMISD::ASRL) | ||||
1691 | MAKE_CASE(ARMISD::LSRL) | ||||
1692 | MAKE_CASE(ARMISD::LSLL) | ||||
1693 | MAKE_CASE(ARMISD::SRL_FLAG) | ||||
1694 | MAKE_CASE(ARMISD::SRA_FLAG) | ||||
1695 | MAKE_CASE(ARMISD::RRX) | ||||
1696 | MAKE_CASE(ARMISD::ADDC) | ||||
1697 | MAKE_CASE(ARMISD::ADDE) | ||||
1698 | MAKE_CASE(ARMISD::SUBC) | ||||
1699 | MAKE_CASE(ARMISD::SUBE) | ||||
1700 | MAKE_CASE(ARMISD::LSLS) | ||||
1701 | MAKE_CASE(ARMISD::VMOVRRD) | ||||
1702 | MAKE_CASE(ARMISD::VMOVDRR) | ||||
1703 | MAKE_CASE(ARMISD::VMOVhr) | ||||
1704 | MAKE_CASE(ARMISD::VMOVrh) | ||||
1705 | MAKE_CASE(ARMISD::VMOVSR) | ||||
1706 | MAKE_CASE(ARMISD::EH_SJLJ_SETJMP) | ||||
1707 | MAKE_CASE(ARMISD::EH_SJLJ_LONGJMP) | ||||
1708 | MAKE_CASE(ARMISD::EH_SJLJ_SETUP_DISPATCH) | ||||
1709 | MAKE_CASE(ARMISD::TC_RETURN) | ||||
1710 | MAKE_CASE(ARMISD::THREAD_POINTER) | ||||
1711 | MAKE_CASE(ARMISD::DYN_ALLOC) | ||||
1712 | MAKE_CASE(ARMISD::MEMBARRIER_MCR) | ||||
1713 | MAKE_CASE(ARMISD::PRELOAD) | ||||
1714 | MAKE_CASE(ARMISD::LDRD) | ||||
1715 | MAKE_CASE(ARMISD::STRD) | ||||
1716 | MAKE_CASE(ARMISD::WIN__CHKSTK) | ||||
1717 | MAKE_CASE(ARMISD::WIN__DBZCHK) | ||||
1718 | MAKE_CASE(ARMISD::PREDICATE_CAST) | ||||
1719 | MAKE_CASE(ARMISD::VECTOR_REG_CAST) | ||||
1720 | MAKE_CASE(ARMISD::MVESEXT) | ||||
1721 | MAKE_CASE(ARMISD::MVEZEXT) | ||||
1722 | MAKE_CASE(ARMISD::MVETRUNC) | ||||
1723 | MAKE_CASE(ARMISD::VCMP) | ||||
1724 | MAKE_CASE(ARMISD::VCMPZ) | ||||
1725 | MAKE_CASE(ARMISD::VTST) | ||||
1726 | MAKE_CASE(ARMISD::VSHLs) | ||||
1727 | MAKE_CASE(ARMISD::VSHLu) | ||||
1728 | MAKE_CASE(ARMISD::VSHLIMM) | ||||
1729 | MAKE_CASE(ARMISD::VSHRsIMM) | ||||
1730 | MAKE_CASE(ARMISD::VSHRuIMM) | ||||
1731 | MAKE_CASE(ARMISD::VRSHRsIMM) | ||||
1732 | MAKE_CASE(ARMISD::VRSHRuIMM) | ||||
1733 | MAKE_CASE(ARMISD::VRSHRNIMM) | ||||
1734 | MAKE_CASE(ARMISD::VQSHLsIMM) | ||||
1735 | MAKE_CASE(ARMISD::VQSHLuIMM) | ||||
1736 | MAKE_CASE(ARMISD::VQSHLsuIMM) | ||||
1737 | MAKE_CASE(ARMISD::VQSHRNsIMM) | ||||
1738 | MAKE_CASE(ARMISD::VQSHRNuIMM) | ||||
1739 | MAKE_CASE(ARMISD::VQSHRNsuIMM) | ||||
1740 | MAKE_CASE(ARMISD::VQRSHRNsIMM) | ||||
1741 | MAKE_CASE(ARMISD::VQRSHRNuIMM) | ||||
1742 | MAKE_CASE(ARMISD::VQRSHRNsuIMM) | ||||
1743 | MAKE_CASE(ARMISD::VSLIIMM) | ||||
1744 | MAKE_CASE(ARMISD::VSRIIMM) | ||||
1745 | MAKE_CASE(ARMISD::VGETLANEu) | ||||
1746 | MAKE_CASE(ARMISD::VGETLANEs) | ||||
1747 | MAKE_CASE(ARMISD::VMOVIMM) | ||||
1748 | MAKE_CASE(ARMISD::VMVNIMM) | ||||
1749 | MAKE_CASE(ARMISD::VMOVFPIMM) | ||||
1750 | MAKE_CASE(ARMISD::VDUP) | ||||
1751 | MAKE_CASE(ARMISD::VDUPLANE) | ||||
1752 | MAKE_CASE(ARMISD::VEXT) | ||||
1753 | MAKE_CASE(ARMISD::VREV64) | ||||
1754 | MAKE_CASE(ARMISD::VREV32) | ||||
1755 | MAKE_CASE(ARMISD::VREV16) | ||||
1756 | MAKE_CASE(ARMISD::VZIP) | ||||
1757 | MAKE_CASE(ARMISD::VUZP) | ||||
1758 | MAKE_CASE(ARMISD::VTRN) | ||||
1759 | MAKE_CASE(ARMISD::VTBL1) | ||||
1760 | MAKE_CASE(ARMISD::VTBL2) | ||||
1761 | MAKE_CASE(ARMISD::VMOVN) | ||||
1762 | MAKE_CASE(ARMISD::VQMOVNs) | ||||
1763 | MAKE_CASE(ARMISD::VQMOVNu) | ||||
1764 | MAKE_CASE(ARMISD::VCVTN) | ||||
1765 | MAKE_CASE(ARMISD::VCVTL) | ||||
1766 | MAKE_CASE(ARMISD::VIDUP) | ||||
1767 | MAKE_CASE(ARMISD::VMULLs) | ||||
1768 | MAKE_CASE(ARMISD::VMULLu) | ||||
1769 | MAKE_CASE(ARMISD::VQDMULH) | ||||
1770 | MAKE_CASE(ARMISD::VADDVs) | ||||
1771 | MAKE_CASE(ARMISD::VADDVu) | ||||
1772 | MAKE_CASE(ARMISD::VADDVps) | ||||
1773 | MAKE_CASE(ARMISD::VADDVpu) | ||||
1774 | MAKE_CASE(ARMISD::VADDLVs) | ||||
1775 | MAKE_CASE(ARMISD::VADDLVu) | ||||
1776 | MAKE_CASE(ARMISD::VADDLVAs) | ||||
1777 | MAKE_CASE(ARMISD::VADDLVAu) | ||||
1778 | MAKE_CASE(ARMISD::VADDLVps) | ||||
1779 | MAKE_CASE(ARMISD::VADDLVpu) | ||||
1780 | MAKE_CASE(ARMISD::VADDLVAps) | ||||
1781 | MAKE_CASE(ARMISD::VADDLVApu) | ||||
1782 | MAKE_CASE(ARMISD::VMLAVs) | ||||
1783 | MAKE_CASE(ARMISD::VMLAVu) | ||||
1784 | MAKE_CASE(ARMISD::VMLAVps) | ||||
1785 | MAKE_CASE(ARMISD::VMLAVpu) | ||||
1786 | MAKE_CASE(ARMISD::VMLALVs) | ||||
1787 | MAKE_CASE(ARMISD::VMLALVu) | ||||
1788 | MAKE_CASE(ARMISD::VMLALVps) | ||||
1789 | MAKE_CASE(ARMISD::VMLALVpu) | ||||
1790 | MAKE_CASE(ARMISD::VMLALVAs) | ||||
1791 | MAKE_CASE(ARMISD::VMLALVAu) | ||||
1792 | MAKE_CASE(ARMISD::VMLALVAps) | ||||
1793 | MAKE_CASE(ARMISD::VMLALVApu) | ||||
1794 | MAKE_CASE(ARMISD::VMINVu) | ||||
1795 | MAKE_CASE(ARMISD::VMINVs) | ||||
1796 | MAKE_CASE(ARMISD::VMAXVu) | ||||
1797 | MAKE_CASE(ARMISD::VMAXVs) | ||||
1798 | MAKE_CASE(ARMISD::UMAAL) | ||||
1799 | MAKE_CASE(ARMISD::UMLAL) | ||||
1800 | MAKE_CASE(ARMISD::SMLAL) | ||||
1801 | MAKE_CASE(ARMISD::SMLALBB) | ||||
1802 | MAKE_CASE(ARMISD::SMLALBT) | ||||
1803 | MAKE_CASE(ARMISD::SMLALTB) | ||||
1804 | MAKE_CASE(ARMISD::SMLALTT) | ||||
1805 | MAKE_CASE(ARMISD::SMULWB) | ||||
1806 | MAKE_CASE(ARMISD::SMULWT) | ||||
1807 | MAKE_CASE(ARMISD::SMLALD) | ||||
1808 | MAKE_CASE(ARMISD::SMLALDX) | ||||
1809 | MAKE_CASE(ARMISD::SMLSLD) | ||||
1810 | MAKE_CASE(ARMISD::SMLSLDX) | ||||
1811 | MAKE_CASE(ARMISD::SMMLAR) | ||||
1812 | MAKE_CASE(ARMISD::SMMLSR) | ||||
1813 | MAKE_CASE(ARMISD::QADD16b) | ||||
1814 | MAKE_CASE(ARMISD::QSUB16b) | ||||
1815 | MAKE_CASE(ARMISD::QADD8b) | ||||
1816 | MAKE_CASE(ARMISD::QSUB8b) | ||||
1817 | MAKE_CASE(ARMISD::UQADD16b) | ||||
1818 | MAKE_CASE(ARMISD::UQSUB16b) | ||||
1819 | MAKE_CASE(ARMISD::UQADD8b) | ||||
1820 | MAKE_CASE(ARMISD::UQSUB8b) | ||||
1821 | MAKE_CASE(ARMISD::BUILD_VECTOR) | ||||
1822 | MAKE_CASE(ARMISD::BFI) | ||||
1823 | MAKE_CASE(ARMISD::VORRIMM) | ||||
1824 | MAKE_CASE(ARMISD::VBICIMM) | ||||
1825 | MAKE_CASE(ARMISD::VBSP) | ||||
1826 | MAKE_CASE(ARMISD::MEMCPY) | ||||
1827 | MAKE_CASE(ARMISD::VLD1DUP) | ||||
1828 | MAKE_CASE(ARMISD::VLD2DUP) | ||||
1829 | MAKE_CASE(ARMISD::VLD3DUP) | ||||
1830 | MAKE_CASE(ARMISD::VLD4DUP) | ||||
1831 | MAKE_CASE(ARMISD::VLD1_UPD) | ||||
1832 | MAKE_CASE(ARMISD::VLD2_UPD) | ||||
1833 | MAKE_CASE(ARMISD::VLD3_UPD) | ||||
1834 | MAKE_CASE(ARMISD::VLD4_UPD) | ||||
1835 | MAKE_CASE(ARMISD::VLD1x2_UPD) | ||||
1836 | MAKE_CASE(ARMISD::VLD1x3_UPD) | ||||
1837 | MAKE_CASE(ARMISD::VLD1x4_UPD) | ||||
1838 | MAKE_CASE(ARMISD::VLD2LN_UPD) | ||||
1839 | MAKE_CASE(ARMISD::VLD3LN_UPD) | ||||
1840 | MAKE_CASE(ARMISD::VLD4LN_UPD) | ||||
1841 | MAKE_CASE(ARMISD::VLD1DUP_UPD) | ||||
1842 | MAKE_CASE(ARMISD::VLD2DUP_UPD) | ||||
1843 | MAKE_CASE(ARMISD::VLD3DUP_UPD) | ||||
1844 | MAKE_CASE(ARMISD::VLD4DUP_UPD) | ||||
1845 | MAKE_CASE(ARMISD::VST1_UPD) | ||||
1846 | MAKE_CASE(ARMISD::VST2_UPD) | ||||
1847 | MAKE_CASE(ARMISD::VST3_UPD) | ||||
1848 | MAKE_CASE(ARMISD::VST4_UPD) | ||||
1849 | MAKE_CASE(ARMISD::VST1x2_UPD) | ||||
1850 | MAKE_CASE(ARMISD::VST1x3_UPD) | ||||
1851 | MAKE_CASE(ARMISD::VST1x4_UPD) | ||||
1852 | MAKE_CASE(ARMISD::VST2LN_UPD) | ||||
1853 | MAKE_CASE(ARMISD::VST3LN_UPD) | ||||
1854 | MAKE_CASE(ARMISD::VST4LN_UPD) | ||||
1855 | MAKE_CASE(ARMISD::WLS) | ||||
1856 | MAKE_CASE(ARMISD::WLSSETUP) | ||||
1857 | MAKE_CASE(ARMISD::LE) | ||||
1858 | MAKE_CASE(ARMISD::LOOP_DEC) | ||||
1859 | MAKE_CASE(ARMISD::CSINV) | ||||
1860 | MAKE_CASE(ARMISD::CSNEG) | ||||
1861 | MAKE_CASE(ARMISD::CSINC) | ||||
1862 | MAKE_CASE(ARMISD::MEMCPYLOOP) | ||||
1863 | MAKE_CASE(ARMISD::MEMSETLOOP) | ||||
1864 | #undef MAKE_CASE | ||||
1865 | } | ||||
1866 | return nullptr; | ||||
1867 | } | ||||
1868 | |||||
1869 | EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, | ||||
1870 | EVT VT) const { | ||||
1871 | if (!VT.isVector()) | ||||
1872 | return getPointerTy(DL); | ||||
1873 | |||||
1874 | // MVE has a predicate register. | ||||
1875 | if ((Subtarget->hasMVEIntegerOps() && | ||||
1876 | (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 || | ||||
1877 | VT == MVT::v16i8)) || | ||||
1878 | (Subtarget->hasMVEFloatOps() && | ||||
1879 | (VT == MVT::v2f64 || VT == MVT::v4f32 || VT == MVT::v8f16))) | ||||
1880 | return MVT::getVectorVT(MVT::i1, VT.getVectorElementCount()); | ||||
1881 | return VT.changeVectorElementTypeToInteger(); | ||||
1882 | } | ||||
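// For example, comparing two v4i32 vectors yields a v4i1 result under MVE,
// backed by the VPR predicate register, but a v4i32 all-ones/all-zeros lane
// mask on NEON, which has no predicate registers.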
1883 | |||||
1884 | /// getRegClassFor - Return the register class that should be used for the | ||||
1885 | /// specified value type. | ||||
1886 | const TargetRegisterClass * | ||||
1887 | ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const { | ||||
1888 | (void)isDivergent; | ||||
1889 | // Map v4i64 to QQ registers but do not make the type legal. Similarly map | ||||
1890 | // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to | ||||
1891 | // load / store 4 to 8 consecutive NEON D registers, or 2 to 4 consecutive | ||||
1892 | // MVE Q registers. | ||||
1893 | if (Subtarget->hasNEON()) { | ||||
1894 | if (VT == MVT::v4i64) | ||||
1895 | return &ARM::QQPRRegClass; | ||||
1896 | if (VT == MVT::v8i64) | ||||
1897 | return &ARM::QQQQPRRegClass; | ||||
1898 | } | ||||
1899 | if (Subtarget->hasMVEIntegerOps()) { | ||||
1900 | if (VT == MVT::v4i64) | ||||
1901 | return &ARM::MQQPRRegClass; | ||||
1902 | if (VT == MVT::v8i64) | ||||
1903 | return &ARM::MQQQQPRRegClass; | ||||
1904 | } | ||||
1905 | return TargetLowering::getRegClassFor(VT); | ||||
1906 | } | ||||
1907 | |||||
1908 | // memcpy, and other memory intrinsics, typically try to use LDM/STM if the | ||||
1909 | // source/dest is aligned and the copy size is large enough. We therefore want | ||||
1910 | // to align such objects passed to memory intrinsics. | ||||
1911 | bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, | ||||
1912 | unsigned &PrefAlign) const { | ||||
1913 | if (!isa<MemIntrinsic>(CI)) | ||||
1914 | return false; | ||||
1915 | MinSize = 8; | ||||
1916 | // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1 | ||||
1917 | // cycle faster than 4-byte aligned LDM. | ||||
1918 | PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4); | ||||
1919 | return true; | ||||
1920 | } | ||||
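// Effect of the hook above: when e.g. a 32-byte local buffer is passed to
// memcpy, the caller (CodeGenPrepare) raises the buffer's alignment to 8 on
// v6+ A/R-class cores so the copy can use 8-byte-aligned LDM/STM, and to 4
// elsewhere.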
1921 | |||||
1922 | // Create a fast isel object. | ||||
1923 | FastISel * | ||||
1924 | ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, | ||||
1925 | const TargetLibraryInfo *libInfo) const { | ||||
1926 | return ARM::createFastISel(funcInfo, libInfo); | ||||
1927 | } | ||||
1928 | |||||
1929 | Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { | ||||
1930 | unsigned NumVals = N->getNumValues(); | ||||
1931 | if (!NumVals) | ||||
1932 | return Sched::RegPressure; | ||||
1933 | |||||
1934 | for (unsigned i = 0; i != NumVals; ++i) { | ||||
1935 | EVT VT = N->getValueType(i); | ||||
1936 | if (VT == MVT::Glue || VT == MVT::Other) | ||||
1937 | continue; | ||||
1938 | if (VT.isFloatingPoint() || VT.isVector()) | ||||
1939 | return Sched::ILP; | ||||
1940 | } | ||||
1941 | |||||
1942 | if (!N->isMachineOpcode()) | ||||
1943 | return Sched::RegPressure; | ||||
1944 | |||||
1945 | // Loads are scheduled for latency even if the instruction itinerary | ||||
1946 | // is not available. | ||||
1947 | const TargetInstrInfo *TII = Subtarget->getInstrInfo(); | ||||
1948 | const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); | ||||
1949 | |||||
1950 | if (MCID.getNumDefs() == 0) | ||||
1951 | return Sched::RegPressure; | ||||
1952 | if (!Itins->isEmpty() && | ||||
1953 | Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2) | ||||
1954 | return Sched::ILP; | ||||
1955 | |||||
1956 | return Sched::RegPressure; | ||||
1957 | } | ||||
1958 | |||||
1959 | //===----------------------------------------------------------------------===// | ||||
1960 | // Lowering Code | ||||
1961 | //===----------------------------------------------------------------------===// | ||||
1962 | |||||
1963 | static bool isSRL16(const SDValue &Op) { | ||||
1964 | if (Op.getOpcode() != ISD::SRL) | ||||
1965 | return false; | ||||
1966 | if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1))) | ||||
1967 | return Const->getZExtValue() == 16; | ||||
1968 | return false; | ||||
1969 | } | ||||
1970 | |||||
1971 | static bool isSRA16(const SDValue &Op) { | ||||
1972 | if (Op.getOpcode() != ISD::SRA) | ||||
1973 | return false; | ||||
1974 | if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1))) | ||||
1975 | return Const->getZExtValue() == 16; | ||||
1976 | return false; | ||||
1977 | } | ||||
1978 | |||||
1979 | static bool isSHL16(const SDValue &Op) { | ||||
1980 | if (Op.getOpcode() != ISD::SHL) | ||||
1981 | return false; | ||||
1982 | if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1))) | ||||
1983 | return Const->getZExtValue() == 16; | ||||
1984 | return false; | ||||
1985 | } | ||||
1986 | |||||
1987 | // Check for a signed 16-bit value. We special case SRA because it makes it | ||||
1988 | // simpler when also looking for SRAs that aren't sign extending a | ||||
1989 | // smaller value. Without the check, we'd need to take extra care with | ||||
1990 | // checking order for some operations. | ||||
1991 | static bool isS16(const SDValue &Op, SelectionDAG &DAG) { | ||||
1992 | if (isSRA16(Op)) | ||||
1993 | return isSHL16(Op.getOperand(0)); | ||||
1994 | return DAG.ComputeNumSignBits(Op) == 17; | ||||
1995 | } | ||||
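// Worked example for the == 17 check above: an i32 holding a value
// sign-extended from 16 bits has bits 31..15 all equal to bit 15, i.e.
// exactly 17 leading sign bits. A value known to fit in fewer bits (say,
// one extended from i8) reports more than 17 and deliberately fails the
// equality.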
1996 | |||||
1997 | /// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC | ||||
1998 | static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) { | ||||
1999 | switch (CC) { | ||||
2000 | default: llvm_unreachable("Unknown condition code!")::llvm::llvm_unreachable_internal("Unknown condition code!", "llvm/lib/Target/ARM/ARMISelLowering.cpp" , 2000); | ||||
2001 | case ISD::SETNE: return ARMCC::NE; | ||||
2002 | case ISD::SETEQ: return ARMCC::EQ; | ||||
2003 | case ISD::SETGT: return ARMCC::GT; | ||||
2004 | case ISD::SETGE: return ARMCC::GE; | ||||
2005 | case ISD::SETLT: return ARMCC::LT; | ||||
2006 | case ISD::SETLE: return ARMCC::LE; | ||||
2007 | case ISD::SETUGT: return ARMCC::HI; | ||||
2008 | case ISD::SETUGE: return ARMCC::HS; | ||||
2009 | case ISD::SETULT: return ARMCC::LO; | ||||
2010 | case ISD::SETULE: return ARMCC::LS; | ||||
2011 | } | ||||
2012 | } | ||||
2013 | |||||
2014 | /// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC. | ||||
2015 | static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, | ||||
2016 | ARMCC::CondCodes &CondCode2) { | ||||
2017 | CondCode2 = ARMCC::AL; | ||||
2018 | switch (CC) { | ||||
2019 | default: llvm_unreachable("Unknown FP condition!")::llvm::llvm_unreachable_internal("Unknown FP condition!", "llvm/lib/Target/ARM/ARMISelLowering.cpp" , 2019); | ||||
2020 | case ISD::SETEQ: | ||||
2021 | case ISD::SETOEQ: CondCode = ARMCC::EQ; break; | ||||
2022 | case ISD::SETGT: | ||||
2023 | case ISD::SETOGT: CondCode = ARMCC::GT; break; | ||||
2024 | case ISD::SETGE: | ||||
2025 | case ISD::SETOGE: CondCode = ARMCC::GE; break; | ||||
2026 | case ISD::SETOLT: CondCode = ARMCC::MI; break; | ||||
2027 | case ISD::SETOLE: CondCode = ARMCC::LS; break; | ||||
2028 | case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break; | ||||
2029 | case ISD::SETO: CondCode = ARMCC::VC; break; | ||||
2030 | case ISD::SETUO: CondCode = ARMCC::VS; break; | ||||
2031 | case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break; | ||||
2032 | case ISD::SETUGT: CondCode = ARMCC::HI; break; | ||||
2033 | case ISD::SETUGE: CondCode = ARMCC::PL; break; | ||||
2034 | case ISD::SETLT: | ||||
2035 | case ISD::SETULT: CondCode = ARMCC::LT; break; | ||||
2036 | case ISD::SETLE: | ||||
2037 | case ISD::SETULE: CondCode = ARMCC::LE; break; | ||||
2038 | case ISD::SETNE: | ||||
2039 | case ISD::SETUNE: CondCode = ARMCC::NE; break; | ||||
2040 | } | ||||
2041 | } | ||||
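// Example of the two-condition encodings above: ordered-not-equal (SETONE)
// has no single ARM condition after a VFP compare + FMSTAT, so it is tested
// as MI (ordered less-than) and, failing that, GT (ordered greater-than); a
// NaN operand sets neither condition, giving the required false result.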
2042 | |||||
2043 | //===----------------------------------------------------------------------===// | ||||
2044 | // Calling Convention Implementation | ||||
2045 | //===----------------------------------------------------------------------===// | ||||
2046 | |||||
2047 | /// getEffectiveCallingConv - Get the effective calling convention, taking into | ||||
2048 | /// account presence of floating point hardware and calling convention | ||||
2049 | /// limitations, such as support for variadic functions. | ||||
2050 | CallingConv::ID | ||||
2051 | ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC, | ||||
2052 | bool isVarArg) const { | ||||
2053 | switch (CC) { | ||||
2054 | default: | ||||
2055 | report_fatal_error("Unsupported calling convention"); | ||||
2056 | case CallingConv::ARM_AAPCS: | ||||
2057 | case CallingConv::ARM_APCS: | ||||
2058 | case CallingConv::GHC: | ||||
2059 | case CallingConv::CFGuard_Check: | ||||
2060 | return CC; | ||||
2061 | case CallingConv::PreserveMost: | ||||
2062 | return CallingConv::PreserveMost; | ||||
2063 | case CallingConv::ARM_AAPCS_VFP: | ||||
2064 | case CallingConv::Swift: | ||||
2065 | case CallingConv::SwiftTail: | ||||
2066 | return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP; | ||||
2067 | case CallingConv::C: | ||||
2068 | case CallingConv::Tail: | ||||
2069 | if (!Subtarget->isAAPCS_ABI()) | ||||
2070 | return CallingConv::ARM_APCS; | ||||
2071 | else if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && | ||||
2072 | getTargetMachine().Options.FloatABIType == FloatABI::Hard && | ||||
2073 | !isVarArg) | ||||
2074 | return CallingConv::ARM_AAPCS_VFP; | ||||
2075 | else | ||||
2076 | return CallingConv::ARM_AAPCS; | ||||
2077 | case CallingConv::Fast: | ||||
2078 | case CallingConv::CXX_FAST_TLS: | ||||
2079 | if (!Subtarget->isAAPCS_ABI()) { | ||||
2080 | if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && !isVarArg) | ||||
2081 | return CallingConv::Fast; | ||||
2082 | return CallingConv::ARM_APCS; | ||||
2083 | } else if (Subtarget->hasVFP2Base() && | ||||
2084 | !Subtarget->isThumb1Only() && !isVarArg) | ||||
2085 | return CallingConv::ARM_AAPCS_VFP; | ||||
2086 | else | ||||
2087 | return CallingConv::ARM_AAPCS; | ||||
2088 | } | ||||
2089 | } | ||||
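// Concretely: ordinary C code on a hard-float AAPCS target gets
// ARM_AAPCS_VFP, passing FP arguments in VFP registers, but a variadic
// callee such as printf falls back to plain ARM_AAPCS, which is why
// isVarArg is threaded through the cases above.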
2090 | |||||
2091 | CCAssignFn *ARMTargetLowering::CCAssignFnForCall(CallingConv::ID CC, | ||||
2092 | bool isVarArg) const { | ||||
2093 | return CCAssignFnForNode(CC, false, isVarArg); | ||||
2094 | } | ||||
2095 | |||||
2096 | CCAssignFn *ARMTargetLowering::CCAssignFnForReturn(CallingConv::ID CC, | ||||
2097 | bool isVarArg) const { | ||||
2098 | return CCAssignFnForNode(CC, true, isVarArg); | ||||
2099 | } | ||||
2100 | |||||
2101 | /// CCAssignFnForNode - Selects the correct CCAssignFn for the given | ||||
2102 | /// CallingConvention. | ||||
2103 | CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, | ||||
2104 | bool Return, | ||||
2105 | bool isVarArg) const { | ||||
2106 | switch (getEffectiveCallingConv(CC, isVarArg)) { | ||||
2107 | default: | ||||
2108 | report_fatal_error("Unsupported calling convention"); | ||||
2109 | case CallingConv::ARM_APCS: | ||||
2110 | return (Return ? RetCC_ARM_APCS : CC_ARM_APCS); | ||||
2111 | case CallingConv::ARM_AAPCS: | ||||
2112 | return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); | ||||
2113 | case CallingConv::ARM_AAPCS_VFP: | ||||
2114 | return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); | ||||
2115 | case CallingConv::Fast: | ||||
2116 | return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS); | ||||
2117 | case CallingConv::GHC: | ||||
2118 | return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC); | ||||
2119 | case CallingConv::PreserveMost: | ||||
2120 | return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); | ||||
2121 | case CallingConv::CFGuard_Check: | ||||
2122 | return (Return ? RetCC_ARM_AAPCS : CC_ARM_Win32_CFGuard_Check); | ||||
2123 | } | ||||
2124 | } | ||||
2125 | |||||
2126 | SDValue ARMTargetLowering::MoveToHPR(const SDLoc &dl, SelectionDAG &DAG, | ||||
2127 | MVT LocVT, MVT ValVT, SDValue Val) const { | ||||
2128 | Val = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocVT.getSizeInBits()), | ||||
2129 | Val); | ||||
2130 | if (Subtarget->hasFullFP16()) { | ||||
2131 | Val = DAG.getNode(ARMISD::VMOVhr, dl, ValVT, Val); | ||||
2132 | } else { | ||||
2133 | Val = DAG.getNode(ISD::TRUNCATE, dl, | ||||
2134 | MVT::getIntegerVT(ValVT.getSizeInBits()), Val); | ||||
2135 | Val = DAG.getNode(ISD::BITCAST, dl, ValVT, Val); | ||||
2136 | } | ||||
2137 | return Val; | ||||
2138 | } | ||||
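// In other words, an f16 value sitting in the low 16 bits of a 32-bit
// location is either transferred directly with VMOVhr when +fullfp16
// provides a real half-precision register view, or reconstructed by an
// i32 -> i16 truncate followed by a bitcast to f16.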
2139 | |||||
2140 | SDValue ARMTargetLowering::MoveFromHPR(const SDLoc &dl, SelectionDAG &DAG, | ||||
2141 | MVT LocVT, MVT ValVT, | ||||
2142 | SDValue Val) const { | ||||
2143 | if (Subtarget->hasFullFP16()) { | ||||
2144 | Val = DAG.getNode(ARMISD::VMOVrh, dl, | ||||
2145 | MVT::getIntegerVT(LocVT.getSizeInBits()), Val); | ||||
2146 | } else { | ||||
2147 | Val = DAG.getNode(ISD::BITCAST, dl, | ||||
2148 | MVT::getIntegerVT(ValVT.getSizeInBits()), Val); | ||||
2149 | Val = DAG.getNode(ISD::ZERO_EXTEND, dl, | ||||
2150 | MVT::getIntegerVT(LocVT.getSizeInBits()), Val); | ||||
2151 | } | ||||
2152 | return DAG.getNode(ISD::BITCAST, dl, LocVT, Val); | ||||
2153 | } | ||||
2154 | |||||
2155 | /// LowerCallResult - Lower the result values of a call into the | ||||
2156 | /// appropriate copies out of appropriate physical registers. | ||||
2157 | SDValue ARMTargetLowering::LowerCallResult( | ||||
2158 | SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, | ||||
2159 | const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, | ||||
2160 | SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn, | ||||
2161 | SDValue ThisVal) const { | ||||
2162 | // Assign locations to each value returned by this call. | ||||
2163 | SmallVector<CCValAssign, 16> RVLocs; | ||||
2164 | CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, | ||||
2165 | *DAG.getContext()); | ||||
2166 | CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg)); | ||||
2167 | |||||
2168 | // Copy all of the result registers out of their specified physreg. | ||||
2169 | for (unsigned i = 0; i != RVLocs.size(); ++i) { | ||||
2170 | CCValAssign VA = RVLocs[i]; | ||||
2171 | |||||
2172 | // Pass the 'this' value directly from the argument to the return value, to | ||||
2173 | // avoid register unit interference. | ||||
2174 | if (i == 0 && isThisReturn) { | ||||
2175 | assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 && | ||||
2176 | "unexpected return calling convention register assignment"); | ||||
2177 | InVals.push_back(ThisVal); | ||||
2178 | continue; | ||||
2179 | } | ||||
2180 | |||||
2181 | SDValue Val; | ||||
2182 | if (VA.needsCustom() && | ||||
2183 | (VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2f64)) { | ||||
2184 | // Handle f64 or half of a v2f64. | ||||
2185 | SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, | ||||
2186 | InFlag); | ||||
2187 | Chain = Lo.getValue(1); | ||||
2188 | InFlag = Lo.getValue(2); | ||||
2189 | VA = RVLocs[++i]; // skip ahead to next loc | ||||
2190 | SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, | ||||
2191 | InFlag); | ||||
2192 | Chain = Hi.getValue(1); | ||||
2193 | InFlag = Hi.getValue(2); | ||||
2194 | if (!Subtarget->isLittle()) | ||||
2195 | std::swap (Lo, Hi); | ||||
2196 | Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); | ||||
2197 | |||||
2198 | if (VA.getLocVT() == MVT::v2f64) { | ||||
2199 | SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); | ||||
2200 | Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, | ||||
2201 | DAG.getConstant(0, dl, MVT::i32)); | ||||
2202 | |||||
2203 | VA = RVLocs[++i]; // skip ahead to next loc | ||||
2204 | Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); | ||||
2205 | Chain = Lo.getValue(1); | ||||
2206 | InFlag = Lo.getValue(2); | ||||
2207 | VA = RVLocs[++i]; // skip ahead to next loc | ||||
2208 | Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); | ||||
2209 | Chain = Hi.getValue(1); | ||||
2210 | InFlag = Hi.getValue(2); | ||||
2211 | if (!Subtarget->isLittle()) | ||||
2212 | std::swap (Lo, Hi); | ||||
2213 | Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); | ||||
2214 | Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, | ||||
2215 | DAG.getConstant(1, dl, MVT::i32)); | ||||
2216 | } | ||||
2217 | } else { | ||||
2218 | Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(), | ||||
2219 | InFlag); | ||||
2220 | Chain = Val.getValue(1); | ||||
2221 | InFlag = Val.getValue(2); | ||||
2222 | } | ||||
2223 | |||||
2224 | switch (VA.getLocInfo()) { | ||||
2225 | default: llvm_unreachable("Unknown loc info!")::llvm::llvm_unreachable_internal("Unknown loc info!", "llvm/lib/Target/ARM/ARMISelLowering.cpp" , 2225); | ||||
2226 | case CCValAssign::Full: break; | ||||
2227 | case CCValAssign::BCvt: | ||||
2228 | Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val); | ||||
2229 | break; | ||||
2230 | } | ||||
2231 | |||||
2232 | // f16 arguments have their size extended to 4 bytes and passed as if they | ||||
2233 | // had been copied to the LSBs of a 32-bit register. | ||||
2234 | // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI) | ||||
2235 | if (VA.needsCustom() && | ||||
2236 | (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) | ||||
2237 | Val = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Val); | ||||
2238 | |||||
2239 | InVals.push_back(Val); | ||||
2240 | } | ||||
2241 | |||||
2242 | return Chain; | ||||
2243 | } | ||||
2244 | |||||
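// computeAddrForCallArg - Compute the address (and MachinePointerInfo) at
// which an outgoing call argument should be stored: a fixed frame index
// offset by SPDiff for tail calls, or an SP-relative address for normal
// calls.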
std::pair<SDValue, MachinePointerInfo> ARMTargetLowering::computeAddrForCallArg(
    const SDLoc &dl, SelectionDAG &DAG, const CCValAssign &VA, SDValue StackPtr,
    bool IsTailCall, int SPDiff) const {
  SDValue DstAddr;
  MachinePointerInfo DstInfo;
  int32_t Offset = VA.getLocMemOffset();
  MachineFunction &MF = DAG.getMachineFunction();

  if (IsTailCall) {
    Offset += SPDiff;
    auto PtrVT = getPointerTy(DAG.getDataLayout());
    int Size = VA.getLocVT().getFixedSizeInBits() / 8;
    int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
    DstAddr = DAG.getFrameIndex(FI, PtrVT);
    DstInfo = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
  } else {
    SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl);
    DstAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
                          StackPtr, PtrOff);
    DstInfo = MachinePointerInfo::getStack(DAG.getMachineFunction(), Offset);
  }

  return std::make_pair(DstAddr, DstInfo);
}

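// PassF64ArgInRegs - Split an f64 (or one half of a v2f64) into a pair of
// i32 values with VMOVRRD. The first word always goes in the assigned
// register; the second goes in the follow-up register if one was assigned,
// or is stored to the argument's stack slot otherwise.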
void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
                                         SDValue Chain, SDValue &Arg,
                                         RegsToPassVector &RegsToPass,
                                         CCValAssign &VA, CCValAssign &NextVA,
                                         SDValue &StackPtr,
                                         SmallVectorImpl<SDValue> &MemOpChains,
                                         bool IsTailCall,
                                         int SPDiff) const {
  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
                              DAG.getVTList(MVT::i32, MVT::i32), Arg);
  unsigned id = Subtarget->isLittle() ? 0 : 1;
  RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));

  if (NextVA.isRegLoc())
    RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
  else {
    assert(NextVA.isMemLoc());
    if (!StackPtr.getNode())
      StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
                                    getPointerTy(DAG.getDataLayout()));

    SDValue DstAddr;
    MachinePointerInfo DstInfo;
    std::tie(DstAddr, DstInfo) =
        computeAddrForCallArg(dl, DAG, NextVA, StackPtr, IsTailCall, SPDiff);
    MemOpChains.push_back(
        DAG.getStore(Chain, dl, fmrrd.getValue(1 - id), DstAddr, DstInfo));
  }
}

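// Whether the calling convention guarantees that a tail call will actually be
// emitted: fastcc only under -tailcallopt, while tailcc and swifttailcc
// always do.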
static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
  return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
         CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
}

/// LowerCall - Lower a call into a callseq_start <-
/// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
/// nodes.
SDValue
ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                             SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &dl = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &isTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool doesNotRet = CLI.DoesNotReturn;
  bool isVarArg = CLI.IsVarArg;

  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  MachineFunction::CallSiteInfo CSInfo;
  bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
  bool isThisReturn = false;
  bool isCmseNSCall = false;
  bool isSibCall = false;
  bool PreferIndirect = false;
  bool GuardWithBTI = false;

  // Lower 'returns_twice' calls to a pseudo-instruction.
  if (CLI.CB && CLI.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) &&
      !Subtarget->noBTIAtReturnTwice())
    GuardWithBTI = AFI->branchTargetEnforcement();

  // Determine whether this is a non-secure function call.
  if (CLI.CB && CLI.CB->getAttributes().hasFnAttr("cmse_nonsecure_call"))
    isCmseNSCall = true;

  // Disable tail calls if they're not supported.
  if (!Subtarget->supportsTailCall())
    isTailCall = false;

  // For both the non-secure calls and the returns from a CMSE entry function,
  // the function needs to do some extra work after the call, or before the
  // return, respectively, thus it cannot end with a tail call.
  if (isCmseNSCall || AFI->isCmseNSEntryFunction())
    isTailCall = false;

  if (isa<GlobalAddressSDNode>(Callee)) {
    // If we're optimizing for minimum size and the function is called three or
    // more times in this block, we can improve codesize by calling indirectly
    // as BLXr has a 16-bit encoding.
    auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
    if (CLI.CB) {
      auto *BB = CLI.CB->getParent();
      PreferIndirect = Subtarget->isThumb() && Subtarget->hasMinSize() &&
                       count_if(GV->users(), [&BB](const User *U) {
                         return isa<Instruction>(U) &&
                                cast<Instruction>(U)->getParent() == BB;
                       }) > 2;
    }
  }
  if (isTailCall) {
    // Check if it's really possible to do a tail call.
    isTailCall = IsEligibleForTailCallOptimization(
        Callee, CallConv, isVarArg, isStructRet,
        MF.getFunction().hasStructRetAttr(), Outs, OutVals, Ins, DAG,
        PreferIndirect);

    if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt &&
        CallConv != CallingConv::Tail && CallConv != CallingConv::SwiftTail)
      isSibCall = true;

    // We don't support GuaranteedTailCallOpt for ARM, only automatically
    // detected sibcalls.
    if (isTailCall)
      ++NumTailCalls;
  }

  if (!isTailCall && CLI.CB && CLI.CB->isMustTailCall())
    report_fatal_error("failed to perform tail call elimination on a call "
                       "site marked musttail");
  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();

  // SPDiff is the byte offset of the call's argument area from the callee's.
  // Stores to callee stack arguments will be placed in FixedStackSlots offset
  // by this amount for a tail call. In a sibling call it must be 0 because the
  // caller will deallocate the entire stack and the callee still expects its
  // arguments to begin at SP+0. Completely unused for non-tail calls.
  int SPDiff = 0;

  if (isTailCall && !isSibCall) {
    auto FuncInfo = MF.getInfo<ARMFunctionInfo>();
    unsigned NumReusableBytes = FuncInfo->getArgumentStackSize();

    // Since the callee will pop the argument stack as a tail call, we must
    // keep the popped size 16-byte aligned.
    Align StackAlign = DAG.getDataLayout().getStackAlignment();
    NumBytes = alignTo(NumBytes, StackAlign);

    // SPDiff will be negative if this tail call requires more space than we
    // would automatically have in our incoming argument space. Positive if we
    // can actually shrink the stack.
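    // For example (illustrative numbers only): with 8 reusable bytes of
    // incoming argument space and 24 bytes of outgoing arguments, SPDiff
    // below is -16, so 16 extra bytes must be reserved for the tail call.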
    SPDiff = NumReusableBytes - NumBytes;

    // If this call requires more stack than we have available from
    // LowerFormalArguments, tell FrameLowering to reserve space for it.
    if (SPDiff < 0 && AFI->getArgRegsSaveSize() < (unsigned)-SPDiff)
      AFI->setArgRegsSaveSize(-SPDiff);
  }

  if (isSibCall) {
    // For sibling tail calls, memory operands are available in our caller's
    // stack.
    NumBytes = 0;
  } else {
    // Adjust the stack pointer for the new arguments...
    // These operations are automatically eliminated by the prolog/epilog pass.
    Chain = DAG.getCALLSEQ_START(Chain, isTailCall ? 0 : NumBytes, 0, dl);
  }

  SDValue StackPtr =
      DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));

  RegsToPassVector RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;

  // During a tail call, stores to the argument area must happen after all of
  // the function's incoming arguments have been loaded because they may alias.
  // This is done by folding in a TokenFactor from LowerFormalArguments, but
  // there's no point in doing so repeatedly so this tracks whether that's
  // happened yet.
  bool AfterFormalArgLoads = false;

  // Walk the register/memloc assignments, inserting copies/loads. In the case
  // of tail call optimization, arguments are handled later.
  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
       i != e;
       ++i, ++realArgIdx) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = OutVals[realArgIdx];
    ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
    bool isByVal = Flags.isByVal();

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::BCvt:
      Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
      break;
    }

    if (isTailCall && VA.isMemLoc() && !AfterFormalArgLoads) {
      Chain = DAG.getStackArgumentTokenFactor(Chain);
      AfterFormalArgLoads = true;
    }

    // f16 arguments have their size extended to 4 bytes and passed as if they
    // had been copied to the LSBs of a 32-bit register.
    // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI).
    if (VA.needsCustom() &&
        (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
      Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
    } else {
      // f16 arguments could have been extended prior to argument lowering.
      // Mask such arguments if this is a CMSE nonsecure call.
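      // (Rationale, inferred rather than stated here: zeroing the unused
      // upper bits keeps leftover register contents on the secure side from
      // leaking to the non-secure callee.)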
      auto ArgVT = Outs[realArgIdx].ArgVT;
      if (isCmseNSCall && (ArgVT == MVT::f16)) {
        auto LocBits = VA.getLocVT().getSizeInBits();
        auto MaskValue = APInt::getLowBitsSet(LocBits, ArgVT.getSizeInBits());
        SDValue Mask =
            DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
        Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
        Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
        Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
      }
    }

    // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
    if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {
      SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                DAG.getConstant(0, dl, MVT::i32));
      SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                DAG.getConstant(1, dl, MVT::i32));

      PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass, VA, ArgLocs[++i],
                       StackPtr, MemOpChains, isTailCall, SPDiff);

      VA = ArgLocs[++i]; // skip ahead to next loc
      if (VA.isRegLoc()) {
        PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass, VA, ArgLocs[++i],
                         StackPtr, MemOpChains, isTailCall, SPDiff);
      } else {
        assert(VA.isMemLoc());
        SDValue DstAddr;
        MachinePointerInfo DstInfo;
        std::tie(DstAddr, DstInfo) =
            computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
        MemOpChains.push_back(DAG.getStore(Chain, dl, Op1, DstAddr, DstInfo));
      }
    } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) {
      PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
                       StackPtr, MemOpChains, isTailCall, SPDiff);
    } else if (VA.isRegLoc()) {
      if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
          Outs[0].VT == MVT::i32) {
        assert(VA.getLocVT() == MVT::i32 &&
               "unexpected calling convention register assignment");
        assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
               "unexpected use of 'returned'");
        isThisReturn = true;
      }
      const TargetOptions &Options = DAG.getTarget().Options;
      if (Options.EmitCallSiteInfo)
        CSInfo.emplace_back(VA.getLocReg(), i);
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else if (isByVal) {
      assert(VA.isMemLoc());
      unsigned offset = 0;

      // True if this byval aggregate will be split between registers
      // and memory.
      unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
      unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();

      if (CurByValIdx < ByValArgsCount) {

        unsigned RegBegin, RegEnd;
        CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);

        EVT PtrVT =
            DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
        unsigned int i, j;
        for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
          SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
          SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
          SDValue Load =
              DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo(),
                          DAG.InferPtrAlign(AddArg));
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(j, Load));
        }

        // If the parameter size exceeds the register area, the "offset" value
        // helps us compute the stack slot for the remaining part properly.
        offset = RegEnd - RegBegin;

        CCInfo.nextInRegsParam();
      }

      if (Flags.getByValSize() > 4*offset) {
        auto PtrVT = getPointerTy(DAG.getDataLayout());
        SDValue Dst;
        MachinePointerInfo DstInfo;
        std::tie(Dst, DstInfo) =
            computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
        SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
        SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
        SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
                                           MVT::i32);
        SDValue AlignNode =
            DAG.getConstant(Flags.getNonZeroByValAlign().value(), dl, MVT::i32);

        SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
        SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode };
        MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
                                          Ops));
      }
    } else {
      assert(VA.isMemLoc());
      SDValue DstAddr;
      MachinePointerInfo DstInfo;
      std::tie(DstAddr, DstInfo) =
          computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);

      SDValue Store = DAG.getStore(Chain, dl, Arg, DstAddr, DstInfo);
      MemOpChains.push_back(Store);
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  bool isDirect = false;

  const TargetMachine &TM = getTargetMachine();
  const Module *Mod = MF.getFunction().getParent();
  const GlobalValue *GV = nullptr;
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
    GV = G->getGlobal();
  bool isStub =
      !TM.shouldAssumeDSOLocal(*Mod, GV) && Subtarget->isTargetMachO();

  bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
  bool isLocalARMFunc = false;
  auto PtrVt = getPointerTy(DAG.getDataLayout());

  if (Subtarget->genLongCalls()) {
    assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
           "long-calls codegen is not position independent!");
    // Handle a global address or an external symbol. If it's not one of
    // those, the target's already in a register, so we don't need to do
    // anything extra.
    if (isa<GlobalAddressSDNode>(Callee)) {
      // Create a constant pool entry for the callee address.
      unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
      ARMConstantPoolValue *CPV =
          ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);

      // Get the address of the callee into a register.
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(
          PtrVt, dl, DAG.getEntryNode(), CPAddr,
          MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
    } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
      const char *Sym = S->getSymbol();

      // Create a constant pool entry for the callee address.
      unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
      ARMConstantPoolValue *CPV =
          ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
                                        ARMPCLabelIndex, 0);
      // Get the address of the callee into a register.
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(
          PtrVt, dl, DAG.getEntryNode(), CPAddr,
          MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
    }
  } else if (isa<GlobalAddressSDNode>(Callee)) {
    if (!PreferIndirect) {
      isDirect = true;
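      // GV is non-null on this path: the enclosing branch is guarded by
      // isa<GlobalAddressSDNode>(Callee), so the dyn_cast above already
      // assigned it.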
      bool isDef = GV->isStrongDefinitionForLinker();

      // ARM call to a local ARM function is predicable.
      isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
      // tBX takes a register source operand.
      if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
        assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
        Callee = DAG.getNode(
            ARMISD::WrapperPIC, dl, PtrVt,
            DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
        Callee = DAG.getLoad(
            PtrVt, dl, DAG.getEntryNode(), Callee,
            MachinePointerInfo::getGOT(DAG.getMachineFunction()), MaybeAlign(),
            MachineMemOperand::MODereferenceable |
                MachineMemOperand::MOInvariant);
      } else if (Subtarget->isTargetCOFF()) {
        assert(Subtarget->isTargetWindows() &&
               "Windows is the only supported COFF target");
        unsigned TargetFlags = ARMII::MO_NO_FLAG;
        if (GV->hasDLLImportStorageClass())
          TargetFlags = ARMII::MO_DLLIMPORT;
        else if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
          TargetFlags = ARMII::MO_COFFSTUB;
        Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*offset=*/0,
                                            TargetFlags);
        if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
          Callee =
              DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
                          DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
                          MachinePointerInfo::getGOT(DAG.getMachineFunction()));
      } else {
        Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, 0);
      }
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    isDirect = true;
    // tBX takes a register source operand.
    const char *Sym = S->getSymbol();
    if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
      unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
      ARMConstantPoolValue *CPV =
          ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
                                        ARMPCLabelIndex, 4);
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(
          PtrVt, dl, DAG.getEntryNode(), CPAddr,
          MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
      Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
    } else {
      Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
    }
  }

  if (isCmseNSCall) {
    assert(!isARMFunc && !isDirect &&
           "Cannot handle call to ARM function or direct call");
    if (NumBytes > 0) {
      DiagnosticInfoUnsupported Diag(DAG.getMachineFunction().getFunction(),
                                     "call to non-secure function would "
                                     "require passing arguments on stack",
                                     dl.getDebugLoc());
      DAG.getContext()->diagnose(Diag);
    }
    if (isStructRet) {
      DiagnosticInfoUnsupported Diag(
          DAG.getMachineFunction().getFunction(),
          "call to non-secure function would return value through pointer",
          dl.getDebugLoc());
      DAG.getContext()->diagnose(Diag);
    }
  }

  // FIXME: handle tail calls differently.
  unsigned CallOpc;
  if (Subtarget->isThumb()) {
    if (GuardWithBTI)
      CallOpc = ARMISD::t2CALL_BTI;
    else if (isCmseNSCall)
      CallOpc = ARMISD::tSECALL;
    else if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
      CallOpc = ARMISD::CALL_NOLINK;
    else
      CallOpc = ARMISD::CALL;
  } else {
    if (!isDirect && !Subtarget->hasV5TOps())
      CallOpc = ARMISD::CALL_NOLINK;
    else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
             // Emit a regular call when code size is the priority.
             !Subtarget->hasMinSize())
      // "mov lr, pc; b _foo" to avoid confusing the return stack
      // predictor (RSP).
      CallOpc = ARMISD::CALL_NOLINK;
    else
      CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
  }

  // We don't usually want to end the call-sequence here because we would tidy
  // the frame up *after* the call, however in the ABI-changing tail-call case
  // we've carefully laid out the parameters so that when sp is reset they'll be
  // in the correct location.
  if (isTailCall && !isSibCall) {
    Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true),
                               DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
    InFlag = Chain.getValue(1);
  }

  std::vector<SDValue> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  if (isTailCall) {
    Ops.push_back(DAG.getTargetConstant(SPDiff, dl, MVT::i32));
  }

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // Add a register mask operand representing the call-preserved registers.
  if (!isTailCall) {
    const uint32_t *Mask;
    const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
    if (isThisReturn) {
      // For 'this' returns, use the R0-preserving mask if applicable.
      Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
      if (!Mask) {
        // Set isThisReturn to false if the calling convention is not one that
        // allows 'returned' to be modeled in this way, so LowerCallResult does
        // not try to pass 'this' straight through.
        isThisReturn = false;
        Mask = ARI->getCallPreservedMask(MF, CallConv);
      }
    } else
      Mask = ARI->getCallPreservedMask(MF, CallConv);

    assert(Mask && "Missing call preserved mask for calling convention");
    Ops.push_back(DAG.getRegisterMask(Mask));
  }

  if (InFlag.getNode())
    Ops.push_back(InFlag);

  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  if (isTailCall) {
    MF.getFrameInfo().setHasTailCall();
    SDValue Ret = DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
    DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
    return Ret;
  }

  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
  InFlag = Chain.getValue(1);
  DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));

  // If we're guaranteeing tail-calls will be honoured, the callee must
  // pop its own argument stack on return. But this call is *not* a tail call
  // so we need to undo that after it returns to restore the status quo.
  bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
  uint64_t CalleePopBytes =
      canGuaranteeTCO(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : -1ULL;

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
                             DAG.getIntPtrConstant(CalleePopBytes, dl, true),
                             InFlag, dl);
  if (!Ins.empty())
    InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
                         InVals, isThisReturn,
                         isThisReturn ? OutVals[0] : SDValue());
}

/// HandleByVal - Every parameter *after* a byval parameter is passed
/// on the stack. Remember the next parameter register to allocate,
/// and then confiscate the rest of the parameter registers to ensure
/// this.
void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
                                    Align Alignment) const {
  // Byval (as with any stack) slots are always at least 4 byte aligned.
  Alignment = std::max(Alignment, Align(4));

  unsigned Reg = State->AllocateReg(GPRArgRegs);
  if (!Reg)
    return;

  unsigned AlignInRegs = Alignment.value() / 4;
  unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
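  // For example (illustrative values): a byval with Align(8) gives
  // AlignInRegs == 2, so if Reg is r1 then Waste == 1 and one register is
  // skipped, leaving the byval to start in the 8-byte-aligned r2.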
  for (unsigned i = 0; i < Waste; ++i)
    Reg = State->AllocateReg(GPRArgRegs);

  if (!Reg)
    return;

  unsigned Excess = 4 * (ARM::R4 - Reg);

  // Special case when NSAA != SP and the parameter is larger than all the
  // remaining GPR argument registers combined. In that case we cannot split
  // the parameter: it must go entirely to the stack, and we must also set the
  // NCRN to R4, wasting all remaining registers.
  const unsigned NSAAOffset = State->getNextStackOffset();
  if (NSAAOffset != 0 && Size > Excess) {
    while (State->AllocateReg(GPRArgRegs))
      ;
    return;
  }

  // The first register for the byval parameter is the first register that
  // wasn't allocated before this method call, i.e. "Reg".
  // If the parameter is small enough to be saved in the range [Reg, r4), then
  // the end (first-past-last) register is Reg + param-size-in-regs; otherwise
  // the parameter is split between registers and the stack, and the end
  // register is r4.
  unsigned ByValRegBegin = Reg;
  unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
  State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
  // Note: the first register was already allocated at the beginning of this
  // function; allocate the remaining registers we need.
  for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
    State->AllocateReg(GPRArgRegs);
  // A byval parameter that is split between registers and memory needs its
  // size truncated here.
  // In the case where the entire structure fits in registers, we set the
  // size in memory to zero.
  Size = std::max<int>(Size - Excess, 0);
}

/// MatchingStackOffset - Return true if the given stack call argument is
/// already available in the same position (relatively) of the caller's
/// incoming argument stack.
static
bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
                         MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
                         const TargetInstrInfo *TII) {
  unsigned Bytes = Arg.getValueSizeInBits() / 8;
  int FI = std::numeric_limits<int>::max();
  if (Arg.getOpcode() == ISD::CopyFromReg) {
    Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
    if (!Register::isVirtualRegister(VR))
      return false;
    MachineInstr *Def = MRI->getVRegDef(VR);
    if (!Def)
      return false;
    if (!Flags.isByVal()) {
      if (!TII->isLoadFromStackSlot(*Def, FI))
        return false;
    } else {
      return false;
    }
  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
    if (Flags.isByVal())
      // A ByVal argument is passed in as a pointer but it's now being
      // dereferenced, e.g.
      // define @foo(%struct.X* %A) {
      //   tail call @bar(%struct.X* byval %A)
      // }
      return false;
    SDValue Ptr = Ld->getBasePtr();
    FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
    if (!FINode)
      return false;
    FI = FINode->getIndex();
  } else
    return false;

  assert(FI != std::numeric_limits<int>::max());
  if (!MFI.isFixedObjectIndex(FI))
    return false;
  return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
}

/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function.
bool ARMTargetLowering::IsEligibleForTailCallOptimization(
    SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
    bool isCalleeStructRet, bool isCallerStructRet,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG,
    const bool isIndirect) const {
  MachineFunction &MF = DAG.getMachineFunction();
  const Function &CallerF = MF.getFunction();
  CallingConv::ID CallerCC = CallerF.getCallingConv();

  assert(Subtarget->supportsTailCall());

  // Indirect tail calls cannot be optimized for Thumb1 if the args
  // to the call take up r0-r3. The reason is that there are no legal registers
  // left to hold the pointer to the function to be called.
  // Similarly, if the function uses return address sign and authentication,
  // r12 is needed to hold the PAC and is not available to hold the callee
  // address.
  if (Outs.size() >= 4 &&
      (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect)) {
    if (Subtarget->isThumb1Only())
      return false;
    // Conservatively assume the function spills LR.
    if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(true))
      return false;
  }

  // Look for obvious safe cases to perform tail call optimization that do not
  // require ABI changes. This is what gcc calls sibcall.

  // Exception-handling functions need a special set of instructions to
  // indicate a return to the hardware. Tail-calling another function would
  // probably break this.
  if (CallerF.hasFnAttribute("interrupt"))
    return false;

  if (canGuaranteeTCO(CalleeCC,
                      getTargetMachine().Options.GuaranteedTailCallOpt))
    return CalleeCC == CallerCC;

  // Also avoid sibcall optimization if either caller or callee uses struct
  // return semantics.
  if (isCalleeStructRet || isCallerStructRet)
    return false;

  // Externally-defined functions with weak linkage should not be
  // tail-called on ARM when the OS does not support dynamic
  // pre-emption of symbols, as the AAELF spec requires normal calls
  // to undefined weak functions to be replaced with a NOP or jump to the
  // next instruction. The behaviour of branch instructions in this
  // situation (as used for tail calls) is implementation-defined, so we
  // cannot rely on the linker replacing the tail call with a return.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = G->getGlobal();
    const Triple &TT = getTargetMachine().getTargetTriple();
    if (GV->hasExternalWeakLinkage() &&
        (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
      return false;
  }

  // Check that the call results are passed in the same way.
  LLVMContext &C = *DAG.getContext();
  if (!CCState::resultsCompatible(
          getEffectiveCallingConv(CalleeCC, isVarArg),
          getEffectiveCallingConv(CallerCC, CallerF.isVarArg()), MF, C, Ins,
          CCAssignFnForReturn(CalleeCC, isVarArg),
          CCAssignFnForReturn(CallerCC, CallerF.isVarArg())))
    return false;
  // The callee has to preserve all registers the caller needs to preserve.
  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
  if (CalleeCC != CallerCC) {
    const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
    if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
      return false;
  }

  // If the caller's vararg or byval argument has been split between registers
  // and stack, do not perform the tail call, since part of the argument is in
  // the caller's local frame.
  const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
  if (AFI_Caller->getArgRegsSaveSize())
    return false;

  // If the callee takes no arguments then go on to check the results of the
  // call.
  if (!Outs.empty()) {
    // Check if stack adjustment is needed. For now, do not do this if any
    // argument is passed on the stack.
    SmallVector<CCValAssign, 16> ArgLocs;
    CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
    CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
    if (CCInfo.getNextStackOffset()) {
      // Check if the arguments are already laid out in the right way as
      // the caller's fixed stack objects.
      MachineFrameInfo &MFI = MF.getFrameInfo();
      const MachineRegisterInfo *MRI = &MF.getRegInfo();
      const TargetInstrInfo *TII = Subtarget->getInstrInfo();
      for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
           i != e;
           ++i, ++realArgIdx) {
        CCValAssign &VA = ArgLocs[i];
        EVT RegVT = VA.getLocVT();
        SDValue Arg = OutVals[realArgIdx];
        ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
        if (VA.getLocInfo() == CCValAssign::Indirect)
          return false;
        if (VA.needsCustom() && (RegVT == MVT::f64 || RegVT == MVT::v2f64)) {
          // f64 and vector types are split into multiple registers or
          // register/stack-slot combinations. The types will not match
          // the registers; give up on memory f64 refs until we figure
          // out what to do about this.
          if (!VA.isRegLoc())
            return false;
          if (!ArgLocs[++i].isRegLoc())
            return false;
          if (RegVT == MVT::v2f64) {
            if (!ArgLocs[++i].isRegLoc())
              return false;
            if (!ArgLocs[++i].isRegLoc())
              return false;
          }
        } else if (!VA.isRegLoc()) {
          if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
                                   MFI, MRI, TII))
            return false;
        }
      }
    }

    const MachineRegisterInfo &MRI = MF.getRegInfo();
    if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
      return false;
  }

  return true;
}

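/// CanLowerReturn - Check whether the return values described by Outs can
/// all be assigned to return registers by the return calling-convention
/// function for CallConv.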
bool
ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
                                  MachineFunction &MF, bool isVarArg,
                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
                                  LLVMContext &Context) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
  return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
}

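/// LowerInterruptReturn - Build the special return sequence for "interrupt"
/// handlers: an INTRET_FLAG node whose extra operand is the
/// exception-specific offset to subtract from LR (see the table below).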
static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
                                    const SDLoc &DL, SelectionDAG &DAG) {
  const MachineFunction &MF = DAG.getMachineFunction();
  const Function &F = MF.getFunction();

  StringRef IntKind = F.getFnAttribute("interrupt").getValueAsString();

  // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
  // version of the "preferred return address". These offsets affect the return
  // instruction if this is a return from PL1 without hypervisor extensions.
  // IRQ/FIQ: +4    "subs pc, lr, #4"
  // SWI:     0     "subs pc, lr, #0"
  // ABORT:   +4    "subs pc, lr, #4"
  // UNDEF:   +4/+2 "subs pc, lr, #0"
  // UNDEF varies depending on whether the exception came from ARM or Thumb
  // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.

  int64_t LROffset;
  if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
      IntKind == "ABORT")
    LROffset = 4;
  else if (IntKind == "SWI" || IntKind == "UNDEF")
    LROffset = 0;
  else
    report_fatal_error("Unsupported interrupt attribute. If present, value "
                       "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");

  RetOps.insert(RetOps.begin() + 1,
                DAG.getConstant(LROffset, DL, MVT::i32, false));

  return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps);
}

SDValue
ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               const SDLoc &dl, SelectionDAG &DAG) const {
  // CCValAssign - represent the assignment of the return value to a location.
  SmallVector<CCValAssign, 16> RVLocs;

  // CCState - Info about the registers and stack slots.
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  // Analyze outgoing return values.
  CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));

  SDValue Flag;
  SmallVector<SDValue, 4> RetOps;
  RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
  bool isLittleEndian = Subtarget->isLittle();

  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  AFI->setReturnRegsCount(RVLocs.size());

  // Report an error if a CMSE entry function returns a structure through the
  // first pointer argument.
  if (AFI->isCmseNSEntryFunction() && MF.getFunction().hasStructRetAttr()) {
    // Note: using an empty SDLoc(), as the first line of the function is a
    // better place to report than the last line.
    DiagnosticInfoUnsupported Diag(
        DAG.getMachineFunction().getFunction(),
        "secure entry function would return value through pointer",
        SDLoc().getDebugLoc());
    DAG.getContext()->diagnose(Diag);
  }
3163 | |||||
3164 | // Copy the result values into the output registers. | ||||
3165 | for (unsigned i = 0, realRVLocIdx = 0; | ||||
3166 | i != RVLocs.size(); | ||||
3167 | ++i, ++realRVLocIdx) { | ||||
3168 | CCValAssign &VA = RVLocs[i]; | ||||
3169 | assert(VA.isRegLoc() && "Can only return in registers!")(static_cast <bool> (VA.isRegLoc() && "Can only return in registers!" ) ? void (0) : __assert_fail ("VA.isRegLoc() && \"Can only return in registers!\"" , "llvm/lib/Target/ARM/ARMISelLowering.cpp", 3169, __extension__ __PRETTY_FUNCTION__)); | ||||
3170 | |||||
3171 | SDValue Arg = OutVals[realRVLocIdx]; | ||||
3172 | bool ReturnF16 = false; | ||||
3173 | |||||
3174 | if (Subtarget->hasFullFP16() && Subtarget->isTargetHardFloat()) { | ||||
3175 | // Half-precision return values can be returned like this: | ||||
3176 | // | ||||
3177 | // t11 f16 = fadd ... | ||||
3178 | // t12: i16 = bitcast t11 | ||||
3179 | // t13: i32 = zero_extend t12 | ||||
3180 | // t14: f32 = bitcast t13 <~~~~~~~ Arg | ||||
3181 | // | ||||
3182 | // to avoid code generation for bitcasts, we simply set Arg to the node | ||||
3183 | // that produces the f16 value, t11 in this case. | ||||
3184 | // | ||||
3185 | if (Arg.getValueType() == MVT::f32 && Arg.getOpcode() == ISD::BITCAST) { | ||||
3186 | SDValue ZE = Arg.getOperand(0); | ||||
3187 | if (ZE.getOpcode() == ISD::ZERO_EXTEND && ZE.getValueType() == MVT::i32) { | ||||
3188 | SDValue BC = ZE.getOperand(0); | ||||
3189 | if (BC.getOpcode() == ISD::BITCAST && BC.getValueType() == MVT::i16) { | ||||
3190 | Arg = BC.getOperand(0); | ||||
3191 | ReturnF16 = true; | ||||
3192 | } | ||||
3193 | } | ||||
3194 | } | ||||
3195 | } | ||||
3196 | |||||
3197 | switch (VA.getLocInfo()) { | ||||
3198 | default: llvm_unreachable("Unknown loc info!")::llvm::llvm_unreachable_internal("Unknown loc info!", "llvm/lib/Target/ARM/ARMISelLowering.cpp" , 3198); | ||||
3199 | case CCValAssign::Full: break; | ||||
3200 | case CCValAssign::BCvt: | ||||
3201 | if (!ReturnF16) | ||||
3202 | Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); | ||||
3203 | break; | ||||
3204 | } | ||||
3205 | |||||
3206 | // Mask f16 arguments if this is a CMSE nonsecure entry. | ||||
3207 | auto RetVT = Outs[realRVLocIdx].ArgVT; | ||||
3208 | if (AFI->isCmseNSEntryFunction() && (RetVT == MVT::f16)) { | ||||
3209 | if (VA.needsCustom() && VA.getValVT() == MVT::f16) { | ||||
3210 | Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg); | ||||
3211 | } else { | ||||
3212 | auto LocBits = VA.getLocVT().getSizeInBits(); | ||||
3213 | auto MaskValue = APInt::getLowBitsSet(LocBits, RetVT.getSizeInBits()); | ||||
3214 | SDValue Mask = | ||||
3215 | DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits)); | ||||
3216 | Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg); | ||||
3217 | Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask); | ||||
3218 | Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); | ||||
3219 | } | ||||
3220 | } | ||||
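| // For example, with RetVT == f16 in a 32-bit location, LocBits == 32 and | ||||
| // MaskValue == 0xFFFF, so only the low 16 half-precision bits survive; | ||||
| // the emitted nodes are roughly: | ||||
| //   (bitcast LocVT (and i32 (bitcast i32 Arg), 0xFFFF)) | ||||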
3221 | |||||
3222 | if (VA.needsCustom() && | ||||
3223 | (VA.getLocVT() == MVT::v2f64 || VA.getLocVT() == MVT::f64)) { | ||||
3224 | if (VA.getLocVT() == MVT::v2f64) { | ||||
3225 | // Extract the first half and return it in two registers. | ||||
3226 | SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, | ||||
3227 | DAG.getConstant(0, dl, MVT::i32)); | ||||
3228 | SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl, | ||||
3229 | DAG.getVTList(MVT::i32, MVT::i32), Half); | ||||
3230 | |||||
3231 | Chain = | ||||
3232 | DAG.getCopyToReg(Chain, dl, VA.getLocReg(), | ||||
3233 | HalfGPRs.getValue(isLittleEndian ? 0 : 1), Flag); | ||||
3234 | Flag = Chain.getValue(1); | ||||
3235 | RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); | ||||
3236 | VA = RVLocs[++i]; // skip ahead to next loc | ||||
3237 | Chain = | ||||
3238 | DAG.getCopyToReg(Chain, dl, VA.getLocReg(), | ||||
3239 | HalfGPRs.getValue(isLittleEndian ? 1 : 0), Flag); | ||||
3240 | Flag = Chain.getValue(1); | ||||
3241 | RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); | ||||
3242 | VA = RVLocs[++i]; // skip ahead to next loc | ||||
3243 | |||||
3244 | // Extract the 2nd half and fall through to handle it as an f64 value. | ||||
3245 | Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, | ||||
3246 | DAG.getConstant(1, dl, MVT::i32)); | ||||
3247 | } | ||||
3248 | // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is | ||||
3249 | // available. | ||||
3250 | SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, | ||||
3251 | DAG.getVTList(MVT::i32, MVT::i32), Arg); | ||||
3252 | Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), | ||||
3253 | fmrrd.getValue(isLittleEndian ? 0 : 1), Flag); | ||||
3254 | Flag = Chain.getValue(1); | ||||
3255 | RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); | ||||
3256 | VA = RVLocs[++i]; // skip ahead to next loc | ||||
3257 | Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), | ||||
3258 | fmrrd.getValue(isLittleEndian ? 1 : 0), Flag); | ||||
3259 | } else | ||||
3260 | Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); | ||||
3261 | |||||
3262 | // Glue all emitted copies together so that nothing else can be | ||||
3263 | // scheduled in between them. | ||||
3264 | Flag = Chain.getValue(1); | ||||
3265 | RetOps.push_back(DAG.getRegister( | ||||
3266 | VA.getLocReg(), ReturnF16 ? Arg.getValueType() : VA.getLocVT())); | ||||
3267 | } | ||||
3268 | const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo(); | ||||
3269 | const MCPhysReg *I = | ||||
3270 | TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction()); | ||||
3271 | if (I) { | ||||
3272 | for (; *I; ++I) { | ||||
3273 | if (ARM::GPRRegClass.contains(*I)) | ||||
3274 | RetOps.push_back(DAG.getRegister(*I, MVT::i32)); | ||||
3275 | else if (ARM::DPRRegClass.contains(*I)) | ||||
3276 | RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64))); | ||||
3277 | else | ||||
3278 | llvm_unreachable("Unexpected register class in CSRsViaCopy!"); | ||||
3279 | } | ||||
3280 | } | ||||
3281 | |||||
3282 | // Update chain and glue. | ||||
3283 | RetOps[0] = Chain; | ||||
3284 | if (Flag.getNode()) | ||||
3285 | RetOps.push_back(Flag); | ||||
3286 | |||||
3287 | // CPUs which aren't M-class use a special sequence to return from | ||||
3288 | // exceptions (roughly, any instruction setting pc and cpsr simultaneously, | ||||
3289 | // though we use "subs pc, lr, #N"). | ||||
3290 | // | ||||
3291 | // M-class CPUs actually use a normal return sequence with a special | ||||
3292 | // (hardware-provided) value in LR, so the normal code path works. | ||||
3293 | if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt") && | ||||
3294 | !Subtarget->isMClass()) { | ||||
3295 | if (Subtarget->isThumb1Only()) | ||||
3296 | report_fatal_error("interrupt attribute is not supported in Thumb1"); | ||||
3297 | return LowerInterruptReturn(RetOps, dl, DAG); | ||||
3298 | } | ||||
3299 | |||||
3300 | ARMISD::NodeType RetNode = AFI->isCmseNSEntryFunction() ? ARMISD::SERET_FLAG : | ||||
3301 | ARMISD::RET_FLAG; | ||||
3302 | return DAG.getNode(RetNode, dl, MVT::Other, RetOps); | ||||
3303 | } | ||||
3304 | |||||
3305 | bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { | ||||
3306 | if (N->getNumValues() != 1) | ||||
3307 | return false; | ||||
3308 | if (!N->hasNUsesOfValue(1, 0)) | ||||
3309 | return false; | ||||
3310 | |||||
3311 | SDValue TCChain = Chain; | ||||
3312 | SDNode *Copy = *N->use_begin(); | ||||
3313 | if (Copy->getOpcode() == ISD::CopyToReg) { | ||||
3314 | // If the copy has a glue operand, we conservatively assume it isn't safe to | ||||
3315 | // perform a tail call. | ||||
3316 | if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue) | ||||
3317 | return false; | ||||
3318 | TCChain = Copy->getOperand(0); | ||||
3319 | } else if (Copy->getOpcode() == ARMISD::VMOVRRD) { | ||||
3320 | SDNode *VMov = Copy; | ||||
3321 | // f64 returned in a pair of GPRs. | ||||
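| // The pattern we expect here is roughly: | ||||
| //   t0: i32, t1: i32 = ARMISD::VMOVRRD Val | ||||
| //   c0: ch = CopyToReg Chain, Reg0, t0   <- first copy; TCChain = Chain | ||||
| //   c1: ch = CopyToReg c0, Reg1, t1      <- second copy, feeds the return | ||||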
3322 | SmallPtrSet<SDNode*, 2> Copies; | ||||
3323 | for (SDNode *U : VMov->uses()) { | ||||
3324 | if (U->getOpcode() != ISD::CopyToReg) | ||||
3325 | return false; | ||||
3326 | Copies.insert(U); | ||||
3327 | } | ||||
3328 | if (Copies.size() > 2) | ||||
3329 | return false; | ||||
3330 | |||||
3331 | for (SDNode *U : VMov->uses()) { | ||||
3332 | SDValue UseChain = U->getOperand(0); | ||||
3333 | if (Copies.count(UseChain.getNode())) | ||||
3334 | // Second CopyToReg | ||||
3335 | Copy = U; | ||||
3336 | else { | ||||
3337 | // We are at the top of this chain. | ||||
3338 | // If the copy has a glue operand, we conservatively assume it | ||||
3339 | // isn't safe to perform a tail call. | ||||
3340 | if (U->getOperand(U->getNumOperands() - 1).getValueType() == MVT::Glue) | ||||
3341 | return false; | ||||
3342 | // First CopyToReg | ||||
3343 | TCChain = UseChain; | ||||
3344 | } | ||||
3345 | } | ||||
3346 | } else if (Copy->getOpcode() == ISD::BITCAST) { | ||||
3347 | // f32 returned in a single GPR. | ||||
3348 | if (!Copy->hasOneUse()) | ||||
3349 | return false; | ||||
3350 | Copy = *Copy->use_begin(); | ||||
3351 | if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0)) | ||||
3352 | return false; | ||||
3353 | // If the copy has a glue operand, we conservatively assume it isn't safe to | ||||
3354 | // perform a tail call. | ||||
3355 | if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue) | ||||
3356 | return false; | ||||
3357 | TCChain = Copy->getOperand(0); | ||||
3358 | } else { | ||||
3359 | return false; | ||||
3360 | } | ||||
3361 | |||||
3362 | bool HasRet = false; | ||||
3363 | for (const SDNode *U : Copy->uses()) { | ||||
3364 | if (U->getOpcode() != ARMISD::RET_FLAG && | ||||
3365 | U->getOpcode() != ARMISD::INTRET_FLAG) | ||||
3366 | return false; | ||||
3367 | HasRet = true; | ||||
3368 | } | ||||
3369 | |||||
3370 | if (!HasRet) | ||||
3371 | return false; | ||||
3372 | |||||
3373 | Chain = TCChain; | ||||
3374 | return true; | ||||
3375 | } | ||||
3376 | |||||
3377 | bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { | ||||
3378 | if (!Subtarget->supportsTailCall()) | ||||
3379 | return false; | ||||
3380 | |||||
3381 | if (!CI->isTailCall()) | ||||
3382 | return false; | ||||
3383 | |||||
3384 | return true; | ||||
3385 | } | ||||
3386 | |||||
3387 | // Writing a 64-bit value requires splitting it into two 32-bit halves | ||||
3388 | // first, then passing the low and high parts through separately. | ||||
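| // For example: | ||||
| //   t: ch = write_register <reg>, i64 %v | ||||
| // becomes roughly | ||||
| //   lo: i32 = extract_element %v, 0 | ||||
| //   hi: i32 = extract_element %v, 1 | ||||
| //   t: ch = write_register <reg>, lo, hi | ||||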
3389 | static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) { | ||||
3390 | SDLoc DL(Op); | ||||
3391 | SDValue WriteValue = Op->getOperand(2); | ||||
3392 | |||||
3393 | // This function is only supposed to be called for an i64-typed argument. | ||||
3394 | assert(WriteValue.getValueType() == MVT::i64 | ||||
3395 | && "LowerWRITE_REGISTER called for non-i64 type argument."); | ||||
3396 | |||||
3397 | SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue, | ||||
3398 | DAG.getConstant(0, DL, MVT::i32)); | ||||
3399 | SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue, | ||||
3400 | DAG.getConstant(1, DL, MVT::i32)); | ||||
3401 | SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi }; | ||||
3402 | return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops); | ||||
3403 | } | ||||
3404 | |||||
3405 | // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as | ||||
3406 | // their target counterparts wrapped in the ARMISD::Wrapper node. Suppose N is | ||||
3407 | // one of the above-mentioned nodes. It has to be wrapped because otherwise | ||||
3408 | // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only | ||||
3409 | // be used to form an addressing mode. These wrapped nodes will be selected | ||||
3410 | // into MOVi. | ||||
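| // For example: | ||||
| //   t0: i32 = TargetConstantPool <cp entry>   <- Select(t0) would return t0 | ||||
| //   t1: i32 = ARMISD::Wrapper t0              <- selectable address move | ||||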
3411 | SDValue ARMTargetLowering::LowerConstantPool(SDValue Op, | ||||
3412 | SelectionDAG &DAG) const { | ||||
3413 | EVT PtrVT = Op.getValueType(); | ||||
3414 | // FIXME there is no actual debug info here | ||||
3415 | SDLoc dl(Op); | ||||
3416 | ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); | ||||
3417 | SDValue Res; | ||||
3418 | |||||
3419 | // When generating execute-only code, constant pools must be promoted to the | ||||
3420 | // global data section. It's a bit ugly that we can't share them across basic | ||||
3421 | // blocks, but this way we guarantee that execute-only behaves correctly with | ||||
3422 | // position-independent addressing modes. | ||||
3423 | if (Subtarget->genExecuteOnly()) { | ||||
3424 | auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>(); | ||||
3425 | auto T = const_cast<Type*>(CP->getType()); | ||||
3426 | auto C = const_cast<Constant*>(CP->getConstVal()); | ||||
3427 | auto M = const_cast<Module*>(DAG.getMachineFunction(). | ||||
3428 | getFunction().getParent()); | ||||
3429 | auto GV = new GlobalVariable( | ||||
3430 | *M, T, /*isConstant=*/true, GlobalVariable::InternalLinkage, C, | ||||
3431 | Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" + | ||||
3432 | Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" + | ||||
3433 | Twine(AFI->createPICLabelUId()) | ||||
3434 | ); | ||||
3435 | SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV), | ||||
3436 | dl, PtrVT); | ||||
3437 | return LowerGlobalAddress(GA, DAG); | ||||
3438 | } | ||||
3439 | |||||
3440 | if (CP->isMachineConstantPoolEntry()) | ||||
3441 | Res = | ||||
3442 | DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign()); | ||||
3443 | else | ||||
3444 | Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign()); | ||||
3445 | return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res); | ||||
3446 | } | ||||
3447 | |||||
3448 | unsigned ARMTargetLowering::getJumpTableEncoding() const { | ||||
3449 | return MachineJumpTableInfo::EK_Inline; | ||||
3450 | } | ||||
3451 | |||||
3452 | SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, | ||||
3453 | SelectionDAG &DAG) const { | ||||
3454 | MachineFunction &MF = DAG.getMachineFunction(); | ||||
3455 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | ||||
3456 | unsigned ARMPCLabelIndex = 0; | ||||
3457 | SDLoc DL(Op); | ||||
3458 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); | ||||
3459 | const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); | ||||
3460 | SDValue CPAddr; | ||||
3461 | bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI(); | ||||
3462 | if (!IsPositionIndependent) { | ||||
3463 | CPAddr = DAG.getTargetConstantPool(BA, PtrVT, Align(4)); | ||||
3464 | } else { | ||||
3465 | unsigned PCAdj = Subtarget->isThumb() ? 4 : 8; | ||||
3466 | ARMPCLabelIndex = AFI->createPICLabelUId(); | ||||
3467 | ARMConstantPoolValue *CPV = | ||||
3468 | ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex, | ||||
3469 | ARMCP::CPBlockAddress, PCAdj); | ||||
3470 | CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4)); | ||||
3471 | } | ||||
3472 | CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr); | ||||
3473 | SDValue Result = DAG.getLoad( | ||||
3474 | PtrVT, DL, DAG.getEntryNode(), CPAddr, | ||||
3475 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); | ||||
3476 | if (!IsPositionIndependent) | ||||
3477 | return Result; | ||||
3478 | SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32); | ||||
3479 | return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel); | ||||
3480 | } | ||||
3481 | |||||
3482 | /// Convert a TLS address reference into the correct sequence of loads | ||||
3483 | /// and calls to compute the variable's address for Darwin, and return an | ||||
3484 | /// SDValue containing the final node. | ||||
3485 | |||||
3486 | /// Darwin only has one TLS scheme which must be capable of dealing with the | ||||
3487 | /// fully general situation, in the worst case. This means: | ||||
3488 | /// + "extern __thread" declaration. | ||||
3489 | /// + Defined in a possibly unknown dynamic library. | ||||
3490 | /// | ||||
3491 | /// The general system is that each __thread variable has a [3 x i32] descriptor | ||||
3492 | /// which contains information used by the runtime to calculate the address. The | ||||
3493 | /// only part of this the compiler needs to know about is the first word, which | ||||
3494 | /// contains a function pointer that must be called with the address of the | ||||
3495 | /// entire descriptor in "r0". | ||||
3496 | /// | ||||
3497 | /// Since this descriptor may be in a different unit, in general access must | ||||
3498 | /// proceed along the usual ARM rules. A common sequence to produce is: | ||||
3499 | /// | ||||
3500 | /// movw rT1, :lower16:_var$non_lazy_ptr | ||||
3501 | /// movt rT1, :upper16:_var$non_lazy_ptr | ||||
3502 | /// ldr r0, [rT1] | ||||
3503 | /// ldr rT2, [r0] | ||||
3504 | /// blx rT2 | ||||
3505 | /// [...address now in r0...] | ||||
3506 | SDValue | ||||
3507 | ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op, | ||||
3508 | SelectionDAG &DAG) const { | ||||
3509 | assert(Subtarget->isTargetDarwin() && | ||||
3510 | "This function expects a Darwin target"); | ||||
3511 | SDLoc DL(Op); | ||||
3512 | |||||
3513 | // The first step is to get the address of the actual global symbol. This is | ||||
3514 | // where the TLS descriptor lives. | ||||
3515 | SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG); | ||||
3516 | |||||
3517 | // The first entry in the descriptor is a function pointer that we must call | ||||
3518 | // to obtain the address of the variable. | ||||
3519 | SDValue Chain = DAG.getEntryNode(); | ||||
3520 | SDValue FuncTLVGet = DAG.getLoad( | ||||
3521 | MVT::i32, DL, Chain, DescAddr, | ||||
3522 | MachinePointerInfo::getGOT(DAG.getMachineFunction()), Align(4), | ||||
3523 | MachineMemOperand::MONonTemporal | MachineMemOperand::MODereferenceable | | ||||
3524 | MachineMemOperand::MOInvariant); | ||||
3525 | Chain = FuncTLVGet.getValue(1); | ||||
3526 | |||||
3527 | MachineFunction &F = DAG.getMachineFunction(); | ||||
3528 | MachineFrameInfo &MFI = F.getFrameInfo(); | ||||
3529 | MFI.setAdjustsStack(true); | ||||
3530 | |||||
3531 | // TLS calls preserve all registers except those that absolutely must be | ||||
3532 | // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be | ||||
3533 | // silly). | ||||
3534 | auto TRI = | ||||
3535 | getTargetMachine().getSubtargetImpl(F.getFunction())->getRegisterInfo(); | ||||
3536 | auto ARI = static_cast<const ARMRegisterInfo *>(TRI); | ||||
3537 | const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction()); | ||||
3538 | |||||
3539 | // Finally, we can make the call. This is just a degenerate version of a | ||||
3540 | // normal ARM call node: r0 takes the address of the descriptor, and the | ||||
3541 | // call returns the address of the variable in this thread. | ||||
3542 | Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue()); | ||||
3543 | Chain = | ||||
3544 | DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue), | ||||
3545 | Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32), | ||||
3546 | DAG.getRegisterMask(Mask), Chain.getValue(1)); | ||||
3547 | return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1)); | ||||
3548 | } | ||||
3549 | |||||
3550 | SDValue | ||||
3551 | ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op, | ||||
3552 | SelectionDAG &DAG) const { | ||||
3553 | assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering"); | ||||
3554 | |||||
3555 | SDValue Chain = DAG.getEntryNode(); | ||||
3556 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); | ||||
3557 | SDLoc DL(Op); | ||||
3558 | |||||
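| // The overall address computation, in C-like terms, is roughly: | ||||
| //   char **TlsArray = *(char ***)(TEB + 0x2c); | ||||
| //   char *TlsData   = TlsArray[_tls_index]; | ||||
| //   addr            = TlsData + <the variable's SECREL offset>; | ||||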
3559 | // Load the current TEB (thread environment block) | ||||
3560 | SDValue Ops[] = {Chain, | ||||
3561 | DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32), | ||||
3562 | DAG.getTargetConstant(15, DL, MVT::i32), | ||||
3563 | DAG.getTargetConstant(0, DL, MVT::i32), | ||||
3564 | DAG.getTargetConstant(13, DL, MVT::i32), | ||||
3565 | DAG.getTargetConstant(0, DL, MVT::i32), | ||||
3566 | DAG.getTargetConstant(2, DL, MVT::i32)}; | ||||
3567 | SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, | ||||
3568 | DAG.getVTList(MVT::i32, MVT::Other), Ops); | ||||
3569 | |||||
3570 | SDValue TEB = CurrentTEB.getValue(0); | ||||
3571 | Chain = CurrentTEB.getValue(1); | ||||
3572 | |||||
3573 | // Load the ThreadLocalStoragePointer from the TEB | ||||
3574 | // A pointer to the TLS array is located at offset 0x2c from the TEB. | ||||
3575 | SDValue TLSArray = | ||||
3576 | DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL)); | ||||
3577 | TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo()); | ||||
3578 | |||||
3579 | // The pointer to the thread's TLS data area is at offset (TLS index * 4) | ||||
3580 | // into the TLS array. | ||||
3581 | |||||
3582 | // Load the TLS index from the C runtime | ||||
3583 | SDValue TLSIndex = | ||||
3584 | DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG); | ||||
3585 | TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex); | ||||
3586 | TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo()); | ||||
3587 | |||||
3588 | SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex, | ||||
3589 | DAG.getConstant(2, DL, MVT::i32)); | ||||
3590 | SDValue TLS = DAG.getLoad(PtrVT, DL, Chain, | ||||
3591 | DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot), | ||||
3592 | MachinePointerInfo()); | ||||
3593 | |||||
3594 | // Get the offset of the start of the .tls section (section base) | ||||
3595 | const auto *GA = cast<GlobalAddressSDNode>(Op); | ||||
3596 | auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL); | ||||
3597 | SDValue Offset = DAG.getLoad( | ||||
3598 | PtrVT, DL, Chain, | ||||
3599 | DAG.getNode(ARMISD::Wrapper, DL, MVT::i32, | ||||
3600 | DAG.getTargetConstantPool(CPV, PtrVT, Align(4))), | ||||
3601 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); | ||||
3602 | |||||
3603 | return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset); | ||||
3604 | } | ||||
3605 | |||||
3606 | // Lower ISD::GlobalTLSAddress using the "general dynamic" model | ||||
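| // A typical sequence is roughly: | ||||
| //   ldr r0, <TLSGD constant-pool entry> | ||||
| //   add r0, pc              ; ARMISD::PIC_ADD | ||||
| //   bl  __tls_get_addr | ||||
| //   ; variable's address is now in r0 | ||||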
3607 | SDValue | ||||
3608 | ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, | ||||
3609 | SelectionDAG &DAG) const { | ||||
3610 | SDLoc dl(GA); | ||||
3611 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); | ||||
3612 | unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; | ||||
3613 | MachineFunction &MF = DAG.getMachineFunction(); | ||||
3614 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | ||||
3615 | unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); | ||||
3616 | ARMConstantPoolValue *CPV = | ||||
3617 | ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex, | ||||
3618 | ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true); | ||||
3619 | SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, Align(4)); | ||||
3620 | Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); | ||||
3621 | Argument = DAG.getLoad( | ||||
3622 | PtrVT, dl, DAG.getEntryNode(), Argument, | ||||
3623 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); | ||||
3624 | SDValue Chain = Argument.getValue(1); | ||||
3625 | |||||
3626 | SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); | ||||
3627 | Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel); | ||||
3628 | |||||
3629 | // call __tls_get_addr. | ||||
3630 | ArgListTy Args; | ||||
3631 | ArgListEntry Entry; | ||||
3632 | Entry.Node = Argument; | ||||
3633 | Entry.Ty = Type::getInt32Ty(*DAG.getContext()); | ||||
3634 | Args.push_back(Entry); | ||||
3635 | |||||
3636 | // FIXME: is there useful debug info available here? | ||||
3637 | TargetLowering::CallLoweringInfo CLI(DAG); | ||||
3638 | CLI.setDebugLoc(dl).setChain(Chain).setLibCallee( | ||||
3639 | CallingConv::C, Type::getInt32Ty(*DAG.getContext()), | ||||
3640 | DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args)); | ||||
3641 | |||||
3642 | std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); | ||||
3643 | return CallResult.first; | ||||
3644 | } | ||||
3645 | |||||
3646 | // Lower ISD::GlobalTLSAddress using the "initial exec" or | ||||
3647 | // "local exec" model. | ||||
3648 | SDValue | ||||
3649 | ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, | ||||
3650 | SelectionDAG &DAG, | ||||
3651 | TLSModel::Model model) const { | ||||
3652 | const GlobalValue *GV = GA->getGlobal(); | ||||
3653 | SDLoc dl(GA); | ||||
3654 | SDValue Offset; | ||||
3655 | SDValue Chain = DAG.getEntryNode(); | ||||
3656 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); | ||||
3657 | // Get the Thread Pointer | ||||
3658 | SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); | ||||
3659 | |||||
3660 | if (model == TLSModel::InitialExec) { | ||||
3661 | MachineFunction &MF = DAG.getMachineFunction(); | ||||
3662 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | ||||
3663 | unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); | ||||
3664 | // Initial exec model. | ||||
3665 | unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; | ||||
3666 | ARMConstantPoolValue *CPV = | ||||
3667 | ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex, | ||||
3668 | ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF, | ||||
3669 | true); | ||||
3670 | Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4)); | ||||
3671 | Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); | ||||
3672 | Offset = DAG.getLoad( | ||||
3673 | PtrVT, dl, Chain, Offset, | ||||
3674 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); | ||||
3675 | Chain = Offset.getValue(1); | ||||
3676 | |||||
3677 | SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); | ||||
3678 | Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel); | ||||
3679 | |||||
3680 | Offset = DAG.getLoad( | ||||
3681 | PtrVT, dl, Chain, Offset, | ||||
3682 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); | ||||
3683 | } else { | ||||
3684 | // local exec model | ||||
3685 | assert(model == TLSModel::LocalExec); | ||||
3686 | ARMConstantPoolValue *CPV = | ||||
3687 | ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF); | ||||
3688 | Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4)); | ||||
3689 | Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); | ||||
3690 | Offset = DAG.getLoad( | ||||
3691 | PtrVT, dl, Chain, Offset, | ||||
3692 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); | ||||
3693 | } | ||||
3694 | |||||
3695 | // The address of the thread-local variable is the sum of the thread | ||||
3696 | // pointer and the variable's offset. | ||||
3697 | return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset); | ||||
3698 | } | ||||
3699 | |||||
3700 | SDValue | ||||
3701 | ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { | ||||
3702 | GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); | ||||
3703 | if (DAG.getTarget().useEmulatedTLS()) | ||||
3704 | return LowerToTLSEmulatedModel(GA, DAG); | ||||
3705 | |||||
3706 | if (Subtarget->isTargetDarwin()) | ||||
3707 | return LowerGlobalTLSAddressDarwin(Op, DAG); | ||||
3708 | |||||
3709 | if (Subtarget->isTargetWindows()) | ||||
3710 | return LowerGlobalTLSAddressWindows(Op, DAG); | ||||
3711 | |||||
3712 | // TODO: implement the "local dynamic" model | ||||
3713 | assert(Subtarget->isTargetELF() && "Only ELF implemented here"); | ||||
3714 | TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal()); | ||||
3715 | |||||
3716 | switch (model) { | ||||
3717 | case TLSModel::GeneralDynamic: | ||||
3718 | case TLSModel::LocalDynamic: | ||||
3719 | return LowerToTLSGeneralDynamicModel(GA, DAG); | ||||
3720 | case TLSModel::InitialExec: | ||||
3721 | case TLSModel::LocalExec: | ||||
3722 | return LowerToTLSExecModels(GA, DAG, model); | ||||
3723 | } | ||||
3724 | llvm_unreachable("bogus TLS model"); | ||||
3725 | } | ||||
3726 | |||||
3727 | /// Return true if all users of V are within function F, looking through | ||||
3728 | /// ConstantExprs. | ||||
3729 | static bool allUsersAreInFunction(const Value *V, const Function *F) { | ||||
3730 | SmallVector<const User*,4> Worklist(V->users()); | ||||
3731 | while (!Worklist.empty()) { | ||||
3732 | auto *U = Worklist.pop_back_val(); | ||||
3733 | if (isa<ConstantExpr>(U)) { | ||||
3734 | append_range(Worklist, U->users()); | ||||
3735 | continue; | ||||
3736 | } | ||||
3737 | |||||
3738 | auto *I = dyn_cast<Instruction>(U); | ||||
3739 | if (!I || I->getParent()->getParent() != F) | ||||
3740 | return false; | ||||
3741 | } | ||||
3742 | return true; | ||||
3743 | } | ||||
3744 | |||||
3745 | static SDValue promoteToConstantPool(const ARMTargetLowering *TLI, | ||||
3746 | const GlobalValue *GV, SelectionDAG &DAG, | ||||
3747 | EVT PtrVT, const SDLoc &dl) { | ||||
3748 | // If we're creating a pool entry for a constant global with unnamed address, | ||||
3749 | // and the global is small enough, we can emit it inline into the constant pool | ||||
3750 | // to save ourselves an indirection. | ||||
3751 | // | ||||
3752 | // This is a win if the constant is only used in one function (so it doesn't | ||||
3753 | // need to be duplicated) or duplicating the constant wouldn't increase code | ||||
3754 | // size (implying the constant is no larger than 4 bytes). | ||||
3755 | const Function &F = DAG.getMachineFunction().getFunction(); | ||||
3756 | |||||
3757 | // We rely on this decision to inline being idempotent and unrelated to the | ||||
3758 | // use-site. We know that if we inline a variable at one use site, we'll | ||||
3759 | // inline it elsewhere too (and reuse the constant pool entry). Fast-isel | ||||
3760 | // doesn't know about this optimization, so bail out if it's enabled; | ||||
3761 | // otherwise we could decide to inline here (and thus never emit the GV) | ||||
3762 | // while fast-isel generated code still requires the GV. | ||||
3763 | if (!EnableConstpoolPromotion || | ||||
3764 | DAG.getMachineFunction().getTarget().Options.EnableFastISel) | ||||
3765 | return SDValue(); | ||||
3766 | |||||
3767 | auto *GVar = dyn_cast<GlobalVariable>(GV); | ||||
3768 | if (!GVar || !GVar->hasInitializer() || | ||||
3769 | !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() || | ||||
3770 | !GVar->hasLocalLinkage()) | ||||
3771 | return SDValue(); | ||||
3772 | |||||
3773 | // If we inline a value that contains relocations, we move the relocations | ||||
3774 | // from .data to .text. This is not allowed in position-independent code. | ||||
3775 | auto *Init = GVar->getInitializer(); | ||||
3776 | if ((TLI->isPositionIndependent() || TLI->getSubtarget()->isROPI()) && | ||||
3777 | Init->needsDynamicRelocation()) | ||||
3778 | return SDValue(); | ||||
3779 | |||||
3780 | // The constant islands pass can only really deal with alignment requests | ||||
3781 | // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote | ||||
3782 | // any type wanting greater alignment requirements than 4 bytes. We also | ||||
3783 | // can only promote constants that are multiples of 4 bytes in size or | ||||
3784 | // are paddable to a multiple of 4. Currently we only try to pad constants | ||||
3785 | // that are strings, for simplicity. | ||||
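| // For example, a 7-byte string has Size % 4 == 3, so RequiredPadding == 1 | ||||
| // and the initializer is emitted as 8 bytes with one trailing zero byte. | ||||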
3786 | auto *CDAInit = dyn_cast<ConstantDataArray>(Init); | ||||
3787 | unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType()); | ||||
3788 | Align PrefAlign = DAG.getDataLayout().getPreferredAlign(GVar); | ||||
3789 | unsigned RequiredPadding = 4 - (Size % 4); | ||||
3790 | bool PaddingPossible = | ||||
3791 | RequiredPadding == 4 || (CDAInit && CDAInit->isString()); | ||||
3792 | if (!PaddingPossible || PrefAlign > 4 || Size > ConstpoolPromotionMaxSize || | ||||
3793 | Size == 0) | ||||
3794 | return SDValue(); | ||||
3795 | |||||
3796 | unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding); | ||||
3797 | MachineFunction &MF = DAG.getMachineFunction(); | ||||
3798 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | ||||
3799 | |||||
3800 | // We can't bloat the constant pool too much, else the ConstantIslands pass | ||||
3801 | // may fail to converge. If we haven't promoted this global yet (it may have | ||||
3802 | // multiple uses), and promoting it would increase the constant pool size | ||||
3803 | // (Size > 4), ensure we have space to do so up to ConstpoolPromotionMaxTotal. | ||||
3804 | if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4) | ||||
3805 | if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >= | ||||
3806 | ConstpoolPromotionMaxTotal) | ||||
3807 | return SDValue(); | ||||
3808 | |||||
3809 | // This is only valid if all users are in a single function; we can't clone | ||||
3810 | // the constant in general. The LLVM IR unnamed_addr allows merging | ||||
3811 | // constants, but not cloning them. | ||||
3812 | // | ||||
3813 | // We could potentially allow cloning if we could prove all uses of the | ||||
3814 | // constant in the current function don't care about the address, like | ||||
3815 | // printf format strings. But that isn't implemented for now. | ||||
3816 | if (!allUsersAreInFunction(GVar, &F)) | ||||
3817 | return SDValue(); | ||||
3818 | |||||
3819 | // We're going to inline this global. Pad it out if needed. | ||||
3820 | if (RequiredPadding != 4) { | ||||
3821 | StringRef S = CDAInit->getAsString(); | ||||
3822 | |||||
3823 | SmallVector<uint8_t,16> V(S.size()); | ||||
3824 | std::copy(S.bytes_begin(), S.bytes_end(), V.begin()); | ||||
3825 | while (RequiredPadding--) | ||||
3826 | V.push_back(0); | ||||
3827 | Init = ConstantDataArray::get(*DAG.getContext(), V); | ||||
3828 | } | ||||
3829 | |||||
3830 | auto CPVal = ARMConstantPoolConstant::Create(GVar, Init); | ||||
3831 | SDValue CPAddr = DAG.getTargetConstantPool(CPVal, PtrVT, Align(4)); | ||||
3832 | if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) { | ||||
3833 | AFI->markGlobalAsPromotedToConstantPool(GVar); | ||||
3834 | AFI->setPromotedConstpoolIncrease(AFI->getPromotedConstpoolIncrease() + | ||||
3835 | PaddedSize - 4); | ||||
3836 | } | ||||
3837 | ++NumConstpoolPromoted; | ||||
3838 | return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); | ||||
3839 | } | ||||
3840 | |||||
3841 | bool ARMTargetLowering::isReadOnly(const GlobalValue *GV) const { | ||||
3842 | if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) | ||||
3843 | if (!(GV = GA->getAliaseeObject())) | ||||
3844 | return false; | ||||
3845 | if (const auto *V = dyn_cast<GlobalVariable>(GV)) | ||||
3846 | return V->isConstant(); | ||||
3847 | return isa<Function>(GV); | ||||
3848 | } | ||||
3849 | |||||
3850 | SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op, | ||||
3851 | SelectionDAG &DAG) const { | ||||
3852 | switch (Subtarget->getTargetTriple().getObjectFormat()) { | ||||
3853 | default: llvm_unreachable("unknown object format"); | ||||
3854 | case Triple::COFF: | ||||
3855 | return LowerGlobalAddressWindows(Op, DAG); | ||||
3856 | case Triple::ELF: | ||||
3857 | return LowerGlobalAddressELF(Op, DAG); | ||||
3858 | case Triple::MachO: | ||||
3859 | return LowerGlobalAddressDarwin(Op, DAG); | ||||
3860 | } | ||||
3861 | } | ||||
3862 | |||||
3863 | SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, | ||||
3864 | SelectionDAG &DAG) const { | ||||
3865 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); | ||||
3866 | SDLoc dl(Op); | ||||
3867 | const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); | ||||
3868 | const TargetMachine &TM = getTargetMachine(); | ||||
3869 | bool IsRO = isReadOnly(GV); | ||||
3870 | |||||
3871 | // Call promoteToConstantPool only if we are not generating an execute-only (XO) text section. | ||||
3872 | if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly()) | ||||
3873 | if (SDValue V = promoteToConstantPool(this, GV, DAG, PtrVT, dl)) | ||||
3874 | return V; | ||||
3875 | |||||
3876 | if (isPositionIndependent()) { | ||||
3877 | bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV); | ||||
3878 | SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, | ||||
3879 | UseGOT_PREL ? ARMII::MO_GOT : 0); | ||||
3880 | SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G); | ||||
3881 | if (UseGOT_PREL) | ||||
3882 | Result = | ||||
3883 | DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result, | ||||
3884 | MachinePointerInfo::getGOT(DAG.getMachineFunction())); | ||||
3885 | return Result; | ||||
3886 | } else if (Subtarget->isROPI() && IsRO) { | ||||
3887 | // PC-relative. | ||||
3888 | SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT); | ||||
3889 | SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G); | ||||
3890 | return Result; | ||||
3891 | } else if (Subtarget->isRWPI() && !IsRO) { | ||||
3892 | // SB-relative. | ||||
3893 | SDValue RelAddr; | ||||
3894 | if (Subtarget->useMovt()) { | ||||
3895 | ++NumMovwMovt; | ||||
3896 | SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL); | ||||
3897 | RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G); | ||||
3898 | } else { // use literal pool for address constant | ||||
3899 | ARMConstantPoolValue *CPV = | ||||
3900 | ARMConstantPoolConstant::Create(GV, ARMCP::SBREL); | ||||
3901 | SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4)); | ||||
3902 | CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); | ||||
3903 | RelAddr = DAG.getLoad( | ||||
3904 | PtrVT, dl, DAG.getEntryNode(), CPAddr, | ||||
3905 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); | ||||
3906 | } | ||||
3907 | SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT); | ||||
3908 | SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr); | ||||
3909 | return Result; | ||||
3910 | } | ||||
3911 | |||||
3912 | // If we have T2 ops, we can materialize the address directly via a | ||||
3913 | // movw/movt pair. This is always cheaper. | ||||
3914 | if (Subtarget->useMovt()) { | ||||
3915 | ++NumMovwMovt; | ||||
3916 | // FIXME: Once remat is capable of dealing with instructions with register | ||||
3917 | // operands, expand this into two nodes. | ||||
3918 | return DAG.getNode(ARMISD::Wrapper, dl, PtrVT, | ||||
3919 | DAG.getTargetGlobalAddress(GV, dl, PtrVT)); | ||||
3920 | } else { | ||||
3921 | SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, Align(4)); | ||||
3922 | CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); | ||||
3923 | return DAG.getLoad( | ||||
3924 | PtrVT, dl, DAG.getEntryNode(), CPAddr, | ||||
3925 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); | ||||
3926 | } | ||||
3927 | } | ||||
3928 | |||||
3929 | SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, | ||||
3930 | SelectionDAG &DAG) const { | ||||
3931 | assert(!Subtarget->isROPI() && !Subtarget->isRWPI() && | ||||
3932 | "ROPI/RWPI not currently supported for Darwin"); | ||||
3933 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); | ||||
3934 | SDLoc dl(Op); | ||||
3935 | const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); | ||||
3936 | |||||
3937 | if (Subtarget->useMovt()) | ||||
3938 | ++NumMovwMovt; | ||||
3939 | |||||
3940 | // FIXME: Once remat is capable of dealing with instructions with register | ||||
3941 | // operands, expand this into multiple nodes | ||||
3942 | unsigned Wrapper = | ||||
3943 | isPositionIndependent() ? ARMISD::WrapperPIC : ARMISD::Wrapper; | ||||
3944 | |||||
3945 | SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY); | ||||
3946 | SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G); | ||||
3947 | |||||
3948 | if (Subtarget->isGVIndirectSymbol(GV)) | ||||
3949 | Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result, | ||||
3950 | MachinePointerInfo::getGOT(DAG.getMachineFunction())); | ||||
3951 | return Result; | ||||
3952 | } | ||||
3953 | |||||
3954 | SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op, | ||||
3955 | SelectionDAG &DAG) const { | ||||
3956 | assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported"); | ||||
3957 | assert(Subtarget->useMovt() && | ||||
3958 | "Windows on ARM expects to use movw/movt"); | ||||
3959 | assert(!Subtarget->isROPI() && !Subtarget->isRWPI() && | ||||
3960 | "ROPI/RWPI not currently supported for Windows"); | ||||
3961 | |||||
3962 | const TargetMachine &TM = getTargetMachine(); | ||||
3963 | const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); | ||||
3964 | ARMII::TOF TargetFlags = ARMII::MO_NO_FLAG; | ||||
3965 | if (GV->hasDLLImportStorageClass()) | ||||
3966 | TargetFlags = ARMII::MO_DLLIMPORT; | ||||
3967 | else if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) | ||||
3968 | TargetFlags = ARMII::MO_COFFSTUB; | ||||
3969 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); | ||||
3970 | SDValue Result; | ||||
3971 | SDLoc DL(Op); | ||||
3972 | |||||
3973 | ++NumMovwMovt; | ||||
3974 | |||||
3975 | // FIXME: Once remat is capable of dealing with instructions with register | ||||
3976 | // operands, expand this into two nodes. | ||||
3977 | Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, | ||||
3978 | DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*offset=*/0, | ||||
3979 | TargetFlags)); | ||||
3980 | if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB)) | ||||
3981 | Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result, | ||||
3982 | MachinePointerInfo::getGOT(DAG.getMachineFunction())); | ||||
3983 | return Result; | ||||
3984 | } | ||||
3985 | |||||
3986 | SDValue | ||||
3987 | ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const { | ||||
3988 | SDLoc dl(Op); | ||||
3989 | SDValue Val = DAG.getConstant(0, dl, MVT::i32); | ||||
3990 | return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, | ||||
3991 | DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0), | ||||
3992 | Op.getOperand(1), Val); | ||||
3993 | } | ||||
3994 | |||||
3995 | SDValue | ||||
3996 | ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const { | ||||
3997 | SDLoc dl(Op); | ||||
3998 | return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0), | ||||
3999 | Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32)); | ||||
4000 | } | ||||
4001 | |||||
4002 | SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, | ||||
4003 | SelectionDAG &DAG) const { | ||||
4004 | SDLoc dl(Op); | ||||
4005 | return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other, | ||||
4006 | Op.getOperand(0)); | ||||
4007 | } | ||||
4008 | |||||
4009 | SDValue ARMTargetLowering::LowerINTRINSIC_VOID( | ||||
4010 | SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const { | ||||
4011 | unsigned IntNo = | ||||
4012 | cast<ConstantSDNode>( | ||||
4013 | Op.getOperand(Op.getOperand(0).getValueType() == MVT::Other)) | ||||
4014 | ->getZExtValue(); | ||||
4015 | switch (IntNo) { | ||||
4016 | default: | ||||
4017 | return SDValue(); // Don't custom lower most intrinsics. | ||||
4018 | case Intrinsic::arm_gnu_eabi_mcount: { | ||||
4019 | MachineFunction &MF = DAG.getMachineFunction(); | ||||
4020 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); | ||||
4021 | SDLoc dl(Op); | ||||
4022 | SDValue Chain = Op.getOperand(0); | ||||
4023 | // call "\01__gnu_mcount_nc" | ||||
4024 | const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo(); | ||||
4025 | const uint32_t *Mask = | ||||
4026 | ARI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C); | ||||
4027 | assert(Mask && "Missing call preserved mask for calling convention"); | ||||
4028 | // Mark LR as an implicit live-in. | ||||
4029 | Register Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32)); | ||||
4030 | SDValue ReturnAddress = | ||||
4031 | DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, PtrVT); | ||||
4032 | constexpr EVT ResultTys[] = {MVT::Other, MVT::Glue}; | ||||
4033 | SDValue Callee = | ||||
4034 | DAG.getTargetExternalSymbol("\01__gnu_mcount_nc", PtrVT, 0); | ||||
4035 | SDValue RegisterMask = DAG.getRegisterMask(Mask); | ||||
4036 | if (Subtarget->isThumb()) | ||||
4037 | return SDValue( | ||||
4038 | DAG.getMachineNode( | ||||
4039 | ARM::tBL_PUSHLR, dl, ResultTys, | ||||
4040 | {ReturnAddress, DAG.getTargetConstant(ARMCC::AL, dl, PtrVT), | ||||
4041 | DAG.getRegister(0, PtrVT), Callee, RegisterMask, Chain}), | ||||
4042 | 0); | ||||
4043 | return SDValue( | ||||
4044 | DAG.getMachineNode(ARM::BL_PUSHLR, dl, ResultTys, | ||||
4045 | {ReturnAddress, Callee, RegisterMask, Chain}), | ||||
4046 | 0); | ||||
4047 | } | ||||
4048 | } | ||||
4049 | } | ||||
4050 | |||||
4051 | SDValue | ||||
4052 | ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, | ||||
4053 | const ARMSubtarget *Subtarget) const { | ||||
4054 | unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); | ||||
4055 | SDLoc dl(Op); | ||||
4056 | switch (IntNo) { | ||||
4057 | default: return SDValue(); // Don't custom lower most intrinsics. | ||||
4058 | case Intrinsic::thread_pointer: { | ||||
4059 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); | ||||
4060 | return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); | ||||
4061 | } | ||||
4062 | case Intrinsic::arm_cls: { | ||||
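| // cls(x) == ctlz(((x ^ (x >> 31)) << 1) | 1). For example, for x == -1 | ||||
| // the XOR gives 0, so the result is ctlz(1) == 31, the number of leading | ||||
| // bits that match the sign bit (excluding the sign bit itself). | ||||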
4063 | const SDValue &Operand = Op.getOperand(1); | ||||
4064 | const EVT VTy = Op.getValueType(); | ||||
4065 | SDValue SRA = | ||||
4066 | DAG.getNode(ISD::SRA, dl, VTy, Operand, DAG.getConstant(31, dl, VTy)); | ||||
4067 | SDValue XOR = DAG.getNode(ISD::XOR, dl, VTy, SRA, Operand); | ||||
4068 | SDValue SHL = | ||||
4069 | DAG.getNode(ISD::SHL, dl, VTy, XOR, DAG.getConstant(1, dl, VTy)); | ||||
4070 | SDValue OR = | ||||
4071 | DAG.getNode(ISD::OR, dl, VTy, SHL, DAG.getConstant(1, dl, VTy)); | ||||
4072 | SDValue Result = DAG.getNode(ISD::CTLZ, dl, VTy, OR); | ||||
4073 | return Result; | ||||
4074 | } | ||||
4075 | case Intrinsic::arm_cls64: { | ||||
4076 | // cls(x) = if cls(hi(x)) != 31 then cls(hi(x)) | ||||
4077 | // else 31 + clz(if hi(x) == 0 then lo(x) else not(lo(x))) | ||||
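| // For example, for x == 1: cls(hi) == cls(0) == 31 and hi == 0, so the | ||||
| // result is 31 + clz(lo) == 31 + 31 == 62. | ||||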
4078 | const SDValue &Operand = Op.getOperand(1); | ||||
4079 | const EVT VTy = Op.getValueType(); | ||||
4080 | |||||
4081 | SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VTy, Operand, | ||||
4082 | DAG.getConstant(1, dl, VTy)); | ||||
4083 | SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VTy, Operand, | ||||
4084 | DAG.getConstant(0, dl, VTy)); | ||||
4085 | SDValue Constant0 = DAG.getConstant(0, dl, VTy); | ||||
4086 | SDValue Constant1 = DAG.getConstant(1, dl, VTy); | ||||
4087 | SDValue Constant31 = DAG.getConstant(31, dl, VTy); | ||||
4088 | SDValue SRAHi = DAG.getNode(ISD::SRA, dl, VTy, Hi, Constant31); | ||||
4089 | SDValue XORHi = DAG.getNode(ISD::XOR, dl, VTy, SRAHi, Hi); | ||||
4090 | SDValue SHLHi = DAG.getNode(ISD::SHL, dl, VTy, XORHi, Constant1); | ||||
4091 | SDValue ORHi = DAG.getNode(ISD::OR, dl, VTy, SHLHi, Constant1); | ||||
4092 | SDValue CLSHi = DAG.getNode(ISD::CTLZ, dl, VTy, ORHi); | ||||
4093 | SDValue CheckLo = | ||||
4094 | DAG.getSetCC(dl, MVT::i1, CLSHi, Constant31, ISD::CondCode::SETEQ); | ||||
4095 | SDValue HiIsZero = | ||||
4096 | DAG.getSetCC(dl, MVT::i1, Hi, Constant0, ISD::CondCode::SETEQ); | ||||
4097 | SDValue AdjustedLo = | ||||
4098 | DAG.getSelect(dl, VTy, HiIsZero, Lo, DAG.getNOT(dl, Lo, VTy)); | ||||
4099 | SDValue CLZAdjustedLo = DAG.getNode(ISD::CTLZ, dl, VTy, AdjustedLo); | ||||
4100 | SDValue Result = | ||||
4101 | DAG.getSelect(dl, VTy, CheckLo, | ||||
4102 | DAG.getNode(ISD::ADD, dl, VTy, CLZAdjustedLo, Constant31), CLSHi); | ||||
4103 | return Result; | ||||
4104 | } | ||||
4105 | case Intrinsic::eh_sjlj_lsda: { | ||||
4106 | MachineFunction &MF = DAG.getMachineFunction(); | ||||
4107 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | ||||
4108 | unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); | ||||
4109 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); | ||||
4110 | SDValue CPAddr; | ||||
4111 | bool IsPositionIndependent = isPositionIndependent(); | ||||
4112 | unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0; | ||||
4113 | ARMConstantPoolValue *CPV = | ||||
4114 | ARMConstantPoolConstant::Create(&MF.getFunction(), ARMPCLabelIndex, | ||||
4115 | ARMCP::CPLSDA, PCAdj); | ||||
4116 | CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4)); | ||||
4117 | CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); | ||||
4118 | SDValue Result = DAG.getLoad( | ||||
4119 | PtrVT, dl, DAG.getEntryNode(), CPAddr, | ||||
4120 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); | ||||
4121 | |||||
4122 | if (IsPositionIndependent) { | ||||
4123 | SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); | ||||
4124 | Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); | ||||
4125 | } | ||||
4126 | return Result; | ||||
4127 | } | ||||
4128 | case Intrinsic::arm_neon_vabs: | ||||
4129 | return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(), | ||||
4130 | Op.getOperand(1)); | ||||
4131 | case Intrinsic::arm_neon_vmulls: | ||||
4132 | case Intrinsic::arm_neon_vmullu: { | ||||
4133 | unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls) | ||||
4134 | ? ARMISD::VMULLs : ARMISD::VMULLu; | ||||
4135 | return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), | ||||
4136 | Op.getOperand(1), Op.getOperand(2)); | ||||
4137 | } | ||||
4138 | case Intrinsic::arm_neon_vminnm: | ||||
4139 | case Intrinsic::arm_neon_vmaxnm: { | ||||
4140 | unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm) | ||||
4141 | ? ISD::FMINNUM : ISD::FMAXNUM; | ||||
4142 | return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), | ||||
4143 | Op.getOperand(1), Op.getOperand(2)); | ||||
4144 | } | ||||
4145 | case Intrinsic::arm_neon_vminu: | ||||
4146 | case Intrinsic::arm_neon_vmaxu: { | ||||
4147 | if (Op.getValueType().isFloatingPoint()) | ||||
4148 | return SDValue(); | ||||
4149 | unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu) | ||||
4150 | ? ISD::UMIN : ISD::UMAX; | ||||
4151 | return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), | ||||
4152 | Op.getOperand(1), Op.getOperand(2)); | ||||
4153 | } | ||||
4154 | case Intrinsic::arm_neon_vmins: | ||||
4155 | case Intrinsic::arm_neon_vmaxs: { | ||||
4156 | // v{min,max}s is overloaded between signed integers and floats. | ||||
4157 | if (!Op.getValueType().isFloatingPoint()) { | ||||
4158 | unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins) | ||||
4159 | ? ISD::SMIN : ISD::SMAX; | ||||
4160 | return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), | ||||
4161 | Op.getOperand(1), Op.getOperand(2)); | ||||
4162 | } | ||||
4163 | unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins) | ||||
4164 | ? ISD::FMINIMUM : ISD::FMAXIMUM; | ||||
4165 | return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), | ||||
4166 | Op.getOperand(1), Op.getOperand(2)); | ||||
4167 | } | ||||
4168 | case Intrinsic::arm_neon_vtbl1: | ||||
4169 | return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(), | ||||
4170 | Op.getOperand(1), Op.getOperand(2)); | ||||
4171 | case Intrinsic::arm_neon_vtbl2: | ||||
4172 | return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(), | ||||
4173 | Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); | ||||
4174 | case Intrinsic::arm_mve_pred_i2v: | ||||
4175 | case Intrinsic::arm_mve_pred_v2i: | ||||
4176 | return DAG.getNode(ARMISD::PREDICATE_CAST, SDLoc(Op), Op.getValueType(), | ||||
4177 | Op.getOperand(1)); | ||||
4178 | case Intrinsic::arm_mve_vreinterpretq: | ||||
4179 | return DAG.getNode(ARMISD::VECTOR_REG_CAST, SDLoc(Op), Op.getValueType(), | ||||
4180 | Op.getOperand(1)); | ||||
4181 | case Intrinsic::arm_mve_lsll: | ||||
4182 | return DAG.getNode(ARMISD::LSLL, SDLoc(Op), Op->getVTList(), | ||||
4183 | Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); | ||||
4184 | case Intrinsic::arm_mve_asrl: | ||||
4185 | return DAG.getNode(ARMISD::ASRL, SDLoc(Op), Op->getVTList(), | ||||
4186 | Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); | ||||
4187 | } | ||||
4188 | } | ||||
4189 | |||||
4190 | static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, | ||||
4191 | const ARMSubtarget *Subtarget) { | ||||
4192 | SDLoc dl(Op); | ||||
4193 | ConstantSDNode *SSIDNode = cast<ConstantSDNode>(Op.getOperand(2)); | ||||
4194 | auto SSID = static_cast<SyncScope::ID>(SSIDNode->getZExtValue()); | ||||
4195 | if (SSID == SyncScope::SingleThread) | ||||
4196 | return Op; | ||||
4197 | |||||
4198 | if (!Subtarget->hasDataBarrier()) { | ||||
4199 | // Some ARMv6 CPUs can support data barriers with an mcr instruction. | ||||
4200 | // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get | ||||
4201 | // here. | ||||
4202 | assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() && | ||||
4203 | "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!"); | ||||
4204 | return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0), | ||||
4205 | DAG.getConstant(0, dl, MVT::i32)); | ||||
4206 | } | ||||
4207 | |||||
4208 | ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1)); | ||||
4209 | AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue()); | ||||
4210 | ARM_MB::MemBOpt Domain = ARM_MB::ISH; | ||||
4211 | if (Subtarget->isMClass()) { | ||||
4212 | // Only a full system barrier exists in the M-class architectures. | ||||
4213 | Domain = ARM_MB::SY; | ||||
4214 | } else if (Subtarget->preferISHSTBarriers() && | ||||
4215 | Ord == AtomicOrdering::Release) { | ||||
4216 | // Swift happens to implement ISHST barriers in a way that's compatible with | ||||
4217 | // Release semantics but weaker than ISH so we'd be fools not to use | ||||
4218 | // it. Beware: other processors probably don't! | ||||
4219 | Domain = ARM_MB::ISHST; | ||||
4220 | } | ||||
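| // So M-class targets always use "dmb sy", Swift uses "dmb ishst" for | ||||
| // release fences, and everything else gets "dmb ish". | ||||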
4221 | |||||
4222 | return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0), | ||||
4223 | DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32), | ||||
4224 | DAG.getConstant(Domain, dl, MVT::i32)); | ||||
4225 | } | ||||
4226 | |||||
4227 | static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG, | ||||
4228 | const ARMSubtarget *Subtarget) { | ||||
4229 | // ARM pre-v5TE and Thumb1 do not have preload instructions. | ||||
4230 | if (!(Subtarget->isThumb2() || | ||||
4231 | (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps()))) | ||||
4232 | // Just preserve the chain. | ||||
4233 | return Op.getOperand(0); | ||||
4234 | |||||
4235 | SDLoc dl(Op); | ||||
4236 | unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1; | ||||
4237 | if (!isRead && | ||||
4238 | (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension())) | ||||
4239 | // ARMv7 with MP extension has PLDW. | ||||
4240 | return Op.getOperand(0); | ||||
4241 | |||||
4242 | unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue(); | ||||
4243 | if (Subtarget->isThumb()) { | ||||
4244 | // Invert the bits. | ||||
4245 | isRead = ~isRead & 1; | ||||
4246 | isData = ~isData & 1; | ||||
4247 | } | ||||
4248 | |||||
4249 | return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0), | ||||
4250 | Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32), | ||||
4251 | DAG.getConstant(isData, dl, MVT::i32)); | ||||
4252 | } | ||||
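     | // E.g. llvm.prefetch(p, /*rw=*/1, ...) reaches here with isRead == 0: on | ||||
     | // ARMv7 with the MP extension it becomes a PLDW, while on older cores the | ||||
     | // write hint is dropped and only the chain survives. The Thumb inversion | ||||
     | // above is purely an encoding convenience for instruction selection. | ||||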
4253 | |||||
4254 | static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { | ||||
4255 | MachineFunction &MF = DAG.getMachineFunction(); | ||||
4256 | ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>(); | ||||
4257 | |||||
4258 | // vastart just stores the address of the VarArgsFrameIndex slot into the | ||||
4259 | // memory location argument. | ||||
4260 | SDLoc dl(Op); | ||||
4261 | EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); | ||||
4262 | SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); | ||||
4263 | const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); | ||||
4264 | return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), | ||||
4265 | MachinePointerInfo(SV)); | ||||
4266 | } | ||||
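     | // E.g. for "int f(int n, ...)", va_start(ap, n) lowers to one store of | ||||
     | // the VarArgsFrameIndex address into ap; the register save area that the | ||||
     | // index points at is built by VarArgStyleRegisters() further down. | ||||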
4267 | |||||
4268 | SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, | ||||
4269 | CCValAssign &NextVA, | ||||
4270 | SDValue &Root, | ||||
4271 | SelectionDAG &DAG, | ||||
4272 | const SDLoc &dl) const { | ||||
4273 | MachineFunction &MF = DAG.getMachineFunction(); | ||||
4274 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | ||||
4275 | |||||
4276 | const TargetRegisterClass *RC; | ||||
4277 | if (AFI->isThumb1OnlyFunction()) | ||||
4278 | RC = &ARM::tGPRRegClass; | ||||
4279 | else | ||||
4280 | RC = &ARM::GPRRegClass; | ||||
4281 | |||||
4282 | // Transform the arguments stored in physical registers into virtual ones. | ||||
4283 | Register Reg = MF.addLiveIn(VA.getLocReg(), RC); | ||||
4284 | SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); | ||||
4285 | |||||
4286 | SDValue ArgValue2; | ||||
4287 | if (NextVA.isMemLoc()) { | ||||
4288 | MachineFrameInfo &MFI = MF.getFrameInfo(); | ||||
4289 | int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true); | ||||
4290 | |||||
4291 | // Create load node to retrieve arguments from the stack. | ||||
4292 | SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); | ||||
4293 | ArgValue2 = DAG.getLoad( | ||||
4294 | MVT::i32, dl, Root, FIN, | ||||
4295 | MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)); | ||||
4296 | } else { | ||||
4297 | Reg = MF.addLiveIn(NextVA.getLocReg(), RC); | ||||
4298 | ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); | ||||
4299 | } | ||||
4300 | if (!Subtarget->isLittle()) | ||||
4301 |     std::swap(ArgValue, ArgValue2); | ||||
4302 | return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2); | ||||
4303 | } | ||||
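     | // A minimal example, assuming the AAPCS soft-float convention: for | ||||
     | // "double g(int a, double d)", d arrives in {r2, r3}; both i32 halves are | ||||
     | // copied out of the GPRs and recombined with VMOVDRR, after swapping the | ||||
     | // half order on big-endian targets. | ||||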
4304 | |||||
4305 | // The remaining GPRs hold either the beginning of variable-argument | ||||
4306 | // data, or the beginning of an aggregate passed by value (usually | ||||
4307 | // byval). Either way, we allocate stack slots adjacent to the data | ||||
4308 | // provided by our caller, and store the unallocated registers there. | ||||
4309 | // If this is a variadic function, the va_list pointer will begin with | ||||
4310 | // these values; otherwise, this reassembles a (byval) structure that | ||||
4311 | // was split between registers and memory. | ||||
4312 | // Return: the frame index that the registers were stored into. | ||||
4313 | int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, | ||||
4314 | const SDLoc &dl, SDValue &Chain, | ||||
4315 | const Value *OrigArg, | ||||
4316 | unsigned InRegsParamRecordIdx, | ||||
4317 | int ArgOffset, unsigned ArgSize) const { | ||||
4318 |   // Currently, two use cases are possible: | ||||
4319 |   // Case #1. Non-varargs function, and we meet the first byval parameter. | ||||
4320 |   //          Set up the first unallocated register as the first byval | ||||
4321 |   //          register and consume all remaining registers | ||||
4322 |   //          (these two actions are performed by the HandleByVal method). | ||||
4323 |   //          Then, here, we initialize the stack frame with | ||||
4324 |   //          "store-reg" instructions. | ||||
4325 |   // Case #2. Varargs function that doesn't contain byval parameters. | ||||
4326 |   //          The same: consume all remaining unallocated registers and | ||||
4327 |   //          initialize the stack frame. | ||||
4328 | |||||
4329 | MachineFunction &MF = DAG.getMachineFunction(); | ||||
4330 | MachineFrameInfo &MFI = MF.getFrameInfo(); | ||||
4331 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | ||||
4332 | unsigned RBegin, REnd; | ||||
4333 | if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) { | ||||
4334 | CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd); | ||||
4335 | } else { | ||||
4336 | unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs); | ||||
4337 | RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx]; | ||||
4338 | REnd = ARM::R4; | ||||
4339 | } | ||||
4340 | |||||
4341 | if (REnd != RBegin) | ||||
4342 | ArgOffset = -4 * (ARM::R4 - RBegin); | ||||
4343 | |||||
4344 | auto PtrVT = getPointerTy(DAG.getDataLayout()); | ||||
4345 | int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false); | ||||
4346 | SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT); | ||||
4347 | |||||
4348 | SmallVector<SDValue, 4> MemOps; | ||||
4349 | const TargetRegisterClass *RC = | ||||
4350 | AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass; | ||||
4351 | |||||
4352 | for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) { | ||||
4353 | Register VReg = MF.addLiveIn(Reg, RC); | ||||
4354 | SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); | ||||
4355 | SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, | ||||
4356 | MachinePointerInfo(OrigArg, 4 * i)); | ||||
4357 | MemOps.push_back(Store); | ||||
4358 | FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT)); | ||||
4359 | } | ||||
4360 | |||||
4361 | if (!MemOps.empty()) | ||||
4362 | Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); | ||||
4363 | return FrameIndex; | ||||
4364 | } | ||||
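     | // Worked example (illustrative): a byval object split as {r2, r3} plus | ||||
     | // 8 stack bytes gives RBegin = R2 and REnd = R4, so ArgOffset becomes | ||||
     | // -4 * (R4 - R2) = -8; r2 and r3 are stored at offsets -8 and -4, right | ||||
     | // below the incoming stack half, reassembling one contiguous object. | ||||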
4365 | |||||
4366 | // Set up the stack frame that the va_list pointer will start from. | ||||
4367 | void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, | ||||
4368 | const SDLoc &dl, SDValue &Chain, | ||||
4369 | unsigned ArgOffset, | ||||
4370 | unsigned TotalArgRegsSaveSize, | ||||
4371 | bool ForceMutable) const { | ||||
4372 | MachineFunction &MF = DAG.getMachineFunction(); | ||||
4373 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | ||||
4374 | |||||
4375 | // Try to store any remaining integer argument regs | ||||
4376 | // to their spots on the stack so that they may be loaded by dereferencing | ||||
4377 | // the result of va_next. | ||||
4378 |   // If there are no regs to be stored, just point the address after the | ||||
4379 |   // last argument passed via the stack. | ||||
4380 | int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr, | ||||
4381 | CCInfo.getInRegsParamsCount(), | ||||
4382 | CCInfo.getNextStackOffset(), | ||||
4383 | std::max(4U, TotalArgRegsSaveSize)); | ||||
4384 | AFI->setVarArgsFrameIndex(FrameIndex); | ||||
4385 | } | ||||
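     | // E.g. for "int f(int a, ...)": 'a' consumes r0, so r1-r3 are spilled | ||||
     | // here and VarArgsFrameIndex points at the r1 slot, which is exactly | ||||
     | // where va_arg expects the first variadic word. | ||||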
4386 | |||||
4387 | bool ARMTargetLowering::splitValueIntoRegisterParts( | ||||
4388 | SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, | ||||
4389 | unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const { | ||||
4390 | bool IsABIRegCopy = CC.hasValue(); | ||||
4391 | EVT ValueVT = Val.getValueType(); | ||||
4392 | if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) && | ||||
4393 | PartVT == MVT::f32) { | ||||
4394 | unsigned ValueBits = ValueVT.getSizeInBits(); | ||||
4395 | unsigned PartBits = PartVT.getSizeInBits(); | ||||
4396 | Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(ValueBits), Val); | ||||
4397 | Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::getIntegerVT(PartBits), Val); | ||||
4398 | Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); | ||||
4399 | Parts[0] = Val; | ||||
4400 | return true; | ||||
4401 | } | ||||
4402 | return false; | ||||
4403 | } | ||||
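     | // Sketch of the hard-float f16/bf16 case above: the half value is | ||||
     | // bitcast to i16, any-extended to i32, then bitcast to f32, so it | ||||
     | // travels in the low 16 bits of an S register as the ABI expects. | ||||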
4404 | |||||
4405 | SDValue ARMTargetLowering::joinRegisterPartsIntoValue( | ||||
4406 | SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, | ||||
4407 | MVT PartVT, EVT ValueVT, Optional<CallingConv::ID> CC) const { | ||||
4408 | bool IsABIRegCopy = CC.hasValue(); | ||||
4409 | if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) && | ||||
4410 | PartVT == MVT::f32) { | ||||
4411 | unsigned ValueBits = ValueVT.getSizeInBits(); | ||||
4412 | unsigned PartBits = PartVT.getSizeInBits(); | ||||
4413 | SDValue Val = Parts[0]; | ||||
4414 | |||||
4415 | Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(PartBits), Val); | ||||
4416 | Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::getIntegerVT(ValueBits), Val); | ||||
4417 | Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); | ||||
4418 | return Val; | ||||
4419 | } | ||||
4420 | return SDValue(); | ||||
4421 | } | ||||
4422 | |||||
4423 | SDValue ARMTargetLowering::LowerFormalArguments( | ||||
4424 | SDValue Chain, CallingConv::ID CallConv, bool isVarArg, | ||||
4425 | const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, | ||||
4426 | SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { | ||||
4427 | MachineFunction &MF = DAG.getMachineFunction(); | ||||
4428 | MachineFrameInfo &MFI = MF.getFrameInfo(); | ||||
4429 | |||||
4430 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | ||||
4431 | |||||
4432 | // Assign locations to all of the incoming arguments. | ||||
4433 | SmallVector<CCValAssign, 16> ArgLocs; | ||||
4434 | CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, | ||||
4435 | *DAG.getContext()); | ||||
4436 | CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg)); | ||||
4437 | |||||
4438 | SmallVector<SDValue, 16> ArgValues; | ||||
4439 | SDValue ArgValue; | ||||
4440 | Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin(); | ||||
4441 | unsigned CurArgIdx = 0; | ||||
4442 | |||||
4443 | // Initially ArgRegsSaveSize is zero. | ||||
4444 |   // Then we increase this value each time we meet a byval parameter. | ||||
4445 |   // We also increase this value in the case of a varargs function. | ||||
4446 | AFI->setArgRegsSaveSize(0); | ||||
4447 | |||||
4448 | // Calculate the amount of stack space that we need to allocate to store | ||||
4449 | // byval and variadic arguments that are passed in registers. | ||||
4450 | // We need to know this before we allocate the first byval or variadic | ||||
4451 | // argument, as they will be allocated a stack slot below the CFA (Canonical | ||||
4452 | // Frame Address, the stack pointer at entry to the function). | ||||
4453 | unsigned ArgRegBegin = ARM::R4; | ||||
4454 | for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { | ||||
4455 | if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount()) | ||||
4456 | break; | ||||
4457 | |||||
4458 | CCValAssign &VA = ArgLocs[i]; | ||||
4459 | unsigned Index = VA.getValNo(); | ||||
4460 | ISD::ArgFlagsTy Flags = Ins[Index].Flags; | ||||
4461 | if (!Flags.isByVal()) | ||||
4462 | continue; | ||||
4463 | |||||
4464 |     assert(VA.isMemLoc() && "unexpected byval pointer in reg"); | ||||
4465 | unsigned RBegin, REnd; | ||||
4466 | CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd); | ||||
4467 | ArgRegBegin = std::min(ArgRegBegin, RBegin); | ||||
4468 | |||||
4469 | CCInfo.nextInRegsParam(); | ||||
4470 | } | ||||
4471 | CCInfo.rewindByValRegsInfo(); | ||||
4472 | |||||
4473 | int lastInsIndex = -1; | ||||
4474 | if (isVarArg && MFI.hasVAStart()) { | ||||
4475 | unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs); | ||||
4476 | if (RegIdx != array_lengthof(GPRArgRegs)) | ||||
4477 | ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]); | ||||
4478 | } | ||||
4479 | |||||
4480 | unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin); | ||||
4481 | AFI->setArgRegsSaveSize(TotalArgRegsSaveSize); | ||||
4482 | auto PtrVT = getPointerTy(DAG.getDataLayout()); | ||||
4483 | |||||
4484 | for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { | ||||
4485 | CCValAssign &VA = ArgLocs[i]; | ||||
4486 | if (Ins[VA.getValNo()].isOrigArg()) { | ||||
4487 | std::advance(CurOrigArg, | ||||
4488 | Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx); | ||||
4489 | CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex(); | ||||
4490 | } | ||||
4491 | // Arguments stored in registers. | ||||
4492 | if (VA.isRegLoc()) { | ||||
4493 | EVT RegVT = VA.getLocVT(); | ||||
4494 | |||||
4495 | if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) { | ||||
4496 | // f64 and vector types are split up into multiple registers or | ||||
4497 | // combinations of registers and stack slots. | ||||
4498 | SDValue ArgValue1 = | ||||
4499 | GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); | ||||
4500 | VA = ArgLocs[++i]; // skip ahead to next loc | ||||
4501 | SDValue ArgValue2; | ||||
4502 | if (VA.isMemLoc()) { | ||||
4503 | int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true); | ||||
4504 | SDValue FIN = DAG.getFrameIndex(FI, PtrVT); | ||||
4505 | ArgValue2 = DAG.getLoad( | ||||
4506 | MVT::f64, dl, Chain, FIN, | ||||
4507 | MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)); | ||||
4508 | } else { | ||||
4509 | ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); | ||||
4510 | } | ||||
4511 | ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); | ||||
4512 | ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue, | ||||
4513 | ArgValue1, DAG.getIntPtrConstant(0, dl)); | ||||
4514 | ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue, | ||||
4515 | ArgValue2, DAG.getIntPtrConstant(1, dl)); | ||||
4516 | } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) { | ||||
4517 | ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); | ||||
4518 | } else { | ||||
4519 | const TargetRegisterClass *RC; | ||||
4520 | |||||
4521 | if (RegVT == MVT::f16 || RegVT == MVT::bf16) | ||||
4522 | RC = &ARM::HPRRegClass; | ||||
4523 | else if (RegVT == MVT::f32) | ||||
4524 | RC = &ARM::SPRRegClass; | ||||
4525 | else if (RegVT == MVT::f64 || RegVT == MVT::v4f16 || | ||||
4526 | RegVT == MVT::v4bf16) | ||||
4527 | RC = &ARM::DPRRegClass; | ||||
4528 | else if (RegVT == MVT::v2f64 || RegVT == MVT::v8f16 || | ||||
4529 | RegVT == MVT::v8bf16) | ||||
4530 | RC = &ARM::QPRRegClass; | ||||
4531 | else if (RegVT == MVT::i32) | ||||
4532 | RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass | ||||
4533 | : &ARM::GPRRegClass; | ||||
4534 | else | ||||
4535 |         llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering"); | ||||
4536 | |||||
4537 | // Transform the arguments in physical registers into virtual ones. | ||||
4538 | Register Reg = MF.addLiveIn(VA.getLocReg(), RC); | ||||
4539 | ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); | ||||
4540 | |||||
4541 | // If this value is passed in r0 and has the returned attribute (e.g. | ||||
4542 | // C++ 'structors), record this fact for later use. | ||||
4543 | if (VA.getLocReg() == ARM::R0 && Ins[VA.getValNo()].Flags.isReturned()) { | ||||
4544 | AFI->setPreservesR0(); | ||||
4545 | } | ||||
4546 | } | ||||
4547 | |||||
4548 | // If this is an 8 or 16-bit value, it is really passed promoted | ||||
4549 | // to 32 bits. Insert an assert[sz]ext to capture this, then | ||||
4550 | // truncate to the right size. | ||||
4551 | switch (VA.getLocInfo()) { | ||||
4552 |       default: llvm_unreachable("Unknown loc info!"); | ||||
4553 | case CCValAssign::Full: break; | ||||
4554 | case CCValAssign::BCvt: | ||||
4555 | ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue); | ||||
4556 | break; | ||||
4557 | case CCValAssign::SExt: | ||||
4558 | ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue, | ||||
4559 | DAG.getValueType(VA.getValVT())); | ||||
4560 | ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); | ||||
4561 | break; | ||||
4562 | case CCValAssign::ZExt: | ||||
4563 | ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, | ||||
4564 | DAG.getValueType(VA.getValVT())); | ||||
4565 | ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); | ||||
4566 | break; | ||||
4567 | } | ||||
4568 | |||||
4569 |       // f16 arguments have their size extended to 4 bytes and are passed as | ||||
4570 |       // if they had been copied to the LSBs of a 32-bit register. | ||||
4571 |       // For that, they are passed extended to i32 (soft ABI) or f32 (hard ABI). | ||||
4572 | if (VA.needsCustom() && | ||||
4573 | (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) | ||||
4574 | ArgValue = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), ArgValue); | ||||
4575 | |||||
4576 | InVals.push_back(ArgValue); | ||||
4577 | } else { // VA.isRegLoc() | ||||
4578 | // Only arguments passed on the stack should make it here. | ||||
4579 |       assert(VA.isMemLoc()); | ||||
4580 |       assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered"); | ||||
4581 | |||||
4582 | int index = VA.getValNo(); | ||||
4583 | |||||
4584 | // Some Ins[] entries become multiple ArgLoc[] entries. | ||||
4585 | // Process them only once. | ||||
4586 | if (index != lastInsIndex) | ||||
4587 | { | ||||
4588 | ISD::ArgFlagsTy Flags = Ins[index].Flags; | ||||
4589 | // FIXME: For now, all byval parameter objects are marked mutable. | ||||
4590 | // This can be changed with more analysis. | ||||
4591 |           // In case of tail call optimization, mark all arguments mutable, | ||||
4592 |           // since they could be overwritten by the lowering of arguments in | ||||
4593 |           // case of a tail call. | ||||
4594 | if (Flags.isByVal()) { | ||||
4595 |           assert(Ins[index].isOrigArg() && | ||||
4596 |                  "Byval arguments cannot be implicit"); | ||||
4597 | unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed(); | ||||
4598 | |||||
4599 | int FrameIndex = StoreByValRegs( | ||||
4600 | CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex, | ||||
4601 | VA.getLocMemOffset(), Flags.getByValSize()); | ||||
4602 | InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT)); | ||||
4603 | CCInfo.nextInRegsParam(); | ||||
4604 | } else { | ||||
4605 | unsigned FIOffset = VA.getLocMemOffset(); | ||||
4606 | int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits()/8, | ||||
4607 | FIOffset, true); | ||||
4608 | |||||
4609 | // Create load nodes to retrieve arguments from the stack. | ||||
4610 | SDValue FIN = DAG.getFrameIndex(FI, PtrVT); | ||||
4611 | InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, | ||||
4612 | MachinePointerInfo::getFixedStack( | ||||
4613 | DAG.getMachineFunction(), FI))); | ||||
4614 | } | ||||
4615 | lastInsIndex = index; | ||||
4616 | } | ||||
4617 | } | ||||
4618 | } | ||||
4619 | |||||
4620 | // varargs | ||||
4621 | if (isVarArg && MFI.hasVAStart()) { | ||||
4622 | VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getNextStackOffset(), | ||||
4623 | TotalArgRegsSaveSize); | ||||
4624 | if (AFI->isCmseNSEntryFunction()) { | ||||
4625 | DiagnosticInfoUnsupported Diag( | ||||
4626 | DAG.getMachineFunction().getFunction(), | ||||
4627 | "secure entry function must not be variadic", dl.getDebugLoc()); | ||||
4628 | DAG.getContext()->diagnose(Diag); | ||||
4629 | } | ||||
4630 | } | ||||
4631 | |||||
4632 | unsigned StackArgSize = CCInfo.getNextStackOffset(); | ||||
4633 | bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt; | ||||
4634 | if (canGuaranteeTCO(CallConv, TailCallOpt)) { | ||||
4635 | // The only way to guarantee a tail call is if the callee restores its | ||||
4636 | // argument area, but it must also keep the stack aligned when doing so. | ||||
4637 | const DataLayout &DL = DAG.getDataLayout(); | ||||
4638 | StackArgSize = alignTo(StackArgSize, DL.getStackAlignment()); | ||||
4639 | |||||
4640 | AFI->setArgumentStackToRestore(StackArgSize); | ||||
4641 | } | ||||
4642 | AFI->setArgumentStackSize(StackArgSize); | ||||
4643 | |||||
4644 | if (CCInfo.getNextStackOffset() > 0 && AFI->isCmseNSEntryFunction()) { | ||||
4645 | DiagnosticInfoUnsupported Diag( | ||||
4646 | DAG.getMachineFunction().getFunction(), | ||||
4647 | "secure entry function requires arguments on stack", dl.getDebugLoc()); | ||||
4648 | DAG.getContext()->diagnose(Diag); | ||||
4649 | } | ||||
4650 | |||||
4651 | return Chain; | ||||
4652 | } | ||||
4653 | |||||
4654 | /// isFloatingPointZero - Return true if this is +0.0. | ||||
4655 | static bool isFloatingPointZero(SDValue Op) { | ||||
4656 | if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) | ||||
4657 | return CFP->getValueAPF().isPosZero(); | ||||
4658 | else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) { | ||||
4659 | // Maybe this has already been legalized into the constant pool? | ||||
4660 | if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) { | ||||
4661 | SDValue WrapperOp = Op.getOperand(1).getOperand(0); | ||||
4662 | if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp)) | ||||
4663 | if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) | ||||
4664 | return CFP->getValueAPF().isPosZero(); | ||||
4665 | } | ||||
4666 | } else if (Op->getOpcode() == ISD::BITCAST && | ||||
4667 | Op->getValueType(0) == MVT::f64) { | ||||
4668 | // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64) | ||||
4669 | // created by LowerConstantFP(). | ||||
4670 | SDValue BitcastOp = Op->getOperand(0); | ||||
4671 | if (BitcastOp->getOpcode() == ARMISD::VMOVIMM && | ||||
4672 | isNullConstant(BitcastOp->getOperand(0))) | ||||
4673 | return true; | ||||
4674 | } | ||||
4675 | return false; | ||||
4676 | } | ||||
4677 | |||||
4678 | /// Returns appropriate ARM CMP (cmp) and corresponding condition code for | ||||
4679 | /// the given operands. | ||||
4680 | SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, | ||||
4681 | SDValue &ARMcc, SelectionDAG &DAG, | ||||
4682 | const SDLoc &dl) const { | ||||
4683 | if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) { | ||||
4684 | unsigned C = RHSC->getZExtValue(); | ||||
4685 | if (!isLegalICmpImmediate((int32_t)C)) { | ||||
4686 | // Constant does not fit, try adjusting it by one. | ||||
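     |       // E.g. "x < 0x10001": 0x10001 is not a modified immediate, but the | ||||
     |       // equivalent "x <= 0x10000" is, since 0x10000 is 0x1 rotated by 16. | ||||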
4687 | switch (CC) { | ||||
4688 | default: break; | ||||
4689 | case ISD::SETLT: | ||||
4690 | case ISD::SETGE: | ||||
4691 | if (C != 0x80000000 && isLegalICmpImmediate(C-1)) { | ||||
4692 | CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT; | ||||
4693 | RHS = DAG.getConstant(C - 1, dl, MVT::i32); | ||||
4694 | } | ||||
4695 | break; | ||||
4696 | case ISD::SETULT: | ||||
4697 | case ISD::SETUGE: | ||||
4698 | if (C != 0 && isLegalICmpImmediate(C-1)) { | ||||
4699 | CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT; | ||||
4700 | RHS = DAG.getConstant(C - 1, dl, MVT::i32); | ||||
4701 | } | ||||
4702 | break; | ||||
4703 | case ISD::SETLE: | ||||
4704 | case ISD::SETGT: | ||||
4705 | if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) { | ||||
4706 | CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE; | ||||
4707 | RHS = DAG.getConstant(C + 1, dl, MVT::i32); | ||||
4708 | } | ||||
4709 | break; | ||||
4710 | case ISD::SETULE: | ||||
4711 | case ISD::SETUGT: | ||||
4712 | if (C != 0xffffffff && isLegalICmpImmediate(C+1)) { | ||||
4713 | CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE; | ||||
4714 | RHS = DAG.getConstant(C + 1, dl, MVT::i32); | ||||
4715 | } | ||||
4716 | break; | ||||
4717 | } | ||||
4718 | } | ||||
4719 | } else if ((ARM_AM::getShiftOpcForNode(LHS.getOpcode()) != ARM_AM::no_shift) && | ||||
4720 | (ARM_AM::getShiftOpcForNode(RHS.getOpcode()) == ARM_AM::no_shift)) { | ||||
4721 | // In ARM and Thumb-2, the compare instructions can shift their second | ||||
4722 | // operand. | ||||
4723 | CC = ISD::getSetCCSwappedOperands(CC); | ||||
4724 | std::swap(LHS, RHS); | ||||
4725 | } | ||||
4726 | |||||
4727 | // Thumb1 has very limited immediate modes, so turning an "and" into a | ||||
4728 | // shift can save multiple instructions. | ||||
4729 | // | ||||
4730 | // If we have (x & C1), and C1 is an appropriate mask, we can transform it | ||||
4731 | // into "((x << n) >> n)". But that isn't necessarily profitable on its | ||||
4732 | // own. If it's the operand to an unsigned comparison with an immediate, | ||||
4733 | // we can eliminate one of the shifts: we transform | ||||
4734 | // "((x << n) >> n) == C2" to "(x << n) == (C2 << n)". | ||||
4735 | // | ||||
4736 | // We avoid transforming cases which aren't profitable due to encoding | ||||
4737 | // details: | ||||
4738 | // | ||||
4739 | // 1. C2 fits into the immediate field of a cmp, and the transformed version | ||||
4740 | // would not; in that case, we're essentially trading one immediate load for | ||||
4741 | // another. | ||||
4742 | // 2. C1 is 255 or 65535, so we can use uxtb or uxth. | ||||
4743 | // 3. C2 is zero; we have other code for this special case. | ||||
4744 | // | ||||
4745 | // FIXME: Figure out profitability for Thumb2; we usually can't save an | ||||
4746 | // instruction, since the AND is always one instruction anyway, but we could | ||||
4747 | // use narrow instructions in some cases. | ||||
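     |   // Concretely: "(x & 0x3ffffff) == 0x2000000" (26-bit mask, ShiftBits = 6) | ||||
     |   // becomes "(x << 6) == 0x80000000", one lsls plus a compare, instead of | ||||
     |   // materialising the 26-bit mask constant first. | ||||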
4748 | if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::AND && | ||||
4749 | LHS->hasOneUse() && isa<ConstantSDNode>(LHS.getOperand(1)) && | ||||
4750 | LHS.getValueType() == MVT::i32 && isa<ConstantSDNode>(RHS) && | ||||
4751 | !isSignedIntSetCC(CC)) { | ||||
4752 | unsigned Mask = cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue(); | ||||
4753 | auto *RHSC = cast<ConstantSDNode>(RHS.getNode()); | ||||
4754 | uint64_t RHSV = RHSC->getZExtValue(); | ||||
4755 | if (isMask_32(Mask) && (RHSV & ~Mask) == 0 && Mask != 255 && Mask != 65535) { | ||||
4756 | unsigned ShiftBits = countLeadingZeros(Mask); | ||||
4757 | if (RHSV && (RHSV > 255 || (RHSV << ShiftBits) <= 255)) { | ||||
4758 | SDValue ShiftAmt = DAG.getConstant(ShiftBits, dl, MVT::i32); | ||||
4759 | LHS = DAG.getNode(ISD::SHL, dl, MVT::i32, LHS.getOperand(0), ShiftAmt); | ||||
4760 | RHS = DAG.getConstant(RHSV << ShiftBits, dl, MVT::i32); | ||||
4761 | } | ||||
4762 | } | ||||
4763 | } | ||||
4764 | |||||
4765 | // The specific comparison "(x<<c) > 0x80000000U" can be optimized to a | ||||
4766 | // single "lsls x, c+1". The shift sets the "C" and "Z" flags the same | ||||
4767 | // way a cmp would. | ||||
4768 | // FIXME: Add support for ARM/Thumb2; this would need isel patterns, and | ||||
4769 | // some tweaks to the heuristics for the previous and->shift transform. | ||||
4770 | // FIXME: Optimize cases where the LHS isn't a shift. | ||||
4771 | if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::SHL && | ||||
4772 | isa<ConstantSDNode>(RHS) && | ||||
4773 | cast<ConstantSDNode>(RHS)->getZExtValue() == 0x80000000U && | ||||
4774 | CC == ISD::SETUGT && isa<ConstantSDNode>(LHS.getOperand(1)) && | ||||
4775 | cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() < 31) { | ||||
4776 | unsigned ShiftAmt = | ||||
4777 | cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() + 1; | ||||
4778 | SDValue Shift = DAG.getNode(ARMISD::LSLS, dl, | ||||
4779 | DAG.getVTList(MVT::i32, MVT::i32), | ||||
4780 | LHS.getOperand(0), | ||||
4781 | DAG.getConstant(ShiftAmt, dl, MVT::i32)); | ||||
4782 | SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR, | ||||
4783 | Shift.getValue(1), SDValue()); | ||||
4784 | ARMcc = DAG.getConstant(ARMCC::HI, dl, MVT::i32); | ||||
4785 | return Chain.getValue(1); | ||||
4786 | } | ||||
4787 | |||||
4788 | ARMCC::CondCodes CondCode = IntCCToARMCC(CC); | ||||
4789 | |||||
4790 | // If the RHS is a constant zero then the V (overflow) flag will never be | ||||
4791 | // set. This can allow us to simplify GE to PL or LT to MI, which can be | ||||
4792 | // simpler for other passes (like the peephole optimiser) to deal with. | ||||
4793 | if (isNullConstant(RHS)) { | ||||
4794 | switch (CondCode) { | ||||
4795 | default: break; | ||||
4796 | case ARMCC::GE: | ||||
4797 | CondCode = ARMCC::PL; | ||||
4798 | break; | ||||
4799 | case ARMCC::LT: | ||||
4800 | CondCode = ARMCC::MI; | ||||
4801 | break; | ||||
4802 | } | ||||
4803 | } | ||||
4804 | |||||
4805 | ARMISD::NodeType CompareType; | ||||
4806 | switch (CondCode) { | ||||
4807 | default: | ||||
4808 | CompareType = ARMISD::CMP; | ||||
4809 | break; | ||||
4810 | case ARMCC::EQ: | ||||
4811 | case ARMCC::NE: | ||||
4812 | // Uses only Z Flag | ||||
4813 | CompareType = ARMISD::CMPZ; | ||||
4814 | break; | ||||
4815 | } | ||||
4816 | ARMcc = DAG.getConstant(CondCode, dl, MVT::i32); | ||||
4817 | return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS); | ||||
4818 | } | ||||
4819 | |||||
4820 | /// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands. | ||||
4821 | SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, | ||||
4822 | SelectionDAG &DAG, const SDLoc &dl, | ||||
4823 | bool Signaling) const { | ||||
4824 |   assert(Subtarget->hasFP64() || RHS.getValueType() != MVT::f64); | ||||
4825 | SDValue Cmp; | ||||
4826 | if (!isFloatingPointZero(RHS)) | ||||
4827 | Cmp = DAG.getNode(Signaling ? ARMISD::CMPFPE : ARMISD::CMPFP, | ||||
4828 | dl, MVT::Glue, LHS, RHS); | ||||
4829 | else | ||||
4830 | Cmp = DAG.getNode(Signaling ? ARMISD::CMPFPEw0 : ARMISD::CMPFPw0, | ||||
4831 | dl, MVT::Glue, LHS); | ||||
4832 | return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp); | ||||
4833 | } | ||||
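     | // E.g. comparing an f32 value against +0.0 takes the CMPFPw0 form, which | ||||
     | // selects to "vcmp.f32 Sd, #0.0" and avoids materialising the zero; the | ||||
     | // FMSTAT (vmrs) then copies the FP status flags into CPSR. | ||||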
4834 | |||||
4835 | /// duplicateCmp - Glue values can have only one use, so this function | ||||
4836 | /// duplicates a comparison node. | ||||
4837 | SDValue | ||||
4838 | ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const { | ||||
4839 | unsigned Opc = Cmp.getOpcode(); | ||||
4840 | SDLoc DL(Cmp); | ||||
4841 | if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ) | ||||
4842 |     return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0), Cmp.getOperand(1)); | ||||
4843 | |||||
4844 |   assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation"); | ||||
4845 | Cmp = Cmp.getOperand(0); | ||||
4846 | Opc = Cmp.getOpcode(); | ||||
4847 | if (Opc == ARMISD::CMPFP) | ||||
4848 |     Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0), Cmp.getOperand(1)); | ||||
4849 | else { | ||||
4850 | assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT")(static_cast <bool> (Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT" ) ? void (0) : __assert_fail ("Opc == ARMISD::CMPFPw0 && \"unexpected operand of FMSTAT\"" , "llvm/lib/Target/ARM/ARMISelLowering.cpp", 4850, __extension__ __PRETTY_FUNCTION__)); | ||||
4851 | Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0)); | ||||
4852 | } | ||||
4853 | return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp); | ||||
4854 | } | ||||
4855 | |||||
4856 | // This function returns three things: the arithmetic computation itself | ||||
4857 | // (Value), a comparison (OverflowCmp), and a condition code (ARMcc). The | ||||
4858 | // comparison and the condition code define the case in which the arithmetic | ||||
4859 | // computation *does not* overflow. | ||||
4860 | std::pair<SDValue, SDValue> | ||||
4861 | ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG, | ||||
4862 | SDValue &ARMcc) const { | ||||
4863 |   assert(Op.getValueType() == MVT::i32 && "Unsupported value type"); | ||||
4864 | |||||
4865 | SDValue Value, OverflowCmp; | ||||
4866 | SDValue LHS = Op.getOperand(0); | ||||
4867 | SDValue RHS = Op.getOperand(1); | ||||
4868 | SDLoc dl(Op); | ||||
4869 | |||||
4870 | // FIXME: We are currently always generating CMPs because we don't support | ||||
4871 | // generating CMN through the backend. This is not as good as the natural | ||||
4872 | // CMP case because it causes a register dependency and cannot be folded | ||||
4873 | // later. | ||||
4874 | |||||
4875 | switch (Op.getOpcode()) { | ||||
4876 | default: | ||||
4877 |     llvm_unreachable("Unknown overflow instruction!"); | ||||
4878 | case ISD::SADDO: | ||||
4879 | ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32); | ||||
4880 | Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS); | ||||
4881 | OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS); | ||||
4882 | break; | ||||
4883 | case ISD::UADDO: | ||||
4884 | ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32); | ||||
4885 | // We use ADDC here to correspond to its use in LowerUnsignedALUO. | ||||
4886 | // We do not use it in the USUBO case as Value may not be used. | ||||
4887 | Value = DAG.getNode(ARMISD::ADDC, dl, | ||||
4888 | DAG.getVTList(Op.getValueType(), MVT::i32), LHS, RHS) | ||||
4889 | .getValue(0); | ||||
4890 | OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS); | ||||
4891 | break; | ||||
4892 | case ISD::SSUBO: | ||||
4893 | ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32); | ||||
4894 | Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS); | ||||
4895 | OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS); | ||||
4896 | break; | ||||
4897 | case ISD::USUBO: | ||||
4898 | ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32); | ||||
4899 | Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS); | ||||
4900 | OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS); | ||||
4901 | break; | ||||
4902 | case ISD::UMULO: | ||||
4903 | // We generate a UMUL_LOHI and then check if the high word is 0. | ||||
4904 | ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32); | ||||
4905 | Value = DAG.getNode(ISD::UMUL_LOHI, dl, | ||||
4906 | DAG.getVTList(Op.getValueType(), Op.getValueType()), | ||||
4907 | LHS, RHS); | ||||
4908 | OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1), | ||||
4909 | DAG.getConstant(0, dl, MVT::i32)); | ||||
4910 | Value = Value.getValue(0); // We only want the low 32 bits for the result. | ||||
4911 | break; | ||||
4912 | case ISD::SMULO: | ||||
4913 | // We generate a SMUL_LOHI and then check if all the bits of the high word | ||||
4914 | // are the same as the sign bit of the low word. | ||||
4915 | ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32); | ||||
4916 | Value = DAG.getNode(ISD::SMUL_LOHI, dl, | ||||
4917 | DAG.getVTList(Op.getValueType(), Op.getValueType()), | ||||
4918 | LHS, RHS); | ||||
4919 | OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1), | ||||
4920 | DAG.getNode(ISD::SRA, dl, Op.getValueType(), | ||||
4921 | Value.getValue(0), | ||||
4922 | DAG.getConstant(31, dl, MVT::i32))); | ||||
4923 | Value = Value.getValue(0); // We only want the low 32 bits for the result. | ||||
4924 | break; | ||||
4925 | } // switch (...) | ||||
4926 | |||||
4927 | return std::make_pair(Value, OverflowCmp); | ||||
4928 | } | ||||
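     | // Sketch of the SADDO case above: Value = a + b and OverflowCmp is | ||||
     | // CMP(a + b, a). Subtracting a back out overflows exactly when the | ||||
     | // original addition did, so VC ("no overflow") is the right condition. | ||||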
4929 | |||||
4930 | SDValue | ||||
4931 | ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const { | ||||
4932 | // Let legalize expand this if it isn't a legal type yet. | ||||
4933 | if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType())) | ||||
4934 | return SDValue(); | ||||
4935 | |||||
4936 | SDValue Value, OverflowCmp; | ||||
4937 | SDValue ARMcc; | ||||
4938 | std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc); | ||||
4939 | SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); | ||||
4940 | SDLoc dl(Op); | ||||
4941 | // We use 0 and 1 as false and true values. | ||||
4942 | SDValue TVal = DAG.getConstant(1, dl, MVT::i32); | ||||
4943 | SDValue FVal = DAG.getConstant(0, dl, MVT::i32); | ||||
4944 | EVT VT = Op.getValueType(); | ||||
4945 | |||||
4946 | SDValue Overflow = DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal, | ||||
4947 | ARMcc, CCR, OverflowCmp); | ||||
4948 | |||||
4949 | SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); | ||||
4950 | return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow); | ||||
4951 | } | ||||
4952 | |||||
4953 | static SDValue ConvertBooleanCarryToCarryFlag(SDValue BoolCarry, | ||||
4954 | SelectionDAG &DAG) { | ||||
4955 | SDLoc DL(BoolCarry); | ||||
4956 | EVT CarryVT = BoolCarry.getValueType(); | ||||
4957 | |||||
4958 | // This converts the boolean value carry into the carry flag by doing | ||||
4959 | // ARMISD::SUBC Carry, 1 | ||||
4960 | SDValue Carry = DAG.getNode(ARMISD::SUBC, DL, | ||||
4961 | DAG.getVTList(CarryVT, MVT::i32), | ||||
4962 | BoolCarry, DAG.getConstant(1, DL, CarryVT)); | ||||
4963 | return Carry.getValue(1); | ||||
4964 | } | ||||
4965 | |||||
4966 | static SDValue ConvertCarryFlagToBooleanCarry(SDValue Flags, EVT VT, | ||||
4967 | SelectionDAG &DAG) { | ||||
4968 | SDLoc DL(Flags); | ||||
4969 | |||||
4970 | // Now convert the carry flag into a boolean carry. We do this | ||||
4972 |   // using ARMISD::ADDE 0, 0, Carry | ||||
4972 | return DAG.getNode(ARMISD::ADDE, DL, DAG.getVTList(VT, MVT::i32), | ||||
4973 | DAG.getConstant(0, DL, MVT::i32), | ||||
4974 | DAG.getConstant(0, DL, MVT::i32), Flags); | ||||
4975 | } | ||||
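     | // These two helpers round-trip a carry: SUBC(bool, 1) borrows exactly | ||||
     | // when bool == 0, and since the ARM carry flag is the inverse of borrow, | ||||
     | // C ends up equal to bool; ADDE(0, 0, flags) then reads it back as 0/1. | ||||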
4976 | |||||
4977 | SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op, | ||||
4978 | SelectionDAG &DAG) const { | ||||
4979 | // Let legalize expand this if it isn't a legal type yet. | ||||
4980 | if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType())) | ||||
4981 | return SDValue(); | ||||
4982 | |||||
4983 | SDValue LHS = Op.getOperand(0); | ||||
4984 | SDValue RHS = Op.getOperand(1); | ||||
4985 | SDLoc dl(Op); | ||||
4986 | |||||
4987 | EVT VT = Op.getValueType(); | ||||
4988 | SDVTList VTs = DAG.getVTList(VT, MVT::i32); | ||||
4989 | SDValue Value; | ||||
4990 | SDValue Overflow; | ||||
4991 | switch (Op.getOpcode()) { | ||||
4992 | default: | ||||
4993 |     llvm_unreachable("Unknown overflow instruction!"); | ||||
4994 | case ISD::UADDO: | ||||
4995 | Value = DAG.getNode(ARMISD::ADDC, dl, VTs, LHS, RHS); | ||||
4996 | // Convert the carry flag into a boolean value. | ||||
4997 | Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG); | ||||
4998 | break; | ||||
4999 | case ISD::USUBO: { | ||||
5000 | Value = DAG.getNode(ARMISD::SUBC, dl, VTs, LHS, RHS); | ||||
5001 | // Convert the carry flag into a boolean value. | ||||
5002 | Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG); | ||||
5003 |     // ARMISD::SUBC returns 0 when we have to borrow, so make it an | ||||
5004 |     // overflow value by computing 1 - C. | ||||
5005 | Overflow = DAG.getNode(ISD::SUB, dl, MVT::i32, | ||||
5006 | DAG.getConstant(1, dl, MVT::i32), Overflow); | ||||
5007 | break; | ||||
5008 | } | ||||
5009 | } | ||||
5010 | |||||
5011 | return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow); | ||||
5012 | } | ||||
5013 | |||||
5014 | static SDValue LowerADDSUBSAT(SDValue Op, SelectionDAG &DAG, | ||||
5015 | const ARMSubtarget *Subtarget) { | ||||
5016 | EVT VT = Op.getValueType(); | ||||
5017 | if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP()) | ||||
5018 | return SDValue(); | ||||
5019 | if (!VT.isSimple()) | ||||
5020 | return SDValue(); | ||||
5021 | |||||
5022 | unsigned NewOpcode; | ||||
5023 | switch (VT.getSimpleVT().SimpleTy) { | ||||
5024 | default: | ||||
5025 | return SDValue(); | ||||
5026 | case MVT::i8: | ||||
5027 | switch (Op->getOpcode()) { | ||||
5028 | case ISD::UADDSAT: | ||||
5029 | NewOpcode = ARMISD::UQADD8b; | ||||
5030 | break; | ||||
5031 | case ISD::SADDSAT: | ||||
5032 | NewOpcode = ARMISD::QADD8b; | ||||
5033 | break; | ||||
5034 | case ISD::USUBSAT: | ||||
5035 | NewOpcode = ARMISD::UQSUB8b; | ||||
5036 | break; | ||||
5037 | case ISD::SSUBSAT: | ||||
5038 | NewOpcode = ARMISD::QSUB8b; | ||||
5039 | break; | ||||
5040 | } | ||||
5041 | break; | ||||
5042 | case MVT::i16: | ||||
5043 | switch (Op->getOpcode()) { | ||||
5044 | case ISD::UADDSAT: | ||||
5045 | NewOpcode = ARMISD::UQADD16b; | ||||
5046 | break; | ||||
5047 | case ISD::SADDSAT: | ||||
5048 | NewOpcode = ARMISD::QADD16b; | ||||
5049 | break; | ||||
5050 | case ISD::USUBSAT: | ||||
5051 | NewOpcode = ARMISD::UQSUB16b; | ||||
5052 | break; | ||||
5053 | case ISD::SSUBSAT: | ||||
5054 | NewOpcode = ARMISD::QSUB16b; | ||||
5055 | break; | ||||
5056 | } | ||||
5057 | break; | ||||
5058 | } | ||||
5059 | |||||
5060 | SDLoc dl(Op); | ||||
5061 | SDValue Add = | ||||
5062 | DAG.getNode(NewOpcode, dl, MVT::i32, | ||||
5063 | DAG.getSExtOrTrunc(Op->getOperand(0), dl, MVT::i32), | ||||
5064 | DAG.getSExtOrTrunc(Op->getOperand(1), dl, MVT::i32)); | ||||
5065 | return DAG.getNode(ISD::TRUNCATE, dl, VT, Add); | ||||
5066 | } | ||||
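     | // E.g. an i8 uadd.sat(a, b) becomes UQADD8 on the i32-extended operands | ||||
     | // followed by a truncate: the DSP instruction saturates each byte lane | ||||
     | // independently, and only lane 0 carries the value we care about here. | ||||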
5067 | |||||
5068 | SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { | ||||
5069 | SDValue Cond = Op.getOperand(0); | ||||
5070 | SDValue SelectTrue = Op.getOperand(1); | ||||
5071 | SDValue SelectFalse = Op.getOperand(2); | ||||
5072 | SDLoc dl(Op); | ||||
5073 | unsigned Opc = Cond.getOpcode(); | ||||
5074 | |||||
5075 | if (Cond.getResNo() == 1 && | ||||
5076 | (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || | ||||
5077 | Opc == ISD::USUBO)) { | ||||
5078 | if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0))) | ||||
5079 | return SDValue(); | ||||
5080 | |||||
5081 | SDValue Value, OverflowCmp; | ||||
5082 | SDValue ARMcc; | ||||
5083 | std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc); | ||||
5084 | SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); | ||||
5085 | EVT VT = Op.getValueType(); | ||||
5086 | |||||
5087 | return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, CCR, | ||||
5088 | OverflowCmp, DAG); | ||||
5089 | } | ||||
5090 | |||||
5091 | // Convert: | ||||
5092 | // | ||||
5093 | // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond) | ||||
5094 | // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond) | ||||
5095 | // | ||||
5096 | if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) { | ||||
5097 | const ConstantSDNode *CMOVTrue = | ||||
5098 | dyn_cast<ConstantSDNode>(Cond.getOperand(0)); | ||||
5099 | const ConstantSDNode *CMOVFalse = | ||||
5100 | dyn_cast<ConstantSDNode>(Cond.getOperand(1)); | ||||
5101 | |||||
5102 | if (CMOVTrue && CMOVFalse) { | ||||
5103 | unsigned CMOVTrueVal = CMOVTrue->getZExtValue(); | ||||
5104 | unsigned CMOVFalseVal = CMOVFalse->getZExtValue(); | ||||
5105 | |||||
5106 | SDValue True; | ||||
5107 | SDValue False; | ||||
5108 | if (CMOVTrueVal == 1 && CMOVFalseVal == 0) { | ||||
5109 | True = SelectTrue; | ||||
5110 | False = SelectFalse; | ||||
5111 | } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) { | ||||
5112 | True = SelectFalse; | ||||
5113 | False = SelectTrue; | ||||
5114 | } | ||||
5115 | |||||
5116 | if (True.getNode() && False.getNode()) { | ||||
5117 | EVT VT = Op.getValueType(); | ||||
5118 | SDValue ARMcc = Cond.getOperand(2); | ||||
5119 | SDValue CCR = Cond.getOperand(3); | ||||
5120 | SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG); | ||||
5121 |         assert(True.getValueType() == VT); | ||||
5122 | return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG); | ||||
5123 | } | ||||
5124 | } | ||||
5125 | } | ||||
5126 | |||||
5127 | // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the | ||||
5128 | // undefined bits before doing a full-word comparison with zero. | ||||
5129 | Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond, | ||||
5130 | DAG.getConstant(1, dl, Cond.getValueType())); | ||||
5131 | |||||
5132 | return DAG.getSelectCC(dl, Cond, | ||||
5133 | DAG.getConstant(0, dl, Cond.getValueType()), | ||||
5134 | SelectTrue, SelectFalse, ISD::SETNE); | ||||
5135 | } | ||||
5136 | |||||
5137 | static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode, | ||||
5138 | bool &swpCmpOps, bool &swpVselOps) { | ||||
5139 | // Start by selecting the GE condition code for opcodes that return true for | ||||
5140 | // 'equality' | ||||
5141 | if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE || | ||||
5142 | CC == ISD::SETULE || CC == ISD::SETGE || CC == ISD::SETLE) | ||||
5143 | CondCode = ARMCC::GE; | ||||
5144 | |||||
5145 | // and GT for opcodes that return false for 'equality'. | ||||
5146 | else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT || | ||||
5147 | CC == ISD::SETULT || CC == ISD::SETGT || CC == ISD::SETLT) | ||||
5148 | CondCode = ARMCC::GT; | ||||
5149 | |||||
5150 | // Since we are constrained to GE/GT, if the opcode contains 'less', we need | ||||
5151 | // to swap the compare operands. | ||||
5152 | if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT || | ||||
5153 | CC == ISD::SETULT || CC == ISD::SETLE || CC == ISD::SETLT) | ||||
5154 | swpCmpOps = true; | ||||
5155 | |||||
5156 | // Both GT and GE are ordered comparisons, and return false for 'unordered'. | ||||
5157 | // If we have an unordered opcode, we need to swap the operands to the VSEL | ||||
5158 | // instruction (effectively negating the condition). | ||||
5159 | // | ||||
5160 | // This also has the effect of swapping which one of 'less' or 'greater' | ||||
5161 | // returns true, so we also swap the compare operands. It also switches | ||||
5162 | // whether we return true for 'equality', so we compensate by picking the | ||||
5163 | // opposite condition code to our original choice. | ||||
5164 | if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE || | ||||
5165 | CC == ISD::SETUGT) { | ||||
5166 | swpCmpOps = !swpCmpOps; | ||||
5167 | swpVselOps = !swpVselOps; | ||||
5168 | CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT; | ||||
5169 | } | ||||
5170 | |||||
5171 | // 'ordered' is 'anything but unordered', so use the VS condition code and | ||||
5172 | // swap the VSEL operands. | ||||
5173 | if (CC == ISD::SETO) { | ||||
5174 | CondCode = ARMCC::VS; | ||||
5175 | swpVselOps = true; | ||||
5176 | } | ||||
5177 | |||||
5178 | // 'unordered or not equal' is 'anything but equal', so use the EQ condition | ||||
5179 | // code and swap the VSEL operands. Also do this if we don't care about the | ||||
5180 | // unordered case. | ||||
5181 | if (CC == ISD::SETUNE || CC == ISD::SETNE) { | ||||
5182 | CondCode = ARMCC::EQ; | ||||
5183 | swpVselOps = true; | ||||
5184 | } | ||||
5185 | } | ||||
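     | // Worked example: SETULT ("a <u b ? x : y") first picks GT and requests a | ||||
     | // compare-operand swap; the unordered fix-up then undoes that swap, swaps | ||||
     | // the VSEL operands instead and relaxes GT to GE, yielding | ||||
     | // "vcmp a, b; vselge y, x", which selects x exactly when !(a >= b). | ||||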
5186 | |||||
5187 | SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal, | ||||
5188 | SDValue TrueVal, SDValue ARMcc, SDValue CCR, | ||||
5189 | SDValue Cmp, SelectionDAG &DAG) const { | ||||
5190 | if (!Subtarget->hasFP64() && VT == MVT::f64) { | ||||
5191 | FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl, | ||||
5192 | DAG.getVTList(MVT::i32, MVT::i32), FalseVal); | ||||
5193 | TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl, | ||||
5194 | DAG.getVTList(MVT::i32, MVT::i32), TrueVal); | ||||
5195 | |||||
5196 | SDValue TrueLow = TrueVal.getValue(0); | ||||
5197 | SDValue TrueHigh = TrueVal.getValue(1); | ||||
5198 | SDValue FalseLow = FalseVal.getValue(0); | ||||
5199 | SDValue FalseHigh = FalseVal.getValue(1); | ||||
5200 | |||||
5201 | SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow, | ||||
5202 | ARMcc, CCR, Cmp); | ||||
5203 | SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh, | ||||
5204 | ARMcc, CCR, duplicateCmp(Cmp, DAG)); | ||||
5205 | |||||
5206 | return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High); | ||||
5207 | } else { | ||||
5208 | return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR, | ||||
5209 | Cmp); | ||||
5210 | } | ||||
5211 | } | ||||
5212 | |||||
5213 | static bool isGTorGE(ISD::CondCode CC) { | ||||
5214 | return CC == ISD::SETGT || CC == ISD::SETGE; | ||||
5215 | } | ||||
5216 | |||||
5217 | static bool isLTorLE(ISD::CondCode CC) { | ||||
5218 | return CC == ISD::SETLT || CC == ISD::SETLE; | ||||
5219 | } | ||||
5220 | |||||
5221 | // See if a conditional (LHS CC RHS ? TrueVal : FalseVal) is lower-saturating. | ||||
5222 | // All of these conditions (and their <= and >= counterparts) will do: | ||||
5223 | // x < k ? k : x | ||||
5224 | // x > k ? x : k | ||||
5225 | // k < x ? x : k | ||||
5226 | // k > x ? k : x | ||||
5227 | static bool isLowerSaturate(const SDValue LHS, const SDValue RHS, | ||||
5228 | const SDValue TrueVal, const SDValue FalseVal, | ||||
5229 | const ISD::CondCode CC, const SDValue K) { | ||||
5230 | return (isGTorGE(CC) && | ||||
5231 | ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))) || | ||||
5232 | (isLTorLE(CC) && | ||||
5233 | ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal))); | ||||
5234 | } | ||||
5235 | |||||
5236 | // Check if two chained conditionals could be converted into SSAT or USAT. | ||||
5237 | // | ||||
5238 | // SSAT can replace a set of two conditional selectors that bound a number to | ||||
5239 | // an interval of the form [~k, k], where k + 1 is a power of 2. Examples: | ||||
5240 | // | ||||
5241 | // x < -k ? -k : (x > k ? k : x) | ||||
5242 | // x < -k ? -k : (x < k ? x : k) | ||||
5243 | // x > -k ? (x > k ? k : x) : -k | ||||
5244 | // x < k ? (x < -k ? -k : x) : k | ||||
5245 | // etc. | ||||
5246 | // | ||||
5247 | // LLVM canonicalizes these to either a min(max()) or a max(min()) | ||||
5248 | // pattern. This function tries to match one of these and will return an SSAT | ||||
5249 | // node if successful. | ||||
5250 | // | ||||
5251 | // USAT works similarly to SSAT but bounds the value to the interval [0, k], | ||||
5252 | // where k + 1 is a power of 2. | ||||
5253 | static SDValue LowerSaturatingConditional(SDValue Op, SelectionDAG &DAG) { | ||||
5254 | EVT VT = Op.getValueType(); | ||||
5255 | SDValue V1 = Op.getOperand(0); | ||||
5256 | SDValue K1 = Op.getOperand(1); | ||||
5257 | SDValue TrueVal1 = Op.getOperand(2); | ||||
5258 | SDValue FalseVal1 = Op.getOperand(3); | ||||
5259 | ISD::CondCode CC1 = cast<CondCodeSDNode>(Op.getOperand(4))->get(); | ||||
5260 | |||||
5261 | const SDValue Op2 = isa<ConstantSDNode>(TrueVal1) ? FalseVal1 : TrueVal1; | ||||
5262 | if (Op2.getOpcode() != ISD::SELECT_CC) | ||||
5263 | return SDValue(); | ||||
5264 | |||||
5265 | SDValue V2 = Op2.getOperand(0); | ||||
5266 | SDValue K2 = Op2.getOperand(1); | ||||
5267 | SDValue TrueVal2 = Op2.getOperand(2); | ||||
5268 | SDValue FalseVal2 = Op2.getOperand(3); | ||||
5269 | ISD::CondCode CC2 = cast<CondCodeSDNode>(Op2.getOperand(4))->get(); | ||||
5270 | |||||
5271 | SDValue V1Tmp = V1; | ||||
5272 | SDValue V2Tmp = V2; | ||||
5273 | |||||
5274 | // Check that the registers and the constants match a max(min()) or min(max()) | ||||
5275 | // pattern | ||||
5276 | if (V1Tmp != TrueVal1 || V2Tmp != TrueVal2 || K1 != FalseVal1 || | ||||
5277 | K2 != FalseVal2 || | ||||
5278 | !((isGTorGE(CC1) && isLTorLE(CC2)) || (isLTorLE(CC1) && isGTorGE(CC2)))) | ||||
5279 | return SDValue(); | ||||
5280 | |||||
5281 | // Check that the constant in the lower-bound check is | ||||
5282 | // the opposite of the constant in the upper-bound check | ||||
5283 | // in 1's complement. | ||||
5284 | if (!isa<ConstantSDNode>(K1) || !isa<ConstantSDNode>(K2)) | ||||
5285 | return SDValue(); | ||||
5286 | |||||
5287 | int64_t Val1 = cast<ConstantSDNode>(K1)->getSExtValue(); | ||||
5288 | int64_t Val2 = cast<ConstantSDNode>(K2)->getSExtValue(); | ||||
5289 | int64_t PosVal = std::max(Val1, Val2); | ||||
5290 | int64_t NegVal = std::min(Val1, Val2); | ||||
5291 | |||||
5292 | if (!((Val1 > Val2 && isLTorLE(CC1)) || (Val1 < Val2 && isLTorLE(CC2))) || | ||||
5293 | !isPowerOf2_64(PosVal + 1)) | ||||
5294 | return SDValue(); | ||||
5295 | |||||
5296 | // Handle the difference between USAT (unsigned) and SSAT (signed) | ||||
5297 | // saturation | ||||
5298 | // At this point, PosVal is guaranteed to be positive | ||||
5299 | uint64_t K = PosVal; | ||||
5300 | SDLoc dl(Op); | ||||
5301 | if (Val1 == ~Val2) | ||||
5302 | return DAG.getNode(ARMISD::SSAT, dl, VT, V2Tmp, | ||||
5303 | DAG.getConstant(countTrailingOnes(K), dl, VT)); | ||||
5304 | if (NegVal == 0) | ||||
5305 | return DAG.getNode(ARMISD::USAT, dl, VT, V2Tmp, | ||||
5306 | DAG.getConstant(countTrailingOnes(K), dl, VT)); | ||||
5307 | |||||
5308 | return SDValue(); | ||||
5309 | } | ||||
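     | // Concrete instances: "x < -128 ? -128 : (x > 127 ? 127 : x)" satisfies | ||||
     | // Val1 == ~Val2 and folds to a signed saturate to 8 bits (ssat #8), while | ||||
     | // "x < 0 ? 0 : (x > 255 ? 255 : x)" has NegVal == 0 and folds to usat #8. | ||||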
5310 | |||||
5311 | // Check if a condition of the type x < k ? k : x can be converted into a | ||||
5312 | // bit operation instead of conditional moves. | ||||
5313 | // Currently this is allowed given: | ||||
5314 | // - The conditions and values match up | ||||
5315 | // - k is 0 or -1 (all ones) | ||||
5316 | // This function will not check the last condition; that's up to the caller. | ||||
5317 | // It returns true if the transformation can be made, and in that case | ||||
5318 | // returns x in V, and k in SatK. | ||||
5319 | static bool isLowerSaturatingConditional(const SDValue &Op, SDValue &V, | ||||
5320 | SDValue &SatK) | ||||
5321 | { | ||||
5322 | SDValue LHS = Op.getOperand(0); | ||||
5323 | SDValue RHS = Op.getOperand(1); | ||||
5324 | ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); | ||||
5325 | SDValue TrueVal = Op.getOperand(2); | ||||
5326 | SDValue FalseVal = Op.getOperand(3); | ||||
5327 | |||||
5328 | SDValue *K = isa<ConstantSDNode>(LHS) ? &LHS : isa<ConstantSDNode>(RHS) | ||||
5329 | ? &RHS | ||||
5330 | : nullptr; | ||||
5331 | |||||
5332 |   // No constant operand in the comparison; early out. | ||||
5333 | if (!K) | ||||
5334 | return false; | ||||
5335 | |||||
5336 | SDValue KTmp = isa<ConstantSDNode>(TrueVal) ? TrueVal : FalseVal; | ||||
5337 | V = (KTmp == TrueVal) ? FalseVal : TrueVal; | ||||
5338 | SDValue VTmp = (K && *K == LHS) ? RHS : LHS; | ||||
5339 | |||||
5340 |   // If the constant on the left and right side, or the variable on the left | ||||
5341 |   // and right, does not match, early out. | ||||
5342 | if (*K != KTmp || V != VTmp) | ||||
5343 | return false; | ||||
5344 | |||||
5345 | if (isLowerSaturate(LHS, RHS, TrueVal, FalseVal, CC, *K)) { | ||||
5346 | SatK = *K; | ||||
5347 | return true; | ||||
5348 | } | ||||
5349 | |||||
5350 | return false; | ||||
5351 | } | ||||
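A sketch of the branch-free forms that LowerSELECT_CC below emits once this predicate matches, assuming the usual arithmetic right shift of negative values as on ARM (names are illustrative):

    #include <cstdint>

    int32_t lower_clamp_zero(int32_t x) {     // x < 0 ? 0 : x
      int32_t sign = x >> 31;                 // SRA: 0 or -1
      return x & ~sign;                       // AND with the inverted sign mask
    }

    int32_t lower_clamp_all_ones(int32_t x) { // x < -1 ? -1 : x
      return x | (x >> 31);                   // OR with the sign mask
    }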
5352 | |||||
5353 | bool ARMTargetLowering::isUnsupportedFloatingType(EVT VT) const { | ||||
5354 | if (VT == MVT::f32) | ||||
5355 | return !Subtarget->hasVFP2Base(); | ||||
5356 | if (VT == MVT::f64) | ||||
5357 | return !Subtarget->hasFP64(); | ||||
5358 | if (VT == MVT::f16) | ||||
5359 | return !Subtarget->hasFullFP16(); | ||||
5360 | return false; | ||||
5361 | } | ||||
5362 | |||||
5363 | SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { | ||||
5364 | EVT VT = Op.getValueType(); | ||||
5365 | SDLoc dl(Op); | ||||
5366 | |||||
5367 | // Try to convert two saturating conditional selects into a single SSAT | ||||
5368 | if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2()) | ||||
5369 | if (SDValue SatValue = LowerSaturatingConditional(Op, DAG)) | ||||
5370 | return SatValue; | ||||
5371 | |||||
5372 | // Try to convert expressions of the form x < k ? k : x (and similar forms) | ||||
5373 | // into more efficient bit operations, which is possible when k is 0 or -1 | ||||
5374 | // On ARM and Thumb-2 which have flexible operand 2 this will result in | ||||
5375 | // single instructions. On Thumb the shift and the bit operation will be two | ||||
5376 | // instructions. | ||||
5377 | // Only allow this transformation on full-width (32-bit) operations | ||||
5378 | SDValue LowerSatConstant; | ||||
5379 | SDValue SatValue; | ||||
5380 | if (VT == MVT::i32 && | ||||
5381 | isLowerSaturatingConditional(Op, SatValue, LowerSatConstant)) { | ||||
5382 | SDValue ShiftV = DAG.getNode(ISD::SRA, dl, VT, SatValue, | ||||
5383 | DAG.getConstant(31, dl, VT)); | ||||
5384 | if (isNullConstant(LowerSatConstant)) { | ||||
5385 | SDValue NotShiftV = DAG.getNode(ISD::XOR, dl, VT, ShiftV, | ||||
5386 | DAG.getAllOnesConstant(dl, VT)); | ||||
5387 | return DAG.getNode(ISD::AND, dl, VT, SatValue, NotShiftV); | ||||
5388 | } else if (isAllOnesConstant(LowerSatConstant)) | ||||
5389 | return DAG.getNode(ISD::OR, dl, VT, SatValue, ShiftV); | ||||
5390 | } | ||||
5391 | |||||
5392 | SDValue LHS = Op.getOperand(0); | ||||
5393 | SDValue RHS = Op.getOperand(1); | ||||
5394 | ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); | ||||
5395 | SDValue TrueVal = Op.getOperand(2); | ||||
5396 | SDValue FalseVal = Op.getOperand(3); | ||||
5397 | ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FalseVal); | ||||
5398 | ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TrueVal); | ||||
5399 | |||||
5400 | if (Subtarget->hasV8_1MMainlineOps() && CFVal && CTVal && | ||||
5401 | LHS.getValueType() == MVT::i32 && RHS.getValueType() == MVT::i32) { | ||||
5402 | unsigned TVal = CTVal->getZExtValue(); | ||||
5403 | unsigned FVal = CFVal->getZExtValue(); | ||||
5404 | unsigned Opcode = 0; | ||||
5405 | |||||
5406 | if (TVal == ~FVal) { | ||||
5407 | Opcode = ARMISD::CSINV; | ||||
5408 | } else if (TVal == ~FVal + 1) { | ||||
5409 | Opcode = ARMISD::CSNEG; | ||||
5410 | } else if (TVal + 1 == FVal) { | ||||
5411 | Opcode = ARMISD::CSINC; | ||||
5412 | } else if (TVal == FVal + 1) { | ||||
5413 | Opcode = ARMISD::CSINC; | ||||
5414 | std::swap(TrueVal, FalseVal); | ||||
5415 | std::swap(TVal, FVal); | ||||
5416 | CC = ISD::getSetCCInverse(CC, LHS.getValueType()); | ||||
5417 | } | ||||
5418 | |||||
5419 | if (Opcode) { | ||||
5420 | // If one of the constants is cheaper than another, materialise the | ||||
5421 | // cheaper one and let the csel generate the other. | ||||
5422 | if (Opcode != ARMISD::CSINC && | ||||
5423 | HasLowerConstantMaterializationCost(FVal, TVal, Subtarget)) { | ||||
5424 | std::swap(TrueVal, FalseVal); | ||||
5425 | std::swap(TVal, FVal); | ||||
5426 | CC = ISD::getSetCCInverse(CC, LHS.getValueType()); | ||||
5427 | } | ||||
5428 | |||||
5429 | // Attempt to use ZR by checking whether TVal is 0, possibly inverting the | ||||
5430 | // condition to get there. CSINC is not invertible like the other two | ||||
5431 | // (~(~a) == a and -(-a) == a, but (a+1)+1 != a). | ||||
5432 | if (FVal == 0 && Opcode != ARMISD::CSINC) { | ||||
5433 | std::swap(TrueVal, FalseVal); | ||||
5434 | std::swap(TVal, FVal); | ||||
5435 | CC = ISD::getSetCCInverse(CC, LHS.getValueType()); | ||||
5436 | } | ||||
5437 | |||||
5438 | // Drops F's value because we can get it by inverting/negating TVal. | ||||
5439 | FalseVal = TrueVal; | ||||
5440 | |||||
5441 | SDValue ARMcc; | ||||
5442 | SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); | ||||
5443 | EVT VT = TrueVal.getValueType(); | ||||
5444 | return DAG.getNode(Opcode, dl, VT, TrueVal, FalseVal, ARMcc, Cmp); | ||||
5445 | } | ||||
5446 | } | ||||
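The opcode choice above relies on each conditional-select variant reconstructing the false value from its second operand, which is why FalseVal can be dropped and TrueVal passed in both slots. A hedged sketch of the v8.1-M semantics (these helpers are illustrative, not backend APIs):

    #include <cstdint>

    uint32_t csinv(bool c, uint32_t t, uint32_t f) { return c ? t : ~f; }     // FVal == ~TVal
    uint32_t csneg(bool c, uint32_t t, uint32_t f) { return c ? t : 0u - f; } // FVal == -TVal
    uint32_t csinc(bool c, uint32_t t, uint32_t f) { return c ? t : f + 1; }  // FVal == TVal + 1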
5447 | |||||
5448 | if (isUnsupportedFloatingType(LHS.getValueType())) { | ||||
5449 | DAG.getTargetLoweringInfo().softenSetCCOperands( | ||||
5450 | DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS); | ||||
5451 | |||||
5452 | // If softenSetCCOperands only returned one value, we should compare it to | ||||
5453 | // zero. | ||||
5454 | if (!RHS.getNode()) { | ||||
5455 | RHS = DAG.getConstant(0, dl, LHS.getValueType()); | ||||
5456 | CC = ISD::SETNE; | ||||
5457 | } | ||||
5458 | } | ||||
5459 | |||||
5460 | if (LHS.getValueType() == MVT::i32) { | ||||
5461 | // Try to generate VSEL on ARMv8. | ||||
5462 | // The VSEL instruction can't use all the usual ARM condition | ||||
5463 | // codes: it only has two bits to select the condition code, so it's | ||||
5464 | // constrained to use only GE, GT, VS and EQ. | ||||
5465 | // | ||||
5466 | // To implement all the various ISD::SETXXX opcodes, we sometimes need to | ||||
5467 | // swap the operands of the previous compare instruction (effectively | ||||
5468 | // inverting the compare condition, swapping 'less' and 'greater') and | ||||
5469 | // sometimes need to swap the operands to the VSEL (which inverts the | ||||
5470 | // condition in the sense of firing whenever the previous condition didn't) | ||||
5471 | if (Subtarget->hasFPARMv8Base() && (TrueVal.getValueType() == MVT::f16 || | ||||
5472 | TrueVal.getValueType() == MVT::f32 || | ||||
5473 | TrueVal.getValueType() == MVT::f64)) { | ||||
5474 | ARMCC::CondCodes CondCode = IntCCToARMCC(CC); | ||||
5475 | if (CondCode == ARMCC::LT || CondCode == ARMCC::LE || | ||||
5476 | CondCode == ARMCC::VC || CondCode == ARMCC::NE) { | ||||
5477 | CC = ISD::getSetCCInverse(CC, LHS.getValueType()); | ||||
5478 | std::swap(TrueVal, FalseVal); | ||||
5479 | } | ||||
5480 | } | ||||
5481 | |||||
5482 | SDValue ARMcc; | ||||
5483 | SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); | ||||
5484 | SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); | ||||
5485 | // Choose GE over PL, which vsel does not support. | ||||
5486 | if (cast<ConstantSDNode>(ARMcc)->getZExtValue() == ARMCC::PL) | ||||
5487 | ARMcc = DAG.getConstant(ARMCC::GE, dl, MVT::i32); | ||||
5488 | return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG); | ||||
5489 | } | ||||
5490 | |||||
5491 | ARMCC::CondCodes CondCode, CondCode2; | ||||
5492 | FPCCToARMCC(CC, CondCode, CondCode2); | ||||
5493 | |||||
5494 | // Normalize the fp compare. If RHS is zero we prefer to keep it there so we | ||||
5495 | // match CMPFPw0 instead of CMPFP, though we don't do this for f16 because we | ||||
5496 | // must use VSEL (limited condition codes), due to not having conditional f16 | ||||
5497 | // moves. | ||||
5498 | if (Subtarget->hasFPARMv8Base() && | ||||
5499 | !(isFloatingPointZero(RHS) && TrueVal.getValueType() != MVT::f16) && | ||||
5500 | (TrueVal.getValueType() == MVT::f16 || | ||||
5501 | TrueVal.getValueType() == MVT::f32 || | ||||
5502 | TrueVal.getValueType() == MVT::f64)) { | ||||
5503 | bool swpCmpOps = false; | ||||
5504 | bool swpVselOps = false; | ||||
5505 | checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps); | ||||
5506 | |||||
5507 | if (CondCode == ARMCC::GT || CondCode == ARMCC::GE || | ||||
5508 | CondCode == ARMCC::VS || CondCode == ARMCC::EQ) { | ||||
5509 | if (swpCmpOps) | ||||
5510 | std::swap(LHS, RHS); | ||||
5511 | if (swpVselOps) | ||||
5512 | std::swap(TrueVal, FalseVal); | ||||
5513 | } | ||||
5514 | } | ||||
5515 | |||||
5516 | SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32); | ||||
5517 | SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); | ||||
5518 | SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); | ||||
5519 | SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG); | ||||
5520 | if (CondCode2 != ARMCC::AL) { | ||||
5521 | SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32); | ||||
5522 | // FIXME: Needs another CMP because flag can have but one use. | ||||
5523 | SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl); | ||||
5524 | Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG); | ||||
5525 | } | ||||
5526 | return Result; | ||||
5527 | } | ||||
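Two places in this function invert the condition and swap the select operands so the resulting code is one VSEL can encode (GE, GT, VS, EQ). That rewrite is just the select identity, sketched here with an illustrative helper:

    // select(c, t, f) == select(!c, f, t); so select(a < b, x, y) is
    // re-expressed as select(a >= b, y, x), whose condition VSEL encodes.
    float vsel_like(bool c, float t, float f) { return c ? t : f; }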
5528 | |||||
5529 | /// canChangeToInt - Given the fp compare operand, return true if it is suitable | ||||
5530 | /// to morph to an integer compare sequence. | ||||
5531 | static bool canChangeToInt(SDValue Op, bool &SeenZero, | ||||
5532 | const ARMSubtarget *Subtarget) { | ||||
5533 | SDNode *N = Op.getNode(); | ||||
5534 | if (!N->hasOneUse()) | ||||
5535 | // Otherwise it requires moving the value from fp to integer registers. | ||||
5536 | return false; | ||||
5537 | if (!N->getNumValues()) | ||||
5538 | return false; | ||||
5539 | EVT VT = Op.getValueType(); | ||||
5540 | if (VT != MVT::f32 && !Subtarget->isFPBrccSlow()) | ||||
5541 | // f32 case is generally profitable. f64 case only makes sense when vcmpe + | ||||
5542 | // vmrs are very slow, e.g. cortex-a8. | ||||
5543 | return false; | ||||
5544 | |||||
5545 | if (isFloatingPointZero(Op)) { | ||||
5546 | SeenZero = true; | ||||
5547 | return true; | ||||
5548 | } | ||||
5549 | return ISD::isNormalLoad(N); | ||||
5550 | } | ||||
5551 | |||||
5552 | static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) { | ||||
5553 | if (isFloatingPointZero(Op)) | ||||
5554 | return DAG.getConstant(0, SDLoc(Op), MVT::i32); | ||||
5555 | |||||
5556 | if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) | ||||
5557 | return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(), | ||||
5558 | Ld->getPointerInfo(), Ld->getAlignment(), | ||||
5559 | Ld->getMemOperand()->getFlags()); | ||||
5560 | |||||
5561 | llvm_unreachable("Unknown VFP cmp argument!"); | ||||
5562 | } | ||||
5563 | |||||
5564 | static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, | ||||
5565 | SDValue &RetVal1, SDValue &RetVal2) { | ||||
5566 | SDLoc dl(Op); | ||||
5567 | |||||
5568 | if (isFloatingPointZero(Op)) { | ||||
5569 | RetVal1 = DAG.getConstant(0, dl, MVT::i32); | ||||
5570 | RetVal2 = DAG.getConstant(0, dl, MVT::i32); | ||||
5571 | return; | ||||
5572 | } | ||||
5573 | |||||
5574 | if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) { | ||||
5575 | SDValue Ptr = Ld->getBasePtr(); | ||||
5576 | RetVal1 = | ||||
5577 | DAG.getLoad(MVT::i32, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(), | ||||
5578 | Ld->getAlignment(), Ld->getMemOperand()->getFlags()); | ||||
5579 | |||||
5580 | EVT PtrType = Ptr.getValueType(); | ||||
5581 | unsigned NewAlign = MinAlign(Ld->getAlignment(), 4); | ||||
5582 | SDValue NewPtr = DAG.getNode(ISD::ADD, dl, | ||||
5583 | PtrType, Ptr, DAG.getConstant(4, dl, PtrType)); | ||||
5584 | RetVal2 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), NewPtr, | ||||
5585 | Ld->getPointerInfo().getWithOffset(4), NewAlign, | ||||
5586 | Ld->getMemOperand()->getFlags()); | ||||
5587 | return; | ||||
5588 | } | ||||
5589 | |||||
5590 | llvm_unreachable("Unknown VFP cmp argument!"); | ||||
5591 | } | ||||
5592 | |||||
5593 | /// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some | ||||
5594 | /// f32 and even f64 comparisons to integer ones. | ||||
5595 | SDValue | ||||
5596 | ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { | ||||
5597 | SDValue Chain = Op.getOperand(0); | ||||
5598 | ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); | ||||
5599 | SDValue LHS = Op.getOperand(2); | ||||
5600 | SDValue RHS = Op.getOperand(3); | ||||
5601 | SDValue Dest = Op.getOperand(4); | ||||
5602 | SDLoc dl(Op); | ||||
5603 | |||||
5604 | bool LHSSeenZero = false; | ||||
5605 | bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget); | ||||
5606 | bool RHSSeenZero = false; | ||||
5607 | bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget); | ||||
5608 | if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) { | ||||
5609 | // If unsafe fp math optimization is enabled and there are no other uses of | ||||
5610 | // the CMP operands, and the condition code is EQ or NE, we can optimize it | ||||
5611 | // to an integer comparison. | ||||
5612 | if (CC == ISD::SETOEQ) | ||||
5613 | CC = ISD::SETEQ; | ||||
5614 | else if (CC == ISD::SETUNE) | ||||
5615 | CC = ISD::SETNE; | ||||
5616 | |||||
5617 | SDValue Mask = DAG.getConstant(0x7fffffff, dl, MVT::i32); | ||||
5618 | SDValue ARMcc; | ||||
5619 | if (LHS.getValueType() == MVT::f32) { | ||||
5620 | LHS = DAG.getNode(ISD::AND, dl, MVT::i32, | ||||
5621 | bitcastf32Toi32(LHS, DAG), Mask); | ||||
5622 | RHS = DAG.getNode(ISD::AND, dl, MVT::i32, | ||||
5623 | bitcastf32Toi32(RHS, DAG), Mask); | ||||
5624 | SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); | ||||
5625 | SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); | ||||
5626 | return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, | ||||
5627 | Chain, Dest, ARMcc, CCR, Cmp); | ||||
5628 | } | ||||
5629 | |||||
5630 | SDValue LHS1, LHS2; | ||||
5631 | SDValue RHS1, RHS2; | ||||
5632 | expandf64Toi32(LHS, DAG, LHS1, LHS2); | ||||
5633 | expandf64Toi32(RHS, DAG, RHS1, RHS2); | ||||
5634 | LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask); | ||||
5635 | RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask); | ||||
5636 | ARMCC::CondCodes CondCode = IntCCToARMCC(CC); | ||||
5637 | ARMcc = DAG.getConstant(CondCode, dl, MVT::i32); | ||||
5638 | SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); | ||||
5639 | SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest }; | ||||
5640 | return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops); | ||||
5641 | } | ||||
5642 | |||||
5643 | return SDValue(); | ||||
5644 | } | ||||
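A scalar sketch of the integer rewrite above for the f32 case, assuming IEEE-754 single-precision layout: clearing the sign bit makes +0.0 and -0.0 compare equal as integers, which is why the transform is restricted to unsafe-fp-math and EQ/NE codes (the function name is illustrative):

    #include <cstdint>
    #include <cstring>

    bool f32_eq_zero(float a) {
      uint32_t bits;
      std::memcpy(&bits, &a, sizeof bits);  // bitcastf32Toi32
      return (bits & 0x7fffffffu) == 0;     // mask off the sign, integer compare
    }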
5645 | |||||
5646 | SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { | ||||
5647 | SDValue Chain = Op.getOperand(0); | ||||
5648 | SDValue Cond = Op.getOperand(1); | ||||
5649 | SDValue Dest = Op.getOperand(2); | ||||
5650 | SDLoc dl(Op); | ||||
5651 | |||||
5652 | // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch | ||||
5653 | // instruction. | ||||
5654 | unsigned Opc = Cond.getOpcode(); | ||||
5655 | bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) && | ||||
5656 | !Subtarget->isThumb1Only(); | ||||
5657 | if (Cond.getResNo() == 1 && | ||||
5658 | (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || | ||||
5659 | Opc == ISD::USUBO || OptimizeMul)) { | ||||
5660 | // Only lower legal XALUO ops. | ||||
5661 | if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0))) | ||||
5662 | return SDValue(); | ||||
5663 | |||||
5664 | // The actual operation with overflow check. | ||||
5665 | SDValue Value, OverflowCmp; | ||||
5666 | SDValue ARMcc; | ||||
5667 | std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc); | ||||
5668 | |||||
5669 | // Reverse the condition code. | ||||
5670 | ARMCC::CondCodes CondCode = | ||||
5671 | (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue(); | ||||
5672 | CondCode = ARMCC::getOppositeCondition(CondCode); | ||||
5673 | ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32); | ||||
5674 | SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); | ||||
5675 | |||||
5676 | return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR, | ||||
5677 | OverflowCmp); | ||||
5678 | } | ||||
5679 | |||||
5680 | return SDValue(); | ||||
5681 | } | ||||
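A sketch of source that produces the pattern handled above: the overflow flag of an arithmetic builtin branched on directly. __builtin_sadd_overflow is a real Clang/GCC builtin that reaches the DAG as ISD::SADDO, whose flag result feeds the branch:

    bool add_overflows(int a, int b) {
      int r;
      if (__builtin_sadd_overflow(a, b, &r))  // the flag result feeds BRCOND
        return true;
      return false;
    }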
5682 | |||||
5683 | SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { | ||||
5684 | SDValue Chain = Op.getOperand(0); | ||||
5685 | ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); | ||||
5686 | SDValue LHS = Op.getOperand(2); | ||||
5687 | SDValue RHS = Op.getOperand(3); | ||||
5688 | SDValue Dest = Op.getOperand(4); | ||||
5689 | SDLoc dl(Op); | ||||
5690 | |||||
5691 | if (isUnsupportedFloatingType(LHS.getValueType())) { | ||||
5692 | DAG.getTargetLoweringInfo().softenSetCCOperands( | ||||
5693 | DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS); | ||||
5694 | |||||
5695 | // If softenSetCCOperands only returned one value, we should compare it to | ||||
5696 | // zero. | ||||
5697 | if (!RHS.getNode()) { | ||||
5698 | RHS = DAG.getConstant(0, dl, LHS.getValueType()); | ||||
5699 | CC = ISD::SETNE; | ||||
5700 | } | ||||
5701 | } | ||||
5702 | |||||
5703 | // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch | ||||
5704 | // instruction. | ||||
5705 | unsigned Opc = LHS.getOpcode(); | ||||
5706 | bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) && | ||||
5707 | !Subtarget->isThumb1Only(); | ||||
5708 | if (LHS.getResNo() == 1 && (isOneConstant(RHS) || isNullConstant(RHS)) && | ||||
5709 | (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || | ||||
5710 | Opc == ISD::USUBO || OptimizeMul) && | ||||
5711 | (CC == ISD::SETEQ || CC == ISD::SETNE)) { | ||||
5712 | // Only lower legal XALUO ops. | ||||
5713 | if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0))) | ||||
5714 | return SDValue(); | ||||
5715 | |||||
5716 | // The actual operation with overflow check. | ||||
5717 | SDValue Value, OverflowCmp; | ||||
5718 | SDValue ARMcc; | ||||
5719 | std::tie(Value, OverflowCmp) = getARMXALUOOp(LHS.getValue(0), DAG, ARMcc); | ||||
5720 | |||||
5721 | if ((CC == ISD::SETNE) != isOneConstant(RHS)) { | ||||
5722 | // Reverse the condition code. | ||||
5723 | ARMCC::CondCodes CondCode = | ||||
5724 | (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue(); | ||||
5725 | CondCode = ARMCC::getOppositeCondition(CondCode); | ||||
5726 | ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32); | ||||
5727 | } | ||||
5728 | SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); | ||||
5729 | |||||
5730 | return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR, | ||||
5731 | OverflowCmp); | ||||
5732 | } | ||||
5733 | |||||
5734 | if (LHS.getValueType() == MVT::i32) { | ||||
5735 | SDValue ARMcc; | ||||
5736 | SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); | ||||
5737 | SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); | ||||
5738 | return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, | ||||
5739 | Chain, Dest, ARMcc, CCR, Cmp); | ||||
5740 | } | ||||
5741 | |||||
5742 | if (getTargetMachine().Options.UnsafeFPMath && | ||||
5743 | (CC == ISD::SETEQ || CC == ISD::SETOEQ || | ||||
5744 | CC == ISD::SETNE || CC == ISD::SETUNE)) { | ||||
5745 | if (SDValue Result = OptimizeVFPBrcond(Op, DAG)) | ||||
5746 | return Result; | ||||
5747 | } | ||||
5748 | |||||
5749 | ARMCC::CondCodes CondCode, CondCode2; | ||||
5750 | FPCCToARMCC(CC, CondCode, CondCode2); | ||||
5751 | |||||
5752 | SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32); | ||||
5753 | SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); | ||||
5754 | SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); | ||||
5755 | SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); | ||||
5756 | SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp }; | ||||
5757 | SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops); | ||||
5758 | if (CondCode2 != ARMCC::AL) { | ||||
5759 | ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32); | ||||
5760 | SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) }; | ||||
5761 | Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops); | ||||
5762 | } | ||||
5763 | return Res; | ||||
5764 | } | ||||
5765 | |||||
5766 | SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { | ||||
5767 | SDValue Chain = Op.getOperand(0); | ||||
5768 | SDValue Table = Op.getOperand(1); | ||||
5769 | SDValue Index = Op.getOperand(2); | ||||
5770 | SDLoc dl(Op); | ||||
5771 | |||||
5772 | EVT PTy = getPointerTy(DAG.getDataLayout()); | ||||
5773 | JumpTableSDNode *JT = cast<JumpTableSDNode>(Table); | ||||
5774 | SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy); | ||||
5775 | Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI); | ||||
5776 | Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, dl, PTy)); | ||||
5777 | SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Index); | ||||
5778 | if (Subtarget->isThumb2() || (Subtarget->hasV8MBaselineOps() && Subtarget->isThumb())) { | ||||
5779 | // Thumb2 and ARMv8-M use a two-level jump: the branch jumps into the jump | ||||
5780 | // table, which does another jump to the destination. This also makes it | ||||
5781 | // easier to translate to TBB / TBH later (Thumb2 only). | ||||
5782 | // FIXME: This might not work if the function is extremely large. | ||||
5783 | return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain, | ||||
5784 | Addr, Op.getOperand(2), JTI); | ||||
5785 | } | ||||
5786 | if (isPositionIndependent() || Subtarget->isROPI()) { | ||||
5787 | Addr = | ||||
5788 | DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr, | ||||
5789 | MachinePointerInfo::getJumpTable(DAG.getMachineFunction())); | ||||
5790 | Chain = Addr.getValue(1); | ||||
5791 | Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Addr); | ||||
5792 | return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI); | ||||
5793 | } else { | ||||
5794 | Addr = | ||||
5795 | DAG.getLoad(PTy, dl, Chain, Addr, | ||||
5796 | MachinePointerInfo::getJumpTable(DAG.getMachineFunction())); | ||||
5797 | Chain = Addr.getValue(1); | ||||
5798 | return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI); | ||||
5799 | } | ||||
5800 | } | ||||
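Hedged pseudo-helpers for the two non-Thumb-2 addressing schemes built above (illustrative only, not backend code): absolute entries load the destination directly, while PIC/ROPI entries hold table-relative offsets that are added back to the table base:

    #include <cstdint>

    uintptr_t jt_absolute(const uint32_t *table, unsigned idx) {
      return table[idx];                      // entry is the destination address
    }

    uintptr_t jt_pic(const uint32_t *table, unsigned idx) {
      return (uintptr_t)table + table[idx];   // entry is an offset from Table
    }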
5801 | |||||
5802 | static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) { | ||||
5803 | EVT VT = Op.getValueType(); | ||||
5804 | SDLoc dl(Op); | ||||
5805 | |||||
5806 | if (Op.getValueType().getVectorElementType() == MVT::i32) { | ||||
5807 | if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32) | ||||
5808 | return Op; | ||||
5809 | return DAG.UnrollVectorOp(Op.getNode()); | ||||
5810 | } | ||||
5811 | |||||
5812 | const bool HasFullFP16 = | ||||
5813 | static_cast<const ARMSubtarget&>(DAG.getSubtarget()).hasFullFP16(); | ||||
5814 | |||||
5815 | EVT NewTy; | ||||
5816 | const EVT OpTy = Op.getOperand(0).getValueType(); | ||||
5817 | if (OpTy == MVT::v4f32) | ||||
5818 | NewTy = MVT::v4i32; | ||||
5819 | else if (OpTy == MVT::v4f16 && HasFullFP16) | ||||
5820 | NewTy = MVT::v4i16; | ||||
5821 | else if (OpTy == MVT::v8f16 && HasFullFP16) | ||||
5822 | NewTy = MVT::v8i16; | ||||
5823 | else | ||||
5824 | llvm_unreachable("Invalid type for custom lowering!"); | ||||
5825 | |||||
5826 | if (VT != MVT::v4i16 && VT != MVT::v8i16) | ||||
5827 | return DAG.UnrollVectorOp(Op.getNode()); | ||||
5828 | |||||
5829 | Op = DAG.getNode(Op.getOpcode(), dl, NewTy, Op.getOperand(0)); | ||||
5830 | return DAG.getNode(ISD::TRUNCATE, dl, VT, Op); | ||||
5831 | } | ||||
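A per-lane view of the convert-then-truncate path above, e.g. for (v4i16 (fp_to_sint v4f32)); the helper name is illustrative, and out-of-range inputs are ignored for brevity:

    #include <cstdint>

    int16_t lane_f32_to_i16(float x) {
      int32_t wide = (int32_t)x;  // native f32 -> i32 lane convert (NewTy)
      return (int16_t)wide;       // ISD::TRUNCATE to the i16 result type
    }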
5832 | |||||
5833 | SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { | ||||
5834 | EVT VT = Op.getValueType(); | ||||
5835 | if (VT.isVector()) | ||||
5836 | return LowerVectorFP_TO_INT(Op, DAG); | ||||
5837 | |||||
5838 | bool IsStrict = Op->isStrictFPOpcode(); | ||||
5839 | SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0); | ||||
5840 | |||||
5841 | if (isUnsupportedFloatingType(SrcVal.getValueType())) { | ||||
5842 | RTLIB::Libcall LC; | ||||
5843 | if (Op.getOpcode() == ISD::FP_TO_SINT || | ||||
5844 | Op.getOpcode() == ISD::STRICT_FP_TO_SINT) | ||||
5845 | LC = RTLIB::getFPTOSINT(SrcVal.getValueType(), | ||||
5846 | Op.getValueType()); | ||||
5847 | else | ||||
5848 | LC = RTLIB::getFPTOUINT(SrcVal.getValueType(), | ||||
5849 | Op.getValueType()); | ||||
5850 | SDLoc Loc(Op); | ||||
5851 | MakeLibCallOptions CallOptions; | ||||
5852 | SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue(); | ||||
5853 | SDValue Result; | ||||
5854 | std::tie(Result, Chain) = makeLibCall(DAG, LC, Op.getValueType(), SrcVal, | ||||
5855 | CallOptions, Loc, Chain); | ||||
5856 | return IsStrict ? DAG.getMergeValues({Result, Chain}, Loc) : Result; | ||||
5857 | } | ||||
5858 | |||||
5859 | // FIXME: Remove this when we have strict fp instruction selection patterns | ||||
5860 | if (IsStrict) { | ||||
5861 | SDLoc Loc(Op); | ||||
5862 | SDValue Result = | ||||
5863 | DAG.getNode(Op.getOpcode() == ISD::STRICT_FP_TO_SINT ? ISD::FP_TO_SINT | ||||
5864 | : ISD::FP_TO_UINT, | ||||
5865 | Loc, Op.getValueType(), SrcVal); | ||||
5866 | return DAG.getMergeValues({Result, Op.getOperand(0)}, Loc); | ||||
5867 | } | ||||
5868 | |||||
5869 | return Op; | ||||
5870 | } | ||||
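When the source type is unsupported, the conversion becomes a runtime call; the exact symbol comes from RTLIB::getFPTOSINT/getFPTOUINT. Under AEABI the signed double-to-i32 case is typically __aeabi_d2iz (an assumption about the runtime ABI in use):

    extern "C" int __aeabi_d2iz(double);  // AEABI: double -> i32, round toward zero

    int fp_to_int_soft(double d) {
      return __aeabi_d2iz(d);  // roughly what the makeLibCall path amounts to
    }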
5871 | |||||
5872 | static SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, | ||||
5873 | const ARMSubtarget *Subtarget) { | ||||
5874 | EVT VT = Op.getValueType(); | ||||
5875 | EVT ToVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); | ||||
5876 | EVT FromVT = Op.getOperand(0).getValueType(); | ||||
5877 | |||||
5878 | if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f32) | ||||
5879 | return Op; | ||||
5880 | if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f64 && | ||||
5881 | Subtarget->hasFP64()) | ||||
5882 | return Op; | ||||
5883 | if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f16 && | ||||
5884 | Subtarget->hasFullFP16()) | ||||
5885 | return Op; | ||||
5886 | if (VT == MVT::v4i32 && ToVT == MVT::i32 && FromVT == MVT::v4f32 && | ||||
5887 | Subtarget->hasMVEFloatOps()) | ||||
5888 | return Op; | ||||
5889 | if (VT == MVT::v8i16 && ToVT == MVT::i16 && FromVT == MVT::v8f16 && | ||||
5890 | Subtarget->hasMVEFloatOps()) | ||||
5891 | return Op; | ||||
5892 | |||||
5893 | if (FromVT != MVT::v4f32 && FromVT != MVT::v8f16) | ||||
5894 | return SDValue(); | ||||
5895 | |||||
5896 | SDLoc DL(Op); | ||||
5897 | bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT; | ||||
5898 | unsigned BW = ToVT.getScalarSizeInBits() - IsSigned; | ||||
5899 | SDValue CVT = DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0), | ||||
5900 | DAG.getValueType(VT.getScalarType())); | ||||
5901 | SDValue Max = DAG.getNode(IsSigned ? ISD::SMIN : ISD::UMIN, DL, VT, CVT, | ||||
5902 | DAG.getConstant((1 << BW) - 1, DL, VT)); | ||||
5903 | if (IsSigned) | ||||
5904 | Max = DAG.getNode(ISD::SMAX, DL, VT, Max, | ||||
5905 | DAG.getConstant(-(1 << BW), DL, VT)); | ||||
5906 | return Max; | ||||
5907 | } | ||||
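A worked scalar instance of the clamp built above for a signed i16 target: BW = 16 - 1 = 15, so the SMIN/SMAX pair bounds each lane to [-(1 << 15), (1 << 15) - 1] = [-32768, 32767] (the function name is illustrative):

    #include <algorithm>
    #include <cstdint>

    int32_t sat_lane_to_s16(int32_t x) {
      x = std::min(x, (1 << 15) - 1);  // SMIN with 32767
      x = std::max(x, -(1 << 15));     // SMAX with -32768
      return x;
    }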
5908 | |||||
5909 | static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { | ||||
5910 | EVT VT = Op.getValueType(); | ||||
5911 | SDLoc dl(Op); | ||||
5912 | |||||
5913 | if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) { | ||||
5914 | if (VT.getVectorElementType() == MVT::f32) | ||||
5915 | return Op; | ||||
5916 | return DAG.UnrollVectorOp(Op.getNode()); | ||||
5917 | } | ||||
5918 | |||||
5919 | assert((Op.getOperand(0).getValueType() == MVT::v4i16 || | ||||
5920 |         Op.getOperand(0).getValueType() == MVT::v8i16) && | ||||
5921 |        "Invalid type for custom lowering!"); | ||||
5922 | |||||
5923 | const bool HasFullFP16 = | ||||
5924 | static_cast<const ARMSubtarget&>(DAG.getSubtarget()).hasFullFP16(); | ||||
5925 | |||||
5926 | EVT DestVecType; | ||||
5927 | if (VT == MVT::v4f32) | ||||
5928 | DestVecType = MVT::v4i32; | ||||
5929 | else if (VT == MVT::v4f16 && HasFullFP16) | ||||
5930 | DestVecType = MVT::v4i16; | ||||
5931 | else if (VT == MVT::v8f16 && HasFullFP16) | ||||
5932 | DestVecType = MVT::v8i16; | ||||
5933 | else | ||||
5934 | return DAG.UnrollVectorOp(Op.getNode()); | ||||
5935 | |||||
5936 | unsigned CastOpc; | ||||
5937 | unsigned Opc; | ||||
5938 | switch (Op.getOpcode()) { | ||||
5939 | default: llvm_unreachable("Invalid opcode!"); | ||||
5940 | case ISD::SINT_TO_FP: | ||||
5941 | CastOpc = ISD::SIGN_EXTEND; | ||||
5942 | Opc = ISD::SINT_TO_FP; | ||||
5943 | break; | ||||
5944 | case ISD::UINT_TO_FP: | ||||
5945 | CastOpc = ISD::ZERO_EXTEND; | ||||
5946 | Opc = ISD::UINT_TO_FP; | ||||
5947 | break; | ||||
5948 | } | ||||
5949 | |||||
5950 | Op = DAG.getNode(CastOpc, dl, DestVecType, Op.getOperand(0)); | ||||
5951 | return DAG.getNode(Opc, dl, VT, Op); | ||||
5952 | } | ||||
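A per-lane view of the extend-then-convert sequence above for SINT_TO_FP from v4i16 (helper name illustrative):

    #include <cstdint>

    float lane_s16_to_f32(int16_t v) {
      int32_t wide = (int32_t)v;  // CastOpc: SIGN_EXTEND to the i32 lane
      return (float)wide;         // Opc: native i32 -> f32 convert
    }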
5953 | |||||
5954 | SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { | ||||