File: build/source/llvm/lib/Target/ARM/ARMISelLowering.cpp
Warning: line 7258, column 18: Division by zero
1 | //===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the interfaces that ARM uses to lower LLVM code into a |
10 | // selection DAG. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "ARMISelLowering.h" |
15 | #include "ARMBaseInstrInfo.h" |
16 | #include "ARMBaseRegisterInfo.h" |
17 | #include "ARMCallingConv.h" |
18 | #include "ARMConstantPoolValue.h" |
19 | #include "ARMMachineFunctionInfo.h" |
20 | #include "ARMPerfectShuffle.h" |
21 | #include "ARMRegisterInfo.h" |
22 | #include "ARMSelectionDAGInfo.h" |
23 | #include "ARMSubtarget.h" |
24 | #include "ARMTargetTransformInfo.h" |
25 | #include "MCTargetDesc/ARMAddressingModes.h" |
26 | #include "MCTargetDesc/ARMBaseInfo.h" |
27 | #include "Utils/ARMBaseInfo.h" |
28 | #include "llvm/ADT/APFloat.h" |
29 | #include "llvm/ADT/APInt.h" |
30 | #include "llvm/ADT/ArrayRef.h" |
31 | #include "llvm/ADT/BitVector.h" |
32 | #include "llvm/ADT/DenseMap.h" |
33 | #include "llvm/ADT/STLExtras.h" |
34 | #include "llvm/ADT/SmallPtrSet.h" |
35 | #include "llvm/ADT/SmallVector.h" |
36 | #include "llvm/ADT/Statistic.h" |
37 | #include "llvm/ADT/StringExtras.h" |
38 | #include "llvm/ADT/StringRef.h" |
39 | #include "llvm/ADT/StringSwitch.h" |
40 | #include "llvm/ADT/Twine.h" |
41 | #include "llvm/Analysis/VectorUtils.h" |
42 | #include "llvm/CodeGen/CallingConvLower.h" |
43 | #include "llvm/CodeGen/ISDOpcodes.h" |
44 | #include "llvm/CodeGen/IntrinsicLowering.h" |
45 | #include "llvm/CodeGen/MachineBasicBlock.h" |
46 | #include "llvm/CodeGen/MachineConstantPool.h" |
47 | #include "llvm/CodeGen/MachineFrameInfo.h" |
48 | #include "llvm/CodeGen/MachineFunction.h" |
49 | #include "llvm/CodeGen/MachineInstr.h" |
50 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
51 | #include "llvm/CodeGen/MachineJumpTableInfo.h" |
52 | #include "llvm/CodeGen/MachineMemOperand.h" |
53 | #include "llvm/CodeGen/MachineOperand.h" |
54 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
55 | #include "llvm/CodeGen/RuntimeLibcalls.h" |
56 | #include "llvm/CodeGen/SelectionDAG.h" |
57 | #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" |
58 | #include "llvm/CodeGen/SelectionDAGNodes.h" |
59 | #include "llvm/CodeGen/TargetInstrInfo.h" |
60 | #include "llvm/CodeGen/TargetLowering.h" |
61 | #include "llvm/CodeGen/TargetOpcodes.h" |
62 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
63 | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
64 | #include "llvm/CodeGen/ValueTypes.h" |
65 | #include "llvm/IR/Attributes.h" |
66 | #include "llvm/IR/CallingConv.h" |
67 | #include "llvm/IR/Constant.h" |
68 | #include "llvm/IR/Constants.h" |
69 | #include "llvm/IR/DataLayout.h" |
70 | #include "llvm/IR/DebugLoc.h" |
71 | #include "llvm/IR/DerivedTypes.h" |
72 | #include "llvm/IR/Function.h" |
73 | #include "llvm/IR/GlobalAlias.h" |
74 | #include "llvm/IR/GlobalValue.h" |
75 | #include "llvm/IR/GlobalVariable.h" |
76 | #include "llvm/IR/IRBuilder.h" |
77 | #include "llvm/IR/InlineAsm.h" |
78 | #include "llvm/IR/Instruction.h" |
79 | #include "llvm/IR/Instructions.h" |
80 | #include "llvm/IR/IntrinsicInst.h" |
81 | #include "llvm/IR/Intrinsics.h" |
82 | #include "llvm/IR/IntrinsicsARM.h" |
83 | #include "llvm/IR/Module.h" |
84 | #include "llvm/IR/PatternMatch.h" |
85 | #include "llvm/IR/Type.h" |
86 | #include "llvm/IR/User.h" |
87 | #include "llvm/IR/Value.h" |
88 | #include "llvm/MC/MCInstrDesc.h" |
89 | #include "llvm/MC/MCInstrItineraries.h" |
90 | #include "llvm/MC/MCRegisterInfo.h" |
91 | #include "llvm/MC/MCSchedule.h" |
92 | #include "llvm/Support/AtomicOrdering.h" |
93 | #include "llvm/Support/BranchProbability.h" |
94 | #include "llvm/Support/Casting.h" |
95 | #include "llvm/Support/CodeGen.h" |
96 | #include "llvm/Support/CommandLine.h" |
97 | #include "llvm/Support/Compiler.h" |
98 | #include "llvm/Support/Debug.h" |
99 | #include "llvm/Support/ErrorHandling.h" |
100 | #include "llvm/Support/KnownBits.h" |
101 | #include "llvm/Support/MachineValueType.h" |
102 | #include "llvm/Support/MathExtras.h" |
103 | #include "llvm/Support/raw_ostream.h" |
104 | #include "llvm/Target/TargetMachine.h" |
105 | #include "llvm/Target/TargetOptions.h" |
106 | #include "llvm/TargetParser/Triple.h" |
107 | #include <algorithm> |
108 | #include <cassert> |
109 | #include <cstdint> |
110 | #include <cstdlib> |
111 | #include <iterator> |
112 | #include <limits> |
113 | #include <optional> |
114 | #include <string> |
115 | #include <tuple> |
116 | #include <utility> |
117 | #include <vector> |
118 | |
119 | using namespace llvm; |
120 | using namespace llvm::PatternMatch; |
121 | |
122 | #define DEBUG_TYPE "arm-isel" |
123 | |
124 | STATISTIC(NumTailCalls, "Number of tail calls"); |
125 | STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt"); |
126 | STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments"); |
127 | STATISTIC(NumConstpoolPromoted, |
128 |           "Number of constants with their storage promoted into constant pools"); |
129 | |
130 | static cl::opt<bool> |
131 | ARMInterworking("arm-interworking", cl::Hidden, |
132 | cl::desc("Enable / disable ARM interworking (for debugging only)"), |
133 | cl::init(true)); |
134 | |
135 | static cl::opt<bool> EnableConstpoolPromotion( |
136 | "arm-promote-constant", cl::Hidden, |
137 | cl::desc("Enable / disable promotion of unnamed_addr constants into " |
138 | "constant pools"), |
139 | cl::init(false)); // FIXME: set to true by default once PR32780 is fixed |
140 | static cl::opt<unsigned> ConstpoolPromotionMaxSize( |
141 | "arm-promote-constant-max-size", cl::Hidden, |
142 | cl::desc("Maximum size of constant to promote into a constant pool"), |
143 | cl::init(64)); |
144 | static cl::opt<unsigned> ConstpoolPromotionMaxTotal( |
145 | "arm-promote-constant-max-total", cl::Hidden, |
146 | cl::desc("Maximum size of ALL constants to promote into a constant pool"), |
147 | cl::init(128)); |
148 | |
149 | cl::opt<unsigned> |
150 | MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden, |
151 | cl::desc("Maximum interleave factor for MVE VLDn to generate."), |
152 | cl::init(2)); |
153 | |
154 | // The APCS parameter registers. |
155 | static const MCPhysReg GPRArgRegs[] = { |
156 | ARM::R0, ARM::R1, ARM::R2, ARM::R3 |
157 | }; |
158 | |
159 | void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT) { |
160 | if (VT != PromotedLdStVT) { |
161 | setOperationAction(ISD::LOAD, VT, Promote); |
162 | AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT); |
163 | |
164 | setOperationAction(ISD::STORE, VT, Promote); |
165 | AddPromotedToType (ISD::STORE, VT, PromotedLdStVT); |
166 | } |
167 | |
168 | MVT ElemTy = VT.getVectorElementType(); |
169 | if (ElemTy != MVT::f64) |
170 | setOperationAction(ISD::SETCC, VT, Custom); |
171 | setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
172 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
173 | if (ElemTy == MVT::i32) { |
174 | setOperationAction(ISD::SINT_TO_FP, VT, Custom); |
175 | setOperationAction(ISD::UINT_TO_FP, VT, Custom); |
176 | setOperationAction(ISD::FP_TO_SINT, VT, Custom); |
177 | setOperationAction(ISD::FP_TO_UINT, VT, Custom); |
178 | } else { |
179 | setOperationAction(ISD::SINT_TO_FP, VT, Expand); |
180 | setOperationAction(ISD::UINT_TO_FP, VT, Expand); |
181 | setOperationAction(ISD::FP_TO_SINT, VT, Expand); |
182 | setOperationAction(ISD::FP_TO_UINT, VT, Expand); |
183 | } |
184 | setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
185 | setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
186 | setOperationAction(ISD::CONCAT_VECTORS, VT, Legal); |
187 | setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal); |
188 | setOperationAction(ISD::SELECT, VT, Expand); |
189 | setOperationAction(ISD::SELECT_CC, VT, Expand); |
190 | setOperationAction(ISD::VSELECT, VT, Expand); |
191 | setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); |
192 | if (VT.isInteger()) { |
193 | setOperationAction(ISD::SHL, VT, Custom); |
194 | setOperationAction(ISD::SRA, VT, Custom); |
195 | setOperationAction(ISD::SRL, VT, Custom); |
196 | } |
197 | |
198 | // Neon does not support vector divide/remainder operations. |
199 | setOperationAction(ISD::SDIV, VT, Expand); |
200 | setOperationAction(ISD::UDIV, VT, Expand); |
201 | setOperationAction(ISD::FDIV, VT, Expand); |
202 | setOperationAction(ISD::SREM, VT, Expand); |
203 | setOperationAction(ISD::UREM, VT, Expand); |
204 | setOperationAction(ISD::FREM, VT, Expand); |
205 | setOperationAction(ISD::SDIVREM, VT, Expand); |
206 | setOperationAction(ISD::UDIVREM, VT, Expand); |
207 | |
208 | if (!VT.isFloatingPoint() && |
209 | VT != MVT::v2i64 && VT != MVT::v1i64) |
210 | for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}) |
211 | setOperationAction(Opcode, VT, Legal); |
212 | if (!VT.isFloatingPoint()) |
213 | for (auto Opcode : {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}) |
214 | setOperationAction(Opcode, VT, Legal); |
215 | } |
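// Illustrative note: for a D-register type such as v8i8 this hook is invoked
// as addTypeForNEON(MVT::v8i8, MVT::f64), so plain loads and stores of v8i8
// are handled as f64 accesses (Promote + AddPromotedToType), while the
// arithmetic actions configured above still apply to the vector type itself.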
216 | |
217 | void ARMTargetLowering::addDRTypeForNEON(MVT VT) { |
218 | addRegisterClass(VT, &ARM::DPRRegClass); |
219 | addTypeForNEON(VT, MVT::f64); |
220 | } |
221 | |
222 | void ARMTargetLowering::addQRTypeForNEON(MVT VT) { |
223 | addRegisterClass(VT, &ARM::DPairRegClass); |
224 | addTypeForNEON(VT, MVT::v2f64); |
225 | } |
226 | |
227 | void ARMTargetLowering::setAllExpand(MVT VT) { |
228 | for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) |
229 | setOperationAction(Opc, VT, Expand); |
230 | |
231 | // We support these really simple operations even on types where all |
232 | // the actual arithmetic has to be broken down into simpler |
233 | // operations or turned into library calls. |
234 | setOperationAction(ISD::BITCAST, VT, Legal); |
235 | setOperationAction(ISD::LOAD, VT, Legal); |
236 | setOperationAction(ISD::STORE, VT, Legal); |
237 | setOperationAction(ISD::UNDEF, VT, Legal); |
238 | } |
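// Anything beyond bitcast/load/store/undef has to be re-marked by the caller
// after setAllExpand; the MVE float-type handling below does exactly that,
// calling setAllExpand(VT) and then selectively restoring Custom/Legal actions.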
239 | |
240 | void ARMTargetLowering::addAllExtLoads(const MVT From, const MVT To, |
241 | LegalizeAction Action) { |
242 | setLoadExtAction(ISD::EXTLOAD, From, To, Action); |
243 | setLoadExtAction(ISD::ZEXTLOAD, From, To, Action); |
244 | setLoadExtAction(ISD::SEXTLOAD, From, To, Action); |
245 | } |
246 | |
247 | void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) { |
248 | const MVT IntTypes[] = { MVT::v16i8, MVT::v8i16, MVT::v4i32 }; |
249 | |
250 | for (auto VT : IntTypes) { |
251 | addRegisterClass(VT, &ARM::MQPRRegClass); |
252 | setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
253 | setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
254 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
255 | setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
256 | setOperationAction(ISD::SHL, VT, Custom); |
257 | setOperationAction(ISD::SRA, VT, Custom); |
258 | setOperationAction(ISD::SRL, VT, Custom); |
259 | setOperationAction(ISD::SMIN, VT, Legal); |
260 | setOperationAction(ISD::SMAX, VT, Legal); |
261 | setOperationAction(ISD::UMIN, VT, Legal); |
262 | setOperationAction(ISD::UMAX, VT, Legal); |
263 | setOperationAction(ISD::ABS, VT, Legal); |
264 | setOperationAction(ISD::SETCC, VT, Custom); |
265 | setOperationAction(ISD::MLOAD, VT, Custom); |
266 | setOperationAction(ISD::MSTORE, VT, Legal); |
267 | setOperationAction(ISD::CTLZ, VT, Legal); |
268 | setOperationAction(ISD::CTTZ, VT, Custom); |
269 | setOperationAction(ISD::BITREVERSE, VT, Legal); |
270 | setOperationAction(ISD::BSWAP, VT, Legal); |
271 | setOperationAction(ISD::SADDSAT, VT, Legal); |
272 | setOperationAction(ISD::UADDSAT, VT, Legal); |
273 | setOperationAction(ISD::SSUBSAT, VT, Legal); |
274 | setOperationAction(ISD::USUBSAT, VT, Legal); |
275 | setOperationAction(ISD::ABDS, VT, Legal); |
276 | setOperationAction(ISD::ABDU, VT, Legal); |
277 | setOperationAction(ISD::AVGFLOORS, VT, Legal); |
278 | setOperationAction(ISD::AVGFLOORU, VT, Legal); |
279 | setOperationAction(ISD::AVGCEILS, VT, Legal); |
280 | setOperationAction(ISD::AVGCEILU, VT, Legal); |
281 | |
282 | // No native support for these. |
283 | setOperationAction(ISD::UDIV, VT, Expand); |
284 | setOperationAction(ISD::SDIV, VT, Expand); |
285 | setOperationAction(ISD::UREM, VT, Expand); |
286 | setOperationAction(ISD::SREM, VT, Expand); |
287 | setOperationAction(ISD::UDIVREM, VT, Expand); |
288 | setOperationAction(ISD::SDIVREM, VT, Expand); |
289 | setOperationAction(ISD::CTPOP, VT, Expand); |
290 | setOperationAction(ISD::SELECT, VT, Expand); |
291 | setOperationAction(ISD::SELECT_CC, VT, Expand); |
292 | |
293 | // Vector reductions |
294 | setOperationAction(ISD::VECREDUCE_ADD, VT, Legal); |
295 | setOperationAction(ISD::VECREDUCE_SMAX, VT, Legal); |
296 | setOperationAction(ISD::VECREDUCE_UMAX, VT, Legal); |
297 | setOperationAction(ISD::VECREDUCE_SMIN, VT, Legal); |
298 | setOperationAction(ISD::VECREDUCE_UMIN, VT, Legal); |
299 | setOperationAction(ISD::VECREDUCE_MUL, VT, Custom); |
300 | setOperationAction(ISD::VECREDUCE_AND, VT, Custom); |
301 | setOperationAction(ISD::VECREDUCE_OR, VT, Custom); |
302 | setOperationAction(ISD::VECREDUCE_XOR, VT, Custom); |
303 | |
304 | if (!HasMVEFP) { |
305 | setOperationAction(ISD::SINT_TO_FP, VT, Expand); |
306 | setOperationAction(ISD::UINT_TO_FP, VT, Expand); |
307 | setOperationAction(ISD::FP_TO_SINT, VT, Expand); |
308 | setOperationAction(ISD::FP_TO_UINT, VT, Expand); |
309 | } else { |
310 | setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom); |
311 | setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom); |
312 | } |
313 | |
314 | // Pre and Post inc are supported on loads and stores |
315 | for (unsigned im = (unsigned)ISD::PRE_INC; |
316 | im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { |
317 | setIndexedLoadAction(im, VT, Legal); |
318 | setIndexedStoreAction(im, VT, Legal); |
319 | setIndexedMaskedLoadAction(im, VT, Legal); |
320 | setIndexedMaskedStoreAction(im, VT, Legal); |
321 | } |
322 | } |
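// The indexed-mode loop above walks every ISD::MemIndexedMode from PRE_INC up
// to (but not including) LAST_INDEXED_MODE, i.e. PRE_INC, PRE_DEC, POST_INC
// and POST_DEC, so all four writeback addressing forms become legal for these
// MVE integer vector types.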
323 | |
324 | const MVT FloatTypes[] = { MVT::v8f16, MVT::v4f32 }; |
325 | for (auto VT : FloatTypes) { |
326 | addRegisterClass(VT, &ARM::MQPRRegClass); |
327 | if (!HasMVEFP) |
328 | setAllExpand(VT); |
329 | |
330 | // These are legal or custom whether we have MVE.fp or not |
331 | setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
332 | setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
333 | setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getVectorElementType(), Custom); |
334 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
335 | setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
336 | setOperationAction(ISD::BUILD_VECTOR, VT.getVectorElementType(), Custom); |
337 | setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal); |
338 | setOperationAction(ISD::SETCC, VT, Custom); |
339 | setOperationAction(ISD::MLOAD, VT, Custom); |
340 | setOperationAction(ISD::MSTORE, VT, Legal); |
341 | setOperationAction(ISD::SELECT, VT, Expand); |
342 | setOperationAction(ISD::SELECT_CC, VT, Expand); |
343 | |
344 | // Pre and Post inc are supported on loads and stores |
345 | for (unsigned im = (unsigned)ISD::PRE_INC; |
346 | im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { |
347 | setIndexedLoadAction(im, VT, Legal); |
348 | setIndexedStoreAction(im, VT, Legal); |
349 | setIndexedMaskedLoadAction(im, VT, Legal); |
350 | setIndexedMaskedStoreAction(im, VT, Legal); |
351 | } |
352 | |
353 | if (HasMVEFP) { |
354 | setOperationAction(ISD::FMINNUM, VT, Legal); |
355 | setOperationAction(ISD::FMAXNUM, VT, Legal); |
356 | setOperationAction(ISD::FROUND, VT, Legal); |
357 | setOperationAction(ISD::VECREDUCE_FADD, VT, Custom); |
358 | setOperationAction(ISD::VECREDUCE_FMUL, VT, Custom); |
359 | setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom); |
360 | setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom); |
361 | |
362 | // No native support for these. |
363 | setOperationAction(ISD::FDIV, VT, Expand); |
364 | setOperationAction(ISD::FREM, VT, Expand); |
365 | setOperationAction(ISD::FSQRT, VT, Expand); |
366 | setOperationAction(ISD::FSIN, VT, Expand); |
367 | setOperationAction(ISD::FCOS, VT, Expand); |
368 | setOperationAction(ISD::FPOW, VT, Expand); |
369 | setOperationAction(ISD::FLOG, VT, Expand); |
370 | setOperationAction(ISD::FLOG2, VT, Expand); |
371 | setOperationAction(ISD::FLOG10, VT, Expand); |
372 | setOperationAction(ISD::FEXP, VT, Expand); |
373 | setOperationAction(ISD::FEXP2, VT, Expand); |
374 | setOperationAction(ISD::FNEARBYINT, VT, Expand); |
375 | } |
376 | } |
377 | |
378 | // Custom Expand smaller than legal vector reductions to prevent false zero |
379 | // items being added. |
380 | setOperationAction(ISD::VECREDUCE_FADD, MVT::v4f16, Custom); |
381 | setOperationAction(ISD::VECREDUCE_FMUL, MVT::v4f16, Custom); |
382 | setOperationAction(ISD::VECREDUCE_FMIN, MVT::v4f16, Custom); |
383 | setOperationAction(ISD::VECREDUCE_FMAX, MVT::v4f16, Custom); |
384 | setOperationAction(ISD::VECREDUCE_FADD, MVT::v2f16, Custom); |
385 | setOperationAction(ISD::VECREDUCE_FMUL, MVT::v2f16, Custom); |
386 | setOperationAction(ISD::VECREDUCE_FMIN, MVT::v2f16, Custom); |
387 | setOperationAction(ISD::VECREDUCE_FMAX, MVT::v2f16, Custom); |
388 | |
389 | // We 'support' these types up to bitcast/load/store level, regardless of |
390 | // MVE integer-only / float support. Only doing FP data processing on the FP |
391 | // vector types is inhibited at integer-only level. |
392 | const MVT LongTypes[] = { MVT::v2i64, MVT::v2f64 }; |
393 | for (auto VT : LongTypes) { |
394 | addRegisterClass(VT, &ARM::MQPRRegClass); |
395 | setAllExpand(VT); |
396 | setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
397 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
398 | setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
399 | setOperationAction(ISD::VSELECT, VT, Legal); |
400 | setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
401 | } |
402 | setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal); |
403 | |
404 | // We can do bitwise operations on v2i64 vectors |
405 | setOperationAction(ISD::AND, MVT::v2i64, Legal); |
406 | setOperationAction(ISD::OR, MVT::v2i64, Legal); |
407 | setOperationAction(ISD::XOR, MVT::v2i64, Legal); |
408 | |
409 | // It is legal to extload from v8i8 to v8i16 and from v4i8/v4i16 to v4i32. |
410 | addAllExtLoads(MVT::v8i16, MVT::v8i8, Legal); |
411 | addAllExtLoads(MVT::v4i32, MVT::v4i16, Legal); |
412 | addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal); |
413 | |
414 | // It is legal to sign extend from v4i8/v4i16 to v4i32 or v8i8 to v8i16. |
415 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Legal); |
416 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal); |
417 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal); |
418 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i8, Legal); |
419 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i16, Legal); |
420 | |
421 | // Some truncating stores are legal too. |
422 | setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal); |
423 | setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal); |
424 | setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal); |
425 | |
426 | // Pre and Post inc on these are legal, given the correct extends |
427 | for (unsigned im = (unsigned)ISD::PRE_INC; |
428 | im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { |
429 | for (auto VT : {MVT::v8i8, MVT::v4i8, MVT::v4i16}) { |
430 | setIndexedLoadAction(im, VT, Legal); |
431 | setIndexedStoreAction(im, VT, Legal); |
432 | setIndexedMaskedLoadAction(im, VT, Legal); |
433 | setIndexedMaskedStoreAction(im, VT, Legal); |
434 | } |
435 | } |
436 | |
437 | // Predicate types |
438 | const MVT pTypes[] = {MVT::v16i1, MVT::v8i1, MVT::v4i1, MVT::v2i1}; |
439 | for (auto VT : pTypes) { |
440 | addRegisterClass(VT, &ARM::VCCRRegClass); |
441 | setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
442 | setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
443 | setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); |
444 | setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); |
445 | setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
446 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
447 | setOperationAction(ISD::SETCC, VT, Custom); |
448 | setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand); |
449 | setOperationAction(ISD::LOAD, VT, Custom); |
450 | setOperationAction(ISD::STORE, VT, Custom); |
451 | setOperationAction(ISD::TRUNCATE, VT, Custom); |
452 | setOperationAction(ISD::VSELECT, VT, Expand); |
453 | setOperationAction(ISD::SELECT, VT, Expand); |
454 | setOperationAction(ISD::SELECT_CC, VT, Expand); |
455 | |
456 | if (!HasMVEFP) { |
457 | setOperationAction(ISD::SINT_TO_FP, VT, Expand); |
458 | setOperationAction(ISD::UINT_TO_FP, VT, Expand); |
459 | setOperationAction(ISD::FP_TO_SINT, VT, Expand); |
460 | setOperationAction(ISD::FP_TO_UINT, VT, Expand); |
461 | } |
462 | } |
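// These i1 vector types are carried in VCCR, i.e. as lane masks in the MVE
// VPR predicate register, which is why most operations on them are either
// Custom-lowered or expanded rather than treated as ordinary vector registers.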
463 | setOperationAction(ISD::SETCC, MVT::v2i1, Expand); |
464 | setOperationAction(ISD::TRUNCATE, MVT::v2i1, Expand); |
465 | setOperationAction(ISD::AND, MVT::v2i1, Expand); |
466 | setOperationAction(ISD::OR, MVT::v2i1, Expand); |
467 | setOperationAction(ISD::XOR, MVT::v2i1, Expand); |
468 | setOperationAction(ISD::SINT_TO_FP, MVT::v2i1, Expand); |
469 | setOperationAction(ISD::UINT_TO_FP, MVT::v2i1, Expand); |
470 | setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Expand); |
471 | setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Expand); |
472 | |
473 | setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom); |
474 | setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom); |
475 | setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom); |
476 | setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom); |
477 | setOperationAction(ISD::ZERO_EXTEND, MVT::v16i16, Custom); |
478 | setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom); |
479 | setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom); |
480 | setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom); |
481 | } |
482 | |
483 | ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, |
484 | const ARMSubtarget &STI) |
485 | : TargetLowering(TM), Subtarget(&STI) { |
486 | RegInfo = Subtarget->getRegisterInfo(); |
487 | Itins = Subtarget->getInstrItineraryData(); |
488 | |
489 | setBooleanContents(ZeroOrOneBooleanContent); |
490 | setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); |
491 | |
492 | if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() && |
493 | !Subtarget->isTargetWatchOS() && !Subtarget->isTargetDriverKit()) { |
494 | bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard; |
495 | for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID) |
496 | setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID), |
497 | IsHFTarget ? CallingConv::ARM_AAPCS_VFP |
498 | : CallingConv::ARM_AAPCS); |
499 | } |
500 | |
501 | if (Subtarget->isTargetMachO()) { |
502 | // Uses VFP for Thumb libfuncs if available. |
503 | if (Subtarget->isThumb() && Subtarget->hasVFP2Base() && |
504 | Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) { |
505 | static const struct { |
506 | const RTLIB::Libcall Op; |
507 | const char * const Name; |
508 | const ISD::CondCode Cond; |
509 | } LibraryCalls[] = { |
510 | // Single-precision floating-point arithmetic. |
511 | { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID }, |
512 | { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID }, |
513 | { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID }, |
514 | { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID }, |
515 | |
516 | // Double-precision floating-point arithmetic. |
517 | { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID }, |
518 | { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID }, |
519 | { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID }, |
520 | { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID }, |
521 | |
522 | // Single-precision comparisons. |
523 | { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE }, |
524 | { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE }, |
525 | { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE }, |
526 | { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE }, |
527 | { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE }, |
528 | { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE }, |
529 | { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE }, |
530 | |
531 | // Double-precision comparisons. |
532 | { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE }, |
533 | { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE }, |
534 | { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE }, |
535 | { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE }, |
536 | { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE }, |
537 | { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE }, |
538 | { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE }, |
539 | |
540 | // Floating-point to integer conversions. |
541 | // i64 conversions are done via library routines even when generating VFP |
542 | // instructions, so use the same ones. |
543 | { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID }, |
544 | { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID }, |
545 | { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID }, |
546 | { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID }, |
547 | |
548 | // Conversions between floating types. |
549 | { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID }, |
550 | { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID }, |
551 | |
552 | // Integer to floating-point conversions. |
553 | // i64 conversions are done via library routines even when generating VFP |
554 | // instructions, so use the same ones. |
555 | // FIXME: There appears to be some naming inconsistency in ARM libgcc: |
556 | // e.g., __floatunsidf vs. __floatunssidfvfp. |
557 | { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID }, |
558 | { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID }, |
559 | { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID }, |
560 | { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID }, |
561 | }; |
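// For the comparison helpers, the third field is the condition later installed
// with setCmpLibcallCC: the __*vfp comparison routines return nonzero when the
// predicate holds, so the libcall result is tested against zero with SETNE.
// SETCC_INVALID marks plain arithmetic/conversion calls that have no
// associated comparison condition.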
562 | |
563 | for (const auto &LC : LibraryCalls) { |
564 | setLibcallName(LC.Op, LC.Name); |
565 | if (LC.Cond != ISD::SETCC_INVALID) |
566 | setCmpLibcallCC(LC.Op, LC.Cond); |
567 | } |
568 | } |
569 | } |
570 | |
571 | // These libcalls are not available in 32-bit. |
572 | setLibcallName(RTLIB::SHL_I128, nullptr); |
573 | setLibcallName(RTLIB::SRL_I128, nullptr); |
574 | setLibcallName(RTLIB::SRA_I128, nullptr); |
575 | setLibcallName(RTLIB::MUL_I128, nullptr); |
576 | setLibcallName(RTLIB::MULO_I64, nullptr); |
577 | setLibcallName(RTLIB::MULO_I128, nullptr); |
578 | |
579 | // RTLIB |
580 | if (Subtarget->isAAPCS_ABI() && |
581 | (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() || |
582 | Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) { |
583 | static const struct { |
584 | const RTLIB::Libcall Op; |
585 | const char * const Name; |
586 | const CallingConv::ID CC; |
587 | const ISD::CondCode Cond; |
588 | } LibraryCalls[] = { |
589 | // Double-precision floating-point arithmetic helper functions |
590 | // RTABI chapter 4.1.2, Table 2 |
591 | { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
592 | { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
593 | { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
594 | { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
595 | |
596 | // Double-precision floating-point comparison helper functions |
597 | // RTABI chapter 4.1.2, Table 3 |
598 | { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE }, |
599 | { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ }, |
600 | { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE }, |
601 | { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE }, |
602 | { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE }, |
603 | { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE }, |
604 | { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE }, |
605 | |
606 | // Single-precision floating-point arithmetic helper functions |
607 | // RTABI chapter 4.1.2, Table 4 |
608 | { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
609 | { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
610 | { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
611 | { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
612 | |
613 | // Single-precision floating-point comparison helper functions |
614 | // RTABI chapter 4.1.2, Table 5 |
615 | { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE }, |
616 | { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ }, |
617 | { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE }, |
618 | { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE }, |
619 | { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE }, |
620 | { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE }, |
621 | { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE }, |
622 | |
623 | // Floating-point to integer conversions. |
624 | // RTABI chapter 4.1.2, Table 6 |
625 | { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
626 | { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
627 | { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
628 | { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
629 | { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
630 | { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
631 | { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
632 | { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
633 | |
634 | // Conversions between floating types. |
635 | // RTABI chapter 4.1.2, Table 7 |
636 | { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
637 | { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
638 | { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
639 | |
640 | // Integer to floating-point conversions. |
641 | // RTABI chapter 4.1.2, Table 8 |
642 | { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
643 | { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
644 | { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
645 | { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
646 | { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
647 | { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
648 | { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
649 | { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
650 | |
651 | // Long long helper functions |
652 | // RTABI chapter 4.2, Table 9 |
653 | { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
654 | { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
655 | { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
656 | { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
657 | |
658 | // Integer division functions |
659 | // RTABI chapter 4.3.1 |
660 | { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
661 | { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
662 | { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
663 | { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
664 | { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
665 | { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
666 | { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
667 | { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
668 | }; |
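// Note the dual registration of __aeabi_dcmpeq / __aeabi_fcmpeq: the helper
// returns 1 only for "ordered and equal", so OEQ tests the result with SETNE
// while UNE reuses the same helper and tests it with SETEQ.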
669 | |
670 | for (const auto &LC : LibraryCalls) { |
671 | setLibcallName(LC.Op, LC.Name); |
672 | setLibcallCallingConv(LC.Op, LC.CC); |
673 | if (LC.Cond != ISD::SETCC_INVALID) |
674 | setCmpLibcallCC(LC.Op, LC.Cond); |
675 | } |
676 | |
677 | // EABI dependent RTLIB |
678 | if (TM.Options.EABIVersion == EABI::EABI4 || |
679 | TM.Options.EABIVersion == EABI::EABI5) { |
680 | static const struct { |
681 | const RTLIB::Libcall Op; |
682 | const char *const Name; |
683 | const CallingConv::ID CC; |
684 | const ISD::CondCode Cond; |
685 | } MemOpsLibraryCalls[] = { |
686 | // Memory operations |
687 | // RTABI chapter 4.3.4 |
688 | { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
689 | { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
690 | { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, |
691 | }; |
692 | |
693 | for (const auto &LC : MemOpsLibraryCalls) { |
694 | setLibcallName(LC.Op, LC.Name); |
695 | setLibcallCallingConv(LC.Op, LC.CC); |
696 | if (LC.Cond != ISD::SETCC_INVALID) |
697 | setCmpLibcallCC(LC.Op, LC.Cond); |
698 | } |
699 | } |
700 | } |
701 | |
702 | if (Subtarget->isTargetWindows()) { |
703 | static const struct { |
704 | const RTLIB::Libcall Op; |
705 | const char * const Name; |
706 | const CallingConv::ID CC; |
707 | } LibraryCalls[] = { |
708 | { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP }, |
709 | { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP }, |
710 | { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP }, |
711 | { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP }, |
712 | { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP }, |
713 | { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP }, |
714 | { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP }, |
715 | { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP }, |
716 | }; |
717 | |
718 | for (const auto &LC : LibraryCalls) { |
719 | setLibcallName(LC.Op, LC.Name); |
720 | setLibcallCallingConv(LC.Op, LC.CC); |
721 | } |
722 | } |
723 | |
724 | // Use divmod compiler-rt calls for iOS 5.0 and later. |
725 | if (Subtarget->isTargetMachO() && |
726 | !(Subtarget->isTargetIOS() && |
727 | Subtarget->getTargetTriple().isOSVersionLT(5, 0))) { |
728 | setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4"); |
729 | setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4"); |
730 | } |
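// For reference, the compiler-rt helpers have the shape (signed variant):
//   si_int __divmodsi4(si_int a, si_int b, si_int *rem);
// i.e. the quotient is returned and the remainder is written through the
// pointer argument, so a single call serves both SDIVREM_I32 results.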
731 | |
732 | // The half <-> float conversion functions are always soft-float on |
733 | // non-watchos platforms, but are needed for some targets which use a |
734 | // hard-float calling convention by default. |
735 | if (!Subtarget->isTargetWatchABI()) { |
736 | if (Subtarget->isAAPCS_ABI()) { |
737 | setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS); |
738 | setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS); |
739 | setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS); |
740 | } else { |
741 | setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS); |
742 | setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS); |
743 | setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS); |
744 | } |
745 | } |
746 | |
747 | // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have |
748 | // a __gnu_ prefix (which is the default). |
749 | if (Subtarget->isTargetAEABI()) { |
750 | static const struct { |
751 | const RTLIB::Libcall Op; |
752 | const char * const Name; |
753 | const CallingConv::ID CC; |
754 | } LibraryCalls[] = { |
755 | { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS }, |
756 | { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS }, |
757 | { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS }, |
758 | }; |
759 | |
760 | for (const auto &LC : LibraryCalls) { |
761 | setLibcallName(LC.Op, LC.Name); |
762 | setLibcallCallingConv(LC.Op, LC.CC); |
763 | } |
764 | } |
765 | |
766 | if (Subtarget->isThumb1Only()) |
767 | addRegisterClass(MVT::i32, &ARM::tGPRRegClass); |
768 | else |
769 | addRegisterClass(MVT::i32, &ARM::GPRRegClass); |
770 | |
771 | if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only() && |
772 | Subtarget->hasFPRegs()) { |
773 | addRegisterClass(MVT::f32, &ARM::SPRRegClass); |
774 | addRegisterClass(MVT::f64, &ARM::DPRRegClass); |
775 | |
776 | setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i32, Custom); |
777 | setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i32, Custom); |
778 | setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom); |
779 | setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom); |
780 | |
781 | if (!Subtarget->hasVFP2Base()) |
782 | setAllExpand(MVT::f32); |
783 | if (!Subtarget->hasFP64()) |
784 | setAllExpand(MVT::f64); |
785 | } |
786 | |
787 | if (Subtarget->hasFullFP16()) { |
788 | addRegisterClass(MVT::f16, &ARM::HPRRegClass); |
789 | setOperationAction(ISD::BITCAST, MVT::i16, Custom); |
790 | setOperationAction(ISD::BITCAST, MVT::f16, Custom); |
791 | |
792 | setOperationAction(ISD::FMINNUM, MVT::f16, Legal); |
793 | setOperationAction(ISD::FMAXNUM, MVT::f16, Legal); |
794 | } |
795 | |
796 | if (Subtarget->hasBF16()) { |
797 | addRegisterClass(MVT::bf16, &ARM::HPRRegClass); |
798 | setAllExpand(MVT::bf16); |
799 | if (!Subtarget->hasFullFP16()) |
800 | setOperationAction(ISD::BITCAST, MVT::bf16, Custom); |
801 | } |
802 | |
803 | for (MVT VT : MVT::fixedlen_vector_valuetypes()) { |
804 | for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) { |
805 | setTruncStoreAction(VT, InnerVT, Expand); |
806 | addAllExtLoads(VT, InnerVT, Expand); |
807 | } |
808 | |
809 | setOperationAction(ISD::SMUL_LOHI, VT, Expand); |
810 | setOperationAction(ISD::UMUL_LOHI, VT, Expand); |
811 | |
812 | setOperationAction(ISD::BSWAP, VT, Expand); |
813 | } |
814 | |
815 | setOperationAction(ISD::ConstantFP, MVT::f32, Custom); |
816 | setOperationAction(ISD::ConstantFP, MVT::f64, Custom); |
817 | |
818 | setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom); |
819 | setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom); |
820 | |
821 | if (Subtarget->hasMVEIntegerOps()) |
822 | addMVEVectorTypes(Subtarget->hasMVEFloatOps()); |
823 | |
824 | // Combine low-overhead loop intrinsics so that we can lower i1 types. |
825 | if (Subtarget->hasLOB()) { |
826 | setTargetDAGCombine({ISD::BRCOND, ISD::BR_CC}); |
827 | } |
828 | |
829 | if (Subtarget->hasNEON()) { |
830 | addDRTypeForNEON(MVT::v2f32); |
831 | addDRTypeForNEON(MVT::v8i8); |
832 | addDRTypeForNEON(MVT::v4i16); |
833 | addDRTypeForNEON(MVT::v2i32); |
834 | addDRTypeForNEON(MVT::v1i64); |
835 | |
836 | addQRTypeForNEON(MVT::v4f32); |
837 | addQRTypeForNEON(MVT::v2f64); |
838 | addQRTypeForNEON(MVT::v16i8); |
839 | addQRTypeForNEON(MVT::v8i16); |
840 | addQRTypeForNEON(MVT::v4i32); |
841 | addQRTypeForNEON(MVT::v2i64); |
842 | |
843 | if (Subtarget->hasFullFP16()) { |
844 | addQRTypeForNEON(MVT::v8f16); |
845 | addDRTypeForNEON(MVT::v4f16); |
846 | } |
847 | |
848 | if (Subtarget->hasBF16()) { |
849 | addQRTypeForNEON(MVT::v8bf16); |
850 | addDRTypeForNEON(MVT::v4bf16); |
851 | } |
852 | } |
853 | |
854 | if (Subtarget->hasMVEIntegerOps() || Subtarget->hasNEON()) { |
855 | // v2f64 is legal so that QR subregs can be extracted as f64 elements, but |
856 | // none of Neon, MVE or VFP supports any arithmetic operations on it. |
857 | setOperationAction(ISD::FADD, MVT::v2f64, Expand); |
858 | setOperationAction(ISD::FSUB, MVT::v2f64, Expand); |
859 | setOperationAction(ISD::FMUL, MVT::v2f64, Expand); |
860 | // FIXME: Code duplication: FDIV and FREM are expanded always, see |
861 | // ARMTargetLowering::addTypeForNEON method for details. |
862 | setOperationAction(ISD::FDIV, MVT::v2f64, Expand); |
863 | setOperationAction(ISD::FREM, MVT::v2f64, Expand); |
864 | // FIXME: Create unittest. |
865 | // In other words, find a way to get "copysign" to appear in the DAG with |
866 | // vector operands. |
867 | setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand); |
868 | // FIXME: Code duplication: SETCC has custom operation action, see |
869 | // ARMTargetLowering::addTypeForNEON method for details. |
870 | setOperationAction(ISD::SETCC, MVT::v2f64, Expand); |
871 | // FIXME: Create unittest for FNEG and for FABS. |
872 | setOperationAction(ISD::FNEG, MVT::v2f64, Expand); |
873 | setOperationAction(ISD::FABS, MVT::v2f64, Expand); |
874 | setOperationAction(ISD::FSQRT, MVT::v2f64, Expand); |
875 | setOperationAction(ISD::FSIN, MVT::v2f64, Expand); |
876 | setOperationAction(ISD::FCOS, MVT::v2f64, Expand); |
877 | setOperationAction(ISD::FPOW, MVT::v2f64, Expand); |
878 | setOperationAction(ISD::FLOG, MVT::v2f64, Expand); |
879 | setOperationAction(ISD::FLOG2, MVT::v2f64, Expand); |
880 | setOperationAction(ISD::FLOG10, MVT::v2f64, Expand); |
881 | setOperationAction(ISD::FEXP, MVT::v2f64, Expand); |
882 | setOperationAction(ISD::FEXP2, MVT::v2f64, Expand); |
883 | // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR. |
884 | setOperationAction(ISD::FCEIL, MVT::v2f64, Expand); |
885 | setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand); |
886 | setOperationAction(ISD::FRINT, MVT::v2f64, Expand); |
887 | setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand); |
888 | setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand); |
889 | setOperationAction(ISD::FMA, MVT::v2f64, Expand); |
890 | } |
891 | |
892 | if (Subtarget->hasNEON()) { |
893 | // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively |
894 | // supported for v4f32. |
895 | setOperationAction(ISD::FSQRT, MVT::v4f32, Expand); |
896 | setOperationAction(ISD::FSIN, MVT::v4f32, Expand); |
897 | setOperationAction(ISD::FCOS, MVT::v4f32, Expand); |
898 | setOperationAction(ISD::FPOW, MVT::v4f32, Expand); |
899 | setOperationAction(ISD::FLOG, MVT::v4f32, Expand); |
900 | setOperationAction(ISD::FLOG2, MVT::v4f32, Expand); |
901 | setOperationAction(ISD::FLOG10, MVT::v4f32, Expand); |
902 | setOperationAction(ISD::FEXP, MVT::v4f32, Expand); |
903 | setOperationAction(ISD::FEXP2, MVT::v4f32, Expand); |
904 | setOperationAction(ISD::FCEIL, MVT::v4f32, Expand); |
905 | setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand); |
906 | setOperationAction(ISD::FRINT, MVT::v4f32, Expand); |
907 | setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand); |
908 | setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand); |
909 | |
910 | // Expand the same set of FP operations for v2f32. |
911 | setOperationAction(ISD::FSQRT, MVT::v2f32, Expand); |
912 | setOperationAction(ISD::FSIN, MVT::v2f32, Expand); |
913 | setOperationAction(ISD::FCOS, MVT::v2f32, Expand); |
914 | setOperationAction(ISD::FPOW, MVT::v2f32, Expand); |
915 | setOperationAction(ISD::FLOG, MVT::v2f32, Expand); |
916 | setOperationAction(ISD::FLOG2, MVT::v2f32, Expand); |
917 | setOperationAction(ISD::FLOG10, MVT::v2f32, Expand); |
918 | setOperationAction(ISD::FEXP, MVT::v2f32, Expand); |
919 | setOperationAction(ISD::FEXP2, MVT::v2f32, Expand); |
920 | setOperationAction(ISD::FCEIL, MVT::v2f32, Expand); |
921 | setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand); |
922 | setOperationAction(ISD::FRINT, MVT::v2f32, Expand); |
923 | setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand); |
924 | setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand); |
925 | |
926 | // Neon does not support some operations on v1i64 and v2i64 types. |
927 | setOperationAction(ISD::MUL, MVT::v1i64, Expand); |
928 | // Custom handling for some quad-vector types to detect VMULL. |
929 | setOperationAction(ISD::MUL, MVT::v8i16, Custom); |
930 | setOperationAction(ISD::MUL, MVT::v4i32, Custom); |
931 | setOperationAction(ISD::MUL, MVT::v2i64, Custom); |
932 | // Custom handling for some vector types to avoid expensive expansions |
933 | setOperationAction(ISD::SDIV, MVT::v4i16, Custom); |
934 | setOperationAction(ISD::SDIV, MVT::v8i8, Custom); |
935 | setOperationAction(ISD::UDIV, MVT::v4i16, Custom); |
936 | setOperationAction(ISD::UDIV, MVT::v8i8, Custom); |
937 | // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with |
938 | // a destination type that is wider than the source, nor does |
939 | // it have a FP_TO_[SU]INT instruction with a narrower destination than |
940 | // source. |
941 | setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom); |
942 | setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom); |
943 | setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom); |
944 | setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom); |
945 | setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom); |
946 | setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Custom); |
947 | setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom); |
948 | setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom); |
949 | |
950 | setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand); |
951 | setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand); |
952 | |
953 | // NEON does not have single instruction CTPOP for vectors with element |
954 | // types wider than 8-bits. However, custom lowering can leverage the |
955 | // v8i8/v16i8 vcnt instruction. |
956 | setOperationAction(ISD::CTPOP, MVT::v2i32, Custom); |
957 | setOperationAction(ISD::CTPOP, MVT::v4i32, Custom); |
958 | setOperationAction(ISD::CTPOP, MVT::v4i16, Custom); |
959 | setOperationAction(ISD::CTPOP, MVT::v8i16, Custom); |
960 | setOperationAction(ISD::CTPOP, MVT::v1i64, Custom); |
961 | setOperationAction(ISD::CTPOP, MVT::v2i64, Custom); |
962 | |
963 | setOperationAction(ISD::CTLZ, MVT::v1i64, Expand); |
964 | setOperationAction(ISD::CTLZ, MVT::v2i64, Expand); |
965 | |
966 | // NEON does not have single instruction CTTZ for vectors. |
967 | setOperationAction(ISD::CTTZ, MVT::v8i8, Custom); |
968 | setOperationAction(ISD::CTTZ, MVT::v4i16, Custom); |
969 | setOperationAction(ISD::CTTZ, MVT::v2i32, Custom); |
970 | setOperationAction(ISD::CTTZ, MVT::v1i64, Custom); |
971 | |
972 | setOperationAction(ISD::CTTZ, MVT::v16i8, Custom); |
973 | setOperationAction(ISD::CTTZ, MVT::v8i16, Custom); |
974 | setOperationAction(ISD::CTTZ, MVT::v4i32, Custom); |
975 | setOperationAction(ISD::CTTZ, MVT::v2i64, Custom); |
976 | |
977 | setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i8, Custom); |
978 | setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i16, Custom); |
979 | setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i32, Custom); |
980 | setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v1i64, Custom); |
981 | |
982 | setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i8, Custom); |
983 | setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i16, Custom); |
984 | setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom); |
985 | setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom); |
986 | |
987 | for (MVT VT : MVT::fixedlen_vector_valuetypes()) { |
988 | setOperationAction(ISD::MULHS, VT, Expand); |
989 | setOperationAction(ISD::MULHU, VT, Expand); |
990 | } |
991 | |
992 | // NEON only has FMA instructions as of VFP4. |
993 | if (!Subtarget->hasVFP4Base()) { |
994 | setOperationAction(ISD::FMA, MVT::v2f32, Expand); |
995 | setOperationAction(ISD::FMA, MVT::v4f32, Expand); |
996 | } |
997 | |
998 | setTargetDAGCombine({ISD::SHL, ISD::SRL, ISD::SRA, ISD::FP_TO_SINT, |
999 | ISD::FP_TO_UINT, ISD::FDIV, ISD::LOAD}); |
1000 | |
1001 | // It is legal to extload from v4i8 to v4i16 or v4i32. |
1002 | for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16, |
1003 | MVT::v2i32}) { |
1004 | for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) { |
1005 | setLoadExtAction(ISD::EXTLOAD, VT, Ty, Legal); |
1006 | setLoadExtAction(ISD::ZEXTLOAD, VT, Ty, Legal); |
1007 | setLoadExtAction(ISD::SEXTLOAD, VT, Ty, Legal); |
1008 | } |
1009 | } |
1010 | |
1011 | for (auto VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16, |
1012 | MVT::v4i32}) { |
1013 | setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom); |
1014 | setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom); |
1015 | setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom); |
1016 | setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom); |
1017 | } |
1018 | } |
1019 | |
1020 | if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) { |
1021 | setTargetDAGCombine( |
1022 | {ISD::BUILD_VECTOR, ISD::VECTOR_SHUFFLE, ISD::INSERT_SUBVECTOR, |
1023 | ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT, |
1024 | ISD::SIGN_EXTEND_INREG, ISD::STORE, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND, |
1025 | ISD::ANY_EXTEND, ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN, |
1026 | ISD::INTRINSIC_VOID, ISD::VECREDUCE_ADD, ISD::ADD, ISD::BITCAST}); |
1027 | } |
1028 | if (Subtarget->hasMVEIntegerOps()) { |
1029 | setTargetDAGCombine({ISD::SMIN, ISD::UMIN, ISD::SMAX, ISD::UMAX, |
1030 | ISD::FP_EXTEND, ISD::SELECT, ISD::SELECT_CC, |
1031 | ISD::SETCC}); |
1032 | } |
1033 | if (Subtarget->hasMVEFloatOps()) { |
1034 | setTargetDAGCombine(ISD::FADD); |
1035 | } |
1036 | |
1037 | if (!Subtarget->hasFP64()) { |
1038 | // When targeting a floating-point unit with only single-precision |
1039 | // operations, f64 is legal for the few double-precision instructions which |
1040 | // are present. However, no double-precision operations other than moves, |
1041 | // loads and stores are provided by the hardware. |
1042 | setOperationAction(ISD::FADD, MVT::f64, Expand); |
1043 | setOperationAction(ISD::FSUB, MVT::f64, Expand); |
1044 | setOperationAction(ISD::FMUL, MVT::f64, Expand); |
1045 | setOperationAction(ISD::FMA, MVT::f64, Expand); |
1046 | setOperationAction(ISD::FDIV, MVT::f64, Expand); |
1047 | setOperationAction(ISD::FREM, MVT::f64, Expand); |
1048 | setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); |
1049 | setOperationAction(ISD::FGETSIGN, MVT::f64, Expand); |
1050 | setOperationAction(ISD::FNEG, MVT::f64, Expand); |
1051 | setOperationAction(ISD::FABS, MVT::f64, Expand); |
1052 | setOperationAction(ISD::FSQRT, MVT::f64, Expand); |
1053 | setOperationAction(ISD::FSIN, MVT::f64, Expand); |
1054 | setOperationAction(ISD::FCOS, MVT::f64, Expand); |
1055 | setOperationAction(ISD::FPOW, MVT::f64, Expand); |
1056 | setOperationAction(ISD::FLOG, MVT::f64, Expand); |
1057 | setOperationAction(ISD::FLOG2, MVT::f64, Expand); |
1058 | setOperationAction(ISD::FLOG10, MVT::f64, Expand); |
1059 | setOperationAction(ISD::FEXP, MVT::f64, Expand); |
1060 | setOperationAction(ISD::FEXP2, MVT::f64, Expand); |
1061 | setOperationAction(ISD::FCEIL, MVT::f64, Expand); |
1062 | setOperationAction(ISD::FTRUNC, MVT::f64, Expand); |
1063 | setOperationAction(ISD::FRINT, MVT::f64, Expand); |
1064 | setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand); |
1065 | setOperationAction(ISD::FFLOOR, MVT::f64, Expand); |
1066 | setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); |
1067 | setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); |
1068 | setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); |
1069 | setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); |
1070 | setOperationAction(ISD::FP_TO_SINT, MVT::f64, Custom); |
1071 | setOperationAction(ISD::FP_TO_UINT, MVT::f64, Custom); |
1072 | setOperationAction(ISD::FP_ROUND, MVT::f32, Custom); |
1073 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom); |
1074 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom); |
1075 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::f64, Custom); |
1076 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::f64, Custom); |
1077 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom); |
1078 | } |
1079 | |
1080 | if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) { |
1081 | setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom); |
1082 | setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Custom); |
1083 | if (Subtarget->hasFullFP16()) { |
1084 | setOperationAction(ISD::FP_ROUND, MVT::f16, Custom); |
1085 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom); |
1086 | } |
1087 | } |
1088 | |
1089 | if (!Subtarget->hasFP16()) { |
1090 | setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom); |
1091 | setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Custom); |
1092 | } |
1093 | |
1094 | computeRegisterProperties(Subtarget->getRegisterInfo()); |
1095 | |
1096 | // ARM does not have floating-point extending loads. |
1097 | for (MVT VT : MVT::fp_valuetypes()) { |
1098 | setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand); |
1099 | setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand); |
1100 | } |
1101 | |
1102 | // ... or truncating stores |
1103 | setTruncStoreAction(MVT::f64, MVT::f32, Expand); |
1104 | setTruncStoreAction(MVT::f32, MVT::f16, Expand); |
1105 | setTruncStoreAction(MVT::f64, MVT::f16, Expand); |
1106 | |
1107 | // ARM does not have i1 sign extending load. |
1108 | for (MVT VT : MVT::integer_valuetypes()) |
1109 | setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); |
1110 | |
1111 | // ARM supports all 4 flavors of integer indexed load / store. |
1112 | if (!Subtarget->isThumb1Only()) { |
1113 | for (unsigned im = (unsigned)ISD::PRE_INC; |
1114 | im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { |
1115 | setIndexedLoadAction(im, MVT::i1, Legal); |
1116 | setIndexedLoadAction(im, MVT::i8, Legal); |
1117 | setIndexedLoadAction(im, MVT::i16, Legal); |
1118 | setIndexedLoadAction(im, MVT::i32, Legal); |
1119 | setIndexedStoreAction(im, MVT::i1, Legal); |
1120 | setIndexedStoreAction(im, MVT::i8, Legal); |
1121 | setIndexedStoreAction(im, MVT::i16, Legal); |
1122 | setIndexedStoreAction(im, MVT::i32, Legal); |
1123 | } |
1124 | } else { |
1125 | // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}. |
1126 | setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal); |
1127 | setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal); |
1128 | } |
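// Illustrative ARM assembly for the indexed forms:
//   ldr r1, [r0, #4]!   ; pre-indexed: r0 is updated before the access
//   ldr r1, [r0], #4    ; post-indexed: r0 is updated after the access
// Thumb-1 has no writeback forms of LDR/STR, hence only POST_INC via LDM/STM
// is marked legal above.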
1129 | |
1130 | setOperationAction(ISD::SADDO, MVT::i32, Custom); |
1131 | setOperationAction(ISD::UADDO, MVT::i32, Custom); |
1132 | setOperationAction(ISD::SSUBO, MVT::i32, Custom); |
1133 | setOperationAction(ISD::USUBO, MVT::i32, Custom); |
1134 | |
1135 | setOperationAction(ISD::ADDCARRY, MVT::i32, Custom); |
1136 | setOperationAction(ISD::SUBCARRY, MVT::i32, Custom); |
1137 | if (Subtarget->hasDSP()) { |
1138 | setOperationAction(ISD::SADDSAT, MVT::i8, Custom); |
1139 | setOperationAction(ISD::SSUBSAT, MVT::i8, Custom); |
1140 | setOperationAction(ISD::SADDSAT, MVT::i16, Custom); |
1141 | setOperationAction(ISD::SSUBSAT, MVT::i16, Custom); |
1142 | setOperationAction(ISD::UADDSAT, MVT::i8, Custom); |
1143 | setOperationAction(ISD::USUBSAT, MVT::i8, Custom); |
1144 | setOperationAction(ISD::UADDSAT, MVT::i16, Custom); |
1145 | setOperationAction(ISD::USUBSAT, MVT::i16, Custom); |
1146 | } |
1147 | if (Subtarget->hasBaseDSP()) { |
1148 | setOperationAction(ISD::SADDSAT, MVT::i32, Legal); |
1149 | setOperationAction(ISD::SSUBSAT, MVT::i32, Legal); |
1150 | } |
1151 | |
1152 | // i64 operation support. |
1153 | setOperationAction(ISD::MUL, MVT::i64, Expand); |
1154 | setOperationAction(ISD::MULHU, MVT::i32, Expand); |
1155 | if (Subtarget->isThumb1Only()) { |
1156 | setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); |
1157 | setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); |
1158 | } |
1159 | if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops() |
1160 | || (Subtarget->isThumb2() && !Subtarget->hasDSP())) |
1161 | setOperationAction(ISD::MULHS, MVT::i32, Expand); |
1162 | |
1163 | setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); |
1164 | setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); |
1165 | setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); |
1166 | setOperationAction(ISD::SRL, MVT::i64, Custom); |
1167 | setOperationAction(ISD::SRA, MVT::i64, Custom); |
1168 | setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); |
1169 | setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); |
1170 | setOperationAction(ISD::LOAD, MVT::i64, Custom); |
1171 | setOperationAction(ISD::STORE, MVT::i64, Custom); |
1172 | |
1173 | // MVE lowers 64 bit shifts to lsll and lsrl |
1174 | // assuming that ISD::SRL and SRA of i64 are already marked custom |
1175 | if (Subtarget->hasMVEIntegerOps()) |
1176 | setOperationAction(ISD::SHL, MVT::i64, Custom); |
1177 | |
1178 | // Expand to __aeabi_l{lsl,lsr,asr} calls for Thumb1. |
1179 | if (Subtarget->isThumb1Only()) { |
1180 | setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand); |
1181 | setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand); |
1182 | setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand); |
1183 | } |
1184 | |
1185 | if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) |
1186 | setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); |
1187 | |
1188 | // ARM does not have ROTL. |
1189 | setOperationAction(ISD::ROTL, MVT::i32, Expand); |
1190 | for (MVT VT : MVT::fixedlen_vector_valuetypes()) { |
1191 | setOperationAction(ISD::ROTL, VT, Expand); |
1192 | setOperationAction(ISD::ROTR, VT, Expand); |
1193 | } |
1194 | setOperationAction(ISD::CTTZ, MVT::i32, Custom); |
1195 | setOperationAction(ISD::CTPOP, MVT::i32, Expand); |
1196 | if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) { |
1197 | setOperationAction(ISD::CTLZ, MVT::i32, Expand); |
1198 | setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, LibCall); |
1199 | } |
1200 | |
1201 | // @llvm.readcyclecounter requires the Performance Monitors extension. |
1202 | // Default to the 0 expansion on unsupported platforms. |
1203 | // FIXME: Technically there are older ARM CPUs that have |
1204 | // implementation-specific ways of obtaining this information. |
1205 | if (Subtarget->hasPerfMon()) |
1206 | setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom); |
1207 | |
1208 | // Only ARMv6 has BSWAP. |
1209 | if (!Subtarget->hasV6Ops()) |
1210 | setOperationAction(ISD::BSWAP, MVT::i32, Expand); |
1211 | |
1212 | bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode() |
1213 | : Subtarget->hasDivideInARMMode(); |
1214 | if (!hasDivide) { |
1215 | // These are expanded into libcalls if the cpu doesn't have HW divider. |
1216 | setOperationAction(ISD::SDIV, MVT::i32, LibCall); |
1217 | setOperationAction(ISD::UDIV, MVT::i32, LibCall); |
1218 | } |
1219 | |
1220 | if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) { |
1221 | setOperationAction(ISD::SDIV, MVT::i32, Custom); |
1222 | setOperationAction(ISD::UDIV, MVT::i32, Custom); |
1223 | |
1224 | setOperationAction(ISD::SDIV, MVT::i64, Custom); |
1225 | setOperationAction(ISD::UDIV, MVT::i64, Custom); |
1226 | } |
1227 | |
1228 | setOperationAction(ISD::SREM, MVT::i32, Expand); |
1229 | setOperationAction(ISD::UREM, MVT::i32, Expand); |
1230 | |
1231 | // Register based DivRem for AEABI (RTABI 4.2) |
1232 | if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() || |
1233 | Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() || |
1234 | Subtarget->isTargetWindows()) { |
1235 | setOperationAction(ISD::SREM, MVT::i64, Custom); |
1236 | setOperationAction(ISD::UREM, MVT::i64, Custom); |
1237 | HasStandaloneRem = false; |
1238 | |
1239 | if (Subtarget->isTargetWindows()) { |
1240 | const struct { |
1241 | const RTLIB::Libcall Op; |
1242 | const char * const Name; |
1243 | const CallingConv::ID CC; |
1244 | } LibraryCalls[] = { |
1245 | { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS }, |
1246 | { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS }, |
1247 | { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS }, |
1248 | { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS }, |
1249 | |
1250 | { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS }, |
1251 | { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS }, |
1252 | { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS }, |
1253 | { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS }, |
1254 | }; |
1255 | |
1256 | for (const auto &LC : LibraryCalls) { |
1257 | setLibcallName(LC.Op, LC.Name); |
1258 | setLibcallCallingConv(LC.Op, LC.CC); |
1259 | } |
1260 | } else { |
1261 | const struct { |
1262 | const RTLIB::Libcall Op; |
1263 | const char * const Name; |
1264 | const CallingConv::ID CC; |
1265 | } LibraryCalls[] = { |
1266 | { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS }, |
1267 | { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS }, |
1268 | { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS }, |
1269 | { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS }, |
1270 | |
1271 | { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS }, |
1272 | { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS }, |
1273 | { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS }, |
1274 | { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS }, |
1275 | }; |
1276 | |
1277 | for (const auto &LC : LibraryCalls) { |
1278 | setLibcallName(LC.Op, LC.Name); |
1279 | setLibcallCallingConv(LC.Op, LC.CC); |
1280 | } |
1281 | } |
1282 | |
1283 | setOperationAction(ISD::SDIVREM, MVT::i32, Custom); |
1284 | setOperationAction(ISD::UDIVREM, MVT::i32, Custom); |
1285 | setOperationAction(ISD::SDIVREM, MVT::i64, Custom); |
1286 | setOperationAction(ISD::UDIVREM, MVT::i64, Custom); |
1287 | } else { |
1288 | setOperationAction(ISD::SDIVREM, MVT::i32, Expand); |
1289 | setOperationAction(ISD::UDIVREM, MVT::i32, Expand); |
1290 | } |
1291 | |
1292 | if (Subtarget->getTargetTriple().isOSMSVCRT()) { |
1293 | // MSVCRT doesn't have powi; fall back to pow |
1294 | setLibcallName(RTLIB::POWI_F32, nullptr); |
1295 | setLibcallName(RTLIB::POWI_F64, nullptr); |
1296 | } |
1297 | |
1298 | setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); |
1299 | setOperationAction(ISD::ConstantPool, MVT::i32, Custom); |
1300 | setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); |
1301 | setOperationAction(ISD::BlockAddress, MVT::i32, Custom); |
1302 | |
1303 | setOperationAction(ISD::TRAP, MVT::Other, Legal); |
1304 | setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); |
1305 | |
1306 | // Use the default implementation. |
1307 | setOperationAction(ISD::VASTART, MVT::Other, Custom); |
1308 | setOperationAction(ISD::VAARG, MVT::Other, Expand); |
1309 | setOperationAction(ISD::VACOPY, MVT::Other, Expand); |
1310 | setOperationAction(ISD::VAEND, MVT::Other, Expand); |
1311 | setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); |
1312 | setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); |
1313 | |
1314 | if (Subtarget->isTargetWindows()) |
1315 | setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); |
1316 | else |
1317 | setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); |
1318 | |
1319 | // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use |
1320 | // the default expansion. |
1321 | InsertFencesForAtomic = false; |
1322 | if (Subtarget->hasAnyDataBarrier() && |
1323 | (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) { |
1324 | // ATOMIC_FENCE needs custom lowering; the others should have been expanded |
1325 | // to ldrex/strex loops already. |
1326 | setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); |
1327 | if (!Subtarget->isThumb() || !Subtarget->isMClass()) |
1328 | setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom); |
1329 | |
1330 | // On v8, we have particularly efficient implementations of atomic fences |
1331 | // if they can be combined with nearby atomic loads and stores. |
1332 | if (!Subtarget->hasAcquireRelease() || |
1333 | getTargetMachine().getOptLevel() == 0) { |
1334 | // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc. |
1335 | InsertFencesForAtomic = true; |
1336 | } |
1337 | } else { |
1338 | // If there's anything we can use as a barrier, go through custom lowering |
1339 | // for ATOMIC_FENCE. |
1340 | // If target has DMB in thumb, fences can be inserted.
1341 | if (Subtarget->hasDataBarrier()) |
1342 | InsertFencesForAtomic = true; |
1343 | |
1344 | setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, |
1345 | Subtarget->hasAnyDataBarrier() ? Custom : Expand); |
1346 | |
1347 | // Set them all for expansion, which will force libcalls. |
1348 | setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand); |
1349 | setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand); |
1350 | setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand); |
1351 | setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand); |
1352 | setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand); |
1353 | setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand); |
1354 | setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand); |
1355 | setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand); |
1356 | setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand); |
1357 | setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand); |
1358 | setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand); |
1359 | setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand); |
1360 | // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the |
1361 | // Unordered/Monotonic case. |
1362 | if (!InsertFencesForAtomic) { |
1363 | setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom); |
1364 | setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom); |
1365 | } |
1366 | } |
1367 | |
1368 | // Compute supported atomic widths. |
1369 | if (Subtarget->isTargetLinux() || |
1370 | (!Subtarget->isMClass() && Subtarget->hasV6Ops())) { |
1371 | // For targets where __sync_* routines are reliably available, we use them |
1372 | // if necessary. |
1373 | // |
1374 | // ARM Linux always supports 64-bit atomics through kernel-assisted atomic |
1375 | // routines (kernel 3.1 or later). FIXME: Not with compiler-rt? |
1376 | // |
1377 | // ARMv6 targets have native instructions in ARM mode. For Thumb mode, |
1378 | // such targets should provide __sync_* routines, which use the ARM mode |
1379 | // instructions. (ARMv6 doesn't have dmb, but it has an equivalent |
1380 | // encoding; see ARMISD::MEMBARRIER_MCR.) |
1381 | setMaxAtomicSizeInBitsSupported(64); |
1382 | } else if ((Subtarget->isMClass() && Subtarget->hasV8MBaselineOps()) || |
1383 | Subtarget->hasForced32BitAtomics()) { |
1384 | // Cortex-M cores (besides Cortex-M0) have 32-bit atomics.
1385 | setMaxAtomicSizeInBitsSupported(32); |
1386 | } else { |
1387 | // We can't assume anything about other targets; just use libatomic |
1388 | // routines. |
1389 | setMaxAtomicSizeInBitsSupported(0); |
1390 | } |
1391 | |
1392 | setMaxDivRemBitWidthSupported(64); |
1393 | |
1394 | setOperationAction(ISD::PREFETCH, MVT::Other, Custom); |
1395 | |
1396 | // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes. |
1397 | if (!Subtarget->hasV6Ops()) { |
1398 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); |
1399 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); |
1400 | } |
1401 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); |
1402 | |
1403 | if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() && |
1404 | !Subtarget->isThumb1Only()) { |
1405 | // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR |
1406 | // iff target supports vfp2. |
1407 | setOperationAction(ISD::BITCAST, MVT::i64, Custom); |
1408 | setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom); |
1409 | setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom); |
1410 | } |
1411 | |
1412 | // We want to custom lower some of our intrinsics. |
1413 | setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); |
1414 | setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); |
1415 | setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); |
1416 | setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom); |
1417 | if (Subtarget->useSjLjEH()) |
1418 | setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume"); |
1419 | |
1420 | setOperationAction(ISD::SETCC, MVT::i32, Expand); |
1421 | setOperationAction(ISD::SETCC, MVT::f32, Expand); |
1422 | setOperationAction(ISD::SETCC, MVT::f64, Expand); |
1423 | setOperationAction(ISD::SELECT, MVT::i32, Custom); |
1424 | setOperationAction(ISD::SELECT, MVT::f32, Custom); |
1425 | setOperationAction(ISD::SELECT, MVT::f64, Custom); |
1426 | setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); |
1427 | setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); |
1428 | setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); |
1429 | if (Subtarget->hasFullFP16()) { |
1430 | setOperationAction(ISD::SETCC, MVT::f16, Expand); |
1431 | setOperationAction(ISD::SELECT, MVT::f16, Custom); |
1432 | setOperationAction(ISD::SELECT_CC, MVT::f16, Custom); |
1433 | } |
1434 | |
1435 | setOperationAction(ISD::SETCCCARRY, MVT::i32, Custom); |
1436 | |
1437 | setOperationAction(ISD::BRCOND, MVT::Other, Custom); |
1438 | setOperationAction(ISD::BR_CC, MVT::i32, Custom); |
1439 | if (Subtarget->hasFullFP16()) |
1440 | setOperationAction(ISD::BR_CC, MVT::f16, Custom); |
1441 | setOperationAction(ISD::BR_CC, MVT::f32, Custom); |
1442 | setOperationAction(ISD::BR_CC, MVT::f64, Custom); |
1443 | setOperationAction(ISD::BR_JT, MVT::Other, Custom); |
1444 | |
1445 | // We don't support sin/cos/fmod/copysign/pow |
1446 | setOperationAction(ISD::FSIN, MVT::f64, Expand); |
1447 | setOperationAction(ISD::FSIN, MVT::f32, Expand); |
1448 | setOperationAction(ISD::FCOS, MVT::f32, Expand); |
1449 | setOperationAction(ISD::FCOS, MVT::f64, Expand); |
1450 | setOperationAction(ISD::FSINCOS, MVT::f64, Expand); |
1451 | setOperationAction(ISD::FSINCOS, MVT::f32, Expand); |
1452 | setOperationAction(ISD::FREM, MVT::f64, Expand); |
1453 | setOperationAction(ISD::FREM, MVT::f32, Expand); |
1454 | if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() && |
1455 | !Subtarget->isThumb1Only()) { |
1456 | setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); |
1457 | setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); |
1458 | } |
1459 | setOperationAction(ISD::FPOW, MVT::f64, Expand); |
1460 | setOperationAction(ISD::FPOW, MVT::f32, Expand); |
1461 | |
1462 | if (!Subtarget->hasVFP4Base()) { |
1463 | setOperationAction(ISD::FMA, MVT::f64, Expand); |
1464 | setOperationAction(ISD::FMA, MVT::f32, Expand); |
1465 | } |
1466 | |
1467 | // Various VFP goodness |
1468 | if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) { |
1469 | // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded. |
1470 | if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) { |
1471 | setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); |
1472 | setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand); |
1473 | } |
1474 | |
1475 | // fp16 is a special v7 extension that adds f16 <-> f32 conversions. |
1476 | if (!Subtarget->hasFP16()) { |
1477 | setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand); |
1478 | setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand); |
1479 | } |
1480 | |
1481 | // Strict floating-point comparisons need custom lowering. |
1482 | setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Custom); |
1483 | setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom); |
1484 | setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Custom); |
1485 | setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Custom); |
1486 | setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Custom); |
1487 | setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Custom); |
1488 | } |
1489 | |
1490 | // Use __sincos_stret if available. |
1491 | if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr && |
1492 | getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) { |
1493 | setOperationAction(ISD::FSINCOS, MVT::f64, Custom); |
1494 | setOperationAction(ISD::FSINCOS, MVT::f32, Custom); |
1495 | } |
1496 | |
1497 | // FP-ARMv8 implements a lot of rounding-like FP operations. |
1498 | if (Subtarget->hasFPARMv8Base()) { |
1499 | setOperationAction(ISD::FFLOOR, MVT::f32, Legal); |
1500 | setOperationAction(ISD::FCEIL, MVT::f32, Legal); |
1501 | setOperationAction(ISD::FROUND, MVT::f32, Legal); |
1502 | setOperationAction(ISD::FTRUNC, MVT::f32, Legal); |
1503 | setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); |
1504 | setOperationAction(ISD::FRINT, MVT::f32, Legal); |
1505 | setOperationAction(ISD::FMINNUM, MVT::f32, Legal); |
1506 | setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); |
1507 | if (Subtarget->hasNEON()) { |
1508 | setOperationAction(ISD::FMINNUM, MVT::v2f32, Legal); |
1509 | setOperationAction(ISD::FMAXNUM, MVT::v2f32, Legal); |
1510 | setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal); |
1511 | setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal); |
1512 | } |
1513 | |
1514 | if (Subtarget->hasFP64()) { |
1515 | setOperationAction(ISD::FFLOOR, MVT::f64, Legal); |
1516 | setOperationAction(ISD::FCEIL, MVT::f64, Legal); |
1517 | setOperationAction(ISD::FROUND, MVT::f64, Legal); |
1518 | setOperationAction(ISD::FTRUNC, MVT::f64, Legal); |
1519 | setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); |
1520 | setOperationAction(ISD::FRINT, MVT::f64, Legal); |
1521 | setOperationAction(ISD::FMINNUM, MVT::f64, Legal); |
1522 | setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); |
1523 | } |
1524 | } |
1525 | |
1526 | // FP16 operations often need to be promoted to call library functions
1527 | if (Subtarget->hasFullFP16()) { |
1528 | setOperationAction(ISD::FREM, MVT::f16, Promote); |
1529 | setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand); |
1530 | setOperationAction(ISD::FSIN, MVT::f16, Promote); |
1531 | setOperationAction(ISD::FCOS, MVT::f16, Promote); |
1532 | setOperationAction(ISD::FSINCOS, MVT::f16, Promote); |
1533 | setOperationAction(ISD::FPOWI, MVT::f16, Promote); |
1534 | setOperationAction(ISD::FPOW, MVT::f16, Promote); |
1535 | setOperationAction(ISD::FEXP, MVT::f16, Promote); |
1536 | setOperationAction(ISD::FEXP2, MVT::f16, Promote); |
1537 | setOperationAction(ISD::FLOG, MVT::f16, Promote); |
1538 | setOperationAction(ISD::FLOG10, MVT::f16, Promote); |
1539 | setOperationAction(ISD::FLOG2, MVT::f16, Promote); |
1540 | |
1541 | setOperationAction(ISD::FROUND, MVT::f16, Legal); |
1542 | } |
1543 | |
1544 | if (Subtarget->hasNEON()) { |
1545 | // vmin and vmax aren't available in a scalar form, so we can use |
1546 | // a NEON instruction with an undef lane instead. This has a performance |
1547 | // penalty on some cores, so we don't do this unless we have been |
1548 | // asked to by the core tuning model. |
1549 | if (Subtarget->useNEONForSinglePrecisionFP()) { |
1550 | setOperationAction(ISD::FMINIMUM, MVT::f32, Legal); |
1551 | setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal); |
1552 | setOperationAction(ISD::FMINIMUM, MVT::f16, Legal); |
1553 | setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal); |
1554 | } |
1555 | setOperationAction(ISD::FMINIMUM, MVT::v2f32, Legal); |
1556 | setOperationAction(ISD::FMAXIMUM, MVT::v2f32, Legal); |
1557 | setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal); |
1558 | setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal); |
1559 | |
1560 | if (Subtarget->hasFullFP16()) { |
1561 | setOperationAction(ISD::FMINNUM, MVT::v4f16, Legal); |
1562 | setOperationAction(ISD::FMAXNUM, MVT::v4f16, Legal); |
1563 | setOperationAction(ISD::FMINNUM, MVT::v8f16, Legal); |
1564 | setOperationAction(ISD::FMAXNUM, MVT::v8f16, Legal); |
1565 | |
1566 | setOperationAction(ISD::FMINIMUM, MVT::v4f16, Legal); |
1567 | setOperationAction(ISD::FMAXIMUM, MVT::v4f16, Legal); |
1568 | setOperationAction(ISD::FMINIMUM, MVT::v8f16, Legal); |
1569 | setOperationAction(ISD::FMAXIMUM, MVT::v8f16, Legal); |
1570 | } |
1571 | } |
1572 | |
1573 | // We have target-specific dag combine patterns for the following nodes: |
1574 | // ARMISD::VMOVRRD - No need to call setTargetDAGCombine |
1575 | setTargetDAGCombine( |
1576 | {ISD::ADD, ISD::SUB, ISD::MUL, ISD::AND, ISD::OR, ISD::XOR}); |
1577 | |
1578 | if (Subtarget->hasMVEIntegerOps()) |
1579 | setTargetDAGCombine(ISD::VSELECT); |
1580 | |
1581 | if (Subtarget->hasV6Ops()) |
1582 | setTargetDAGCombine(ISD::SRL); |
1583 | if (Subtarget->isThumb1Only()) |
1584 | setTargetDAGCombine(ISD::SHL); |
1585 | // Attempt to lower smin/smax to ssat/usat |
1586 | if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || |
1587 | Subtarget->isThumb2()) { |
1588 | setTargetDAGCombine({ISD::SMIN, ISD::SMAX}); |
1589 | } |
1590 | |
1591 | setStackPointerRegisterToSaveRestore(ARM::SP); |
1592 | |
1593 | if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() || |
1594 | !Subtarget->hasVFP2Base() || Subtarget->hasMinSize()) |
1595 | setSchedulingPreference(Sched::RegPressure); |
1596 | else |
1597 | setSchedulingPreference(Sched::Hybrid); |
1598 | |
1599 | //// temporary - rewrite interface to use type |
1600 | MaxStoresPerMemset = 8; |
1601 | MaxStoresPerMemsetOptSize = 4; |
1602 | MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores |
1603 | MaxStoresPerMemcpyOptSize = 2; |
1604 | MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores |
1605 | MaxStoresPerMemmoveOptSize = 2; |
1606 | |
1607 | // On ARM arguments smaller than 4 bytes are extended, so all arguments |
1608 | // are at least 4 bytes aligned. |
1609 | setMinStackArgumentAlignment(Align(4)); |
1610 | |
1611 | // Prefer likely predicted branches to selects on out-of-order cores. |
1612 | PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder(); |
1613 | |
1614 | setPrefLoopAlignment(Align(1ULL << Subtarget->getPrefLoopLogAlignment())); |
1615 | |
1616 | setMinFunctionAlignment(Subtarget->isThumb() ? Align(2) : Align(4)); |
1617 | |
1618 | if (Subtarget->isThumb() || Subtarget->isThumb2()) |
1619 | setTargetDAGCombine(ISD::ABS); |
1620 | } |
1621 | |
1622 | bool ARMTargetLowering::useSoftFloat() const { |
1623 | return Subtarget->useSoftFloat(); |
1624 | } |
1625 | |
1626 | // FIXME: It might make sense to define the representative register class as the |
1627 | // nearest super-register that has a non-null superset. For example, DPR_VFP2 is |
1628 | // a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
1629 | // SPR's representative would be DPR_VFP2. This should work well if register |
1630 | // pressure tracking were modified such that a register use would increment the |
1631 | // pressure of the register class's representative and all of its super
1632 | // classes' representatives transitively. We have not implemented this because |
1633 | // of the difficulty prior to coalescing of modeling operand register classes |
1634 | // due to the common occurrence of cross class copies and subregister insertions |
1635 | // and extractions. |
1636 | std::pair<const TargetRegisterClass *, uint8_t> |
1637 | ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI, |
1638 | MVT VT) const { |
1639 | const TargetRegisterClass *RRC = nullptr; |
1640 | uint8_t Cost = 1; |
1641 | switch (VT.SimpleTy) { |
1642 | default: |
1643 | return TargetLowering::findRepresentativeClass(TRI, VT); |
1644 | // Use DPR as representative register class for all floating point |
1645 | // and vector types. Since there are 32 SPR registers and 32 DPR registers,
1646 | // the cost is 1 for both f32 and f64. |
1647 | case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16: |
1648 | case MVT::v2i32: case MVT::v1i64: case MVT::v2f32: |
1649 | RRC = &ARM::DPRRegClass; |
1650 | // When NEON is used for SP, only half of the register file is available |
1651 | // because operations that define both SP and DP results will be constrained |
1652 | // to the VFP2 class (D0-D15). We currently model this constraint prior to |
1653 | // coalescing by double-counting the SP regs. See the FIXME above. |
1654 | if (Subtarget->useNEONForSinglePrecisionFP()) |
1655 | Cost = 2; |
1656 | break; |
1657 | case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64: |
1658 | case MVT::v4f32: case MVT::v2f64: |
1659 | RRC = &ARM::DPRRegClass; |
1660 | Cost = 2; |
1661 | break; |
1662 | case MVT::v4i64: |
1663 | RRC = &ARM::DPRRegClass; |
1664 | Cost = 4; |
1665 | break; |
1666 | case MVT::v8i64: |
1667 | RRC = &ARM::DPRRegClass; |
1668 | Cost = 8; |
1669 | break; |
1670 | } |
1671 | return std::make_pair(RRC, Cost); |
1672 | } |
1673 | |
1674 | const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { |
1675 | #define MAKE_CASE(V) \ |
1676 | case V: \ |
1677 | return #V; |
1678 | switch ((ARMISD::NodeType)Opcode) { |
1679 | case ARMISD::FIRST_NUMBER: |
1680 | break; |
1681 | MAKE_CASE(ARMISD::Wrapper) |
1682 | MAKE_CASE(ARMISD::WrapperPIC) |
1683 | MAKE_CASE(ARMISD::WrapperJT) |
1684 | MAKE_CASE(ARMISD::COPY_STRUCT_BYVAL) |
1685 | MAKE_CASE(ARMISD::CALL) |
1686 | MAKE_CASE(ARMISD::CALL_PRED) |
1687 | MAKE_CASE(ARMISD::CALL_NOLINK) |
1688 | MAKE_CASE(ARMISD::tSECALL) |
1689 | MAKE_CASE(ARMISD::t2CALL_BTI) |
1690 | MAKE_CASE(ARMISD::BRCOND) |
1691 | MAKE_CASE(ARMISD::BR_JT) |
1692 | MAKE_CASE(ARMISD::BR2_JT) |
1693 | MAKE_CASE(ARMISD::RET_FLAG) |
1694 | MAKE_CASE(ARMISD::SERET_FLAG) |
1695 | MAKE_CASE(ARMISD::INTRET_FLAG) |
1696 | MAKE_CASE(ARMISD::PIC_ADD) |
1697 | MAKE_CASE(ARMISD::CMP) |
1698 | MAKE_CASE(ARMISD::CMN) |
1699 | MAKE_CASE(ARMISD::CMPZ) |
1700 | MAKE_CASE(ARMISD::CMPFP) |
1701 | MAKE_CASE(ARMISD::CMPFPE) |
1702 | MAKE_CASE(ARMISD::CMPFPw0) |
1703 | MAKE_CASE(ARMISD::CMPFPEw0) |
1704 | MAKE_CASE(ARMISD::BCC_i64) |
1705 | MAKE_CASE(ARMISD::FMSTAT) |
1706 | MAKE_CASE(ARMISD::CMOV) |
1707 | MAKE_CASE(ARMISD::SUBS) |
1708 | MAKE_CASE(ARMISD::SSAT) |
1709 | MAKE_CASE(ARMISD::USAT) |
1710 | MAKE_CASE(ARMISD::ASRL) |
1711 | MAKE_CASE(ARMISD::LSRL) |
1712 | MAKE_CASE(ARMISD::LSLL) |
1713 | MAKE_CASE(ARMISD::SRL_FLAG) |
1714 | MAKE_CASE(ARMISD::SRA_FLAG) |
1715 | MAKE_CASE(ARMISD::RRX) |
1716 | MAKE_CASE(ARMISD::ADDC) |
1717 | MAKE_CASE(ARMISD::ADDE) |
1718 | MAKE_CASE(ARMISD::SUBC) |
1719 | MAKE_CASE(ARMISD::SUBE) |
1720 | MAKE_CASE(ARMISD::LSLS) |
1721 | MAKE_CASE(ARMISD::VMOVRRD) |
1722 | MAKE_CASE(ARMISD::VMOVDRR) |
1723 | MAKE_CASE(ARMISD::VMOVhr) |
1724 | MAKE_CASE(ARMISD::VMOVrh) |
1725 | MAKE_CASE(ARMISD::VMOVSR) |
1726 | MAKE_CASE(ARMISD::EH_SJLJ_SETJMP) |
1727 | MAKE_CASE(ARMISD::EH_SJLJ_LONGJMP) |
1728 | MAKE_CASE(ARMISD::EH_SJLJ_SETUP_DISPATCH) |
1729 | MAKE_CASE(ARMISD::TC_RETURN) |
1730 | MAKE_CASE(ARMISD::THREAD_POINTER) |
1731 | MAKE_CASE(ARMISD::DYN_ALLOC) |
1732 | MAKE_CASE(ARMISD::MEMBARRIER_MCR) |
1733 | MAKE_CASE(ARMISD::PRELOAD) |
1734 | MAKE_CASE(ARMISD::LDRD) |
1735 | MAKE_CASE(ARMISD::STRD) |
1736 | MAKE_CASE(ARMISD::WIN__CHKSTK) |
1737 | MAKE_CASE(ARMISD::WIN__DBZCHK) |
1738 | MAKE_CASE(ARMISD::PREDICATE_CAST) |
1739 | MAKE_CASE(ARMISD::VECTOR_REG_CAST) |
1740 | MAKE_CASE(ARMISD::MVESEXT) |
1741 | MAKE_CASE(ARMISD::MVEZEXT) |
1742 | MAKE_CASE(ARMISD::MVETRUNC) |
1743 | MAKE_CASE(ARMISD::VCMP) |
1744 | MAKE_CASE(ARMISD::VCMPZ) |
1745 | MAKE_CASE(ARMISD::VTST) |
1746 | MAKE_CASE(ARMISD::VSHLs) |
1747 | MAKE_CASE(ARMISD::VSHLu) |
1748 | MAKE_CASE(ARMISD::VSHLIMM) |
1749 | MAKE_CASE(ARMISD::VSHRsIMM) |
1750 | MAKE_CASE(ARMISD::VSHRuIMM) |
1751 | MAKE_CASE(ARMISD::VRSHRsIMM) |
1752 | MAKE_CASE(ARMISD::VRSHRuIMM) |
1753 | MAKE_CASE(ARMISD::VRSHRNIMM) |
1754 | MAKE_CASE(ARMISD::VQSHLsIMM) |
1755 | MAKE_CASE(ARMISD::VQSHLuIMM) |
1756 | MAKE_CASE(ARMISD::VQSHLsuIMM) |
1757 | MAKE_CASE(ARMISD::VQSHRNsIMM) |
1758 | MAKE_CASE(ARMISD::VQSHRNuIMM) |
1759 | MAKE_CASE(ARMISD::VQSHRNsuIMM) |
1760 | MAKE_CASE(ARMISD::VQRSHRNsIMM) |
1761 | MAKE_CASE(ARMISD::VQRSHRNuIMM) |
1762 | MAKE_CASE(ARMISD::VQRSHRNsuIMM) |
1763 | MAKE_CASE(ARMISD::VSLIIMM) |
1764 | MAKE_CASE(ARMISD::VSRIIMM) |
1765 | MAKE_CASE(ARMISD::VGETLANEu) |
1766 | MAKE_CASE(ARMISD::VGETLANEs) |
1767 | MAKE_CASE(ARMISD::VMOVIMM) |
1768 | MAKE_CASE(ARMISD::VMVNIMM) |
1769 | MAKE_CASE(ARMISD::VMOVFPIMM) |
1770 | MAKE_CASE(ARMISD::VDUP) |
1771 | MAKE_CASE(ARMISD::VDUPLANE) |
1772 | MAKE_CASE(ARMISD::VEXT) |
1773 | MAKE_CASE(ARMISD::VREV64) |
1774 | MAKE_CASE(ARMISD::VREV32) |
1775 | MAKE_CASE(ARMISD::VREV16) |
1776 | MAKE_CASE(ARMISD::VZIP) |
1777 | MAKE_CASE(ARMISD::VUZP) |
1778 | MAKE_CASE(ARMISD::VTRN) |
1779 | MAKE_CASE(ARMISD::VTBL1) |
1780 | MAKE_CASE(ARMISD::VTBL2) |
1781 | MAKE_CASE(ARMISD::VMOVN) |
1782 | MAKE_CASE(ARMISD::VQMOVNs) |
1783 | MAKE_CASE(ARMISD::VQMOVNu) |
1784 | MAKE_CASE(ARMISD::VCVTN) |
1785 | MAKE_CASE(ARMISD::VCVTL) |
1786 | MAKE_CASE(ARMISD::VIDUP) |
1787 | MAKE_CASE(ARMISD::VMULLs) |
1788 | MAKE_CASE(ARMISD::VMULLu) |
1789 | MAKE_CASE(ARMISD::VQDMULH) |
1790 | MAKE_CASE(ARMISD::VADDVs) |
1791 | MAKE_CASE(ARMISD::VADDVu) |
1792 | MAKE_CASE(ARMISD::VADDVps) |
1793 | MAKE_CASE(ARMISD::VADDVpu) |
1794 | MAKE_CASE(ARMISD::VADDLVs) |
1795 | MAKE_CASE(ARMISD::VADDLVu) |
1796 | MAKE_CASE(ARMISD::VADDLVAs) |
1797 | MAKE_CASE(ARMISD::VADDLVAu) |
1798 | MAKE_CASE(ARMISD::VADDLVps) |
1799 | MAKE_CASE(ARMISD::VADDLVpu) |
1800 | MAKE_CASE(ARMISD::VADDLVAps) |
1801 | MAKE_CASE(ARMISD::VADDLVApu) |
1802 | MAKE_CASE(ARMISD::VMLAVs) |
1803 | MAKE_CASE(ARMISD::VMLAVu) |
1804 | MAKE_CASE(ARMISD::VMLAVps) |
1805 | MAKE_CASE(ARMISD::VMLAVpu) |
1806 | MAKE_CASE(ARMISD::VMLALVs) |
1807 | MAKE_CASE(ARMISD::VMLALVu) |
1808 | MAKE_CASE(ARMISD::VMLALVps) |
1809 | MAKE_CASE(ARMISD::VMLALVpu) |
1810 | MAKE_CASE(ARMISD::VMLALVAs) |
1811 | MAKE_CASE(ARMISD::VMLALVAu) |
1812 | MAKE_CASE(ARMISD::VMLALVAps) |
1813 | MAKE_CASE(ARMISD::VMLALVApu) |
1814 | MAKE_CASE(ARMISD::VMINVu) |
1815 | MAKE_CASE(ARMISD::VMINVs) |
1816 | MAKE_CASE(ARMISD::VMAXVu) |
1817 | MAKE_CASE(ARMISD::VMAXVs) |
1818 | MAKE_CASE(ARMISD::UMAAL) |
1819 | MAKE_CASE(ARMISD::UMLAL) |
1820 | MAKE_CASE(ARMISD::SMLAL) |
1821 | MAKE_CASE(ARMISD::SMLALBB) |
1822 | MAKE_CASE(ARMISD::SMLALBT) |
1823 | MAKE_CASE(ARMISD::SMLALTB) |
1824 | MAKE_CASE(ARMISD::SMLALTT) |
1825 | MAKE_CASE(ARMISD::SMULWB) |
1826 | MAKE_CASE(ARMISD::SMULWT) |
1827 | MAKE_CASE(ARMISD::SMLALD) |
1828 | MAKE_CASE(ARMISD::SMLALDX) |
1829 | MAKE_CASE(ARMISD::SMLSLD) |
1830 | MAKE_CASE(ARMISD::SMLSLDX) |
1831 | MAKE_CASE(ARMISD::SMMLAR) |
1832 | MAKE_CASE(ARMISD::SMMLSR) |
1833 | MAKE_CASE(ARMISD::QADD16b) |
1834 | MAKE_CASE(ARMISD::QSUB16b) |
1835 | MAKE_CASE(ARMISD::QADD8b) |
1836 | MAKE_CASE(ARMISD::QSUB8b) |
1837 | MAKE_CASE(ARMISD::UQADD16b) |
1838 | MAKE_CASE(ARMISD::UQSUB16b) |
1839 | MAKE_CASE(ARMISD::UQADD8b) |
1840 | MAKE_CASE(ARMISD::UQSUB8b) |
1841 | MAKE_CASE(ARMISD::BUILD_VECTOR) |
1842 | MAKE_CASE(ARMISD::BFI) |
1843 | MAKE_CASE(ARMISD::VORRIMM) |
1844 | MAKE_CASE(ARMISD::VBICIMM) |
1845 | MAKE_CASE(ARMISD::VBSP) |
1846 | MAKE_CASE(ARMISD::MEMCPY) |
1847 | MAKE_CASE(ARMISD::VLD1DUP) |
1848 | MAKE_CASE(ARMISD::VLD2DUP) |
1849 | MAKE_CASE(ARMISD::VLD3DUP) |
1850 | MAKE_CASE(ARMISD::VLD4DUP) |
1851 | MAKE_CASE(ARMISD::VLD1_UPD) |
1852 | MAKE_CASE(ARMISD::VLD2_UPD) |
1853 | MAKE_CASE(ARMISD::VLD3_UPD) |
1854 | MAKE_CASE(ARMISD::VLD4_UPD) |
1855 | MAKE_CASE(ARMISD::VLD1x2_UPD) |
1856 | MAKE_CASE(ARMISD::VLD1x3_UPD) |
1857 | MAKE_CASE(ARMISD::VLD1x4_UPD) |
1858 | MAKE_CASE(ARMISD::VLD2LN_UPD) |
1859 | MAKE_CASE(ARMISD::VLD3LN_UPD) |
1860 | MAKE_CASE(ARMISD::VLD4LN_UPD) |
1861 | MAKE_CASE(ARMISD::VLD1DUP_UPD) |
1862 | MAKE_CASE(ARMISD::VLD2DUP_UPD) |
1863 | MAKE_CASE(ARMISD::VLD3DUP_UPD) |
1864 | MAKE_CASE(ARMISD::VLD4DUP_UPD) |
1865 | MAKE_CASE(ARMISD::VST1_UPD) |
1866 | MAKE_CASE(ARMISD::VST2_UPD) |
1867 | MAKE_CASE(ARMISD::VST3_UPD) |
1868 | MAKE_CASE(ARMISD::VST4_UPD) |
1869 | MAKE_CASE(ARMISD::VST1x2_UPD) |
1870 | MAKE_CASE(ARMISD::VST1x3_UPD) |
1871 | MAKE_CASE(ARMISD::VST1x4_UPD) |
1872 | MAKE_CASE(ARMISD::VST2LN_UPD) |
1873 | MAKE_CASE(ARMISD::VST3LN_UPD) |
1874 | MAKE_CASE(ARMISD::VST4LN_UPD) |
1875 | MAKE_CASE(ARMISD::WLS) |
1876 | MAKE_CASE(ARMISD::WLSSETUP) |
1877 | MAKE_CASE(ARMISD::LE) |
1878 | MAKE_CASE(ARMISD::LOOP_DEC) |
1879 | MAKE_CASE(ARMISD::CSINV) |
1880 | MAKE_CASE(ARMISD::CSNEG) |
1881 | MAKE_CASE(ARMISD::CSINC) |
1882 | MAKE_CASE(ARMISD::MEMCPYLOOP) |
1883 | MAKE_CASE(ARMISD::MEMSETLOOP) |
1884 | #undef MAKE_CASE |
1885 | } |
1886 | return nullptr; |
1887 | } |
1888 | |
1889 | EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, |
1890 | EVT VT) const { |
1891 | if (!VT.isVector()) |
1892 | return getPointerTy(DL); |
1893 | |
1894 | // MVE has a predicate register. |
1895 | if ((Subtarget->hasMVEIntegerOps() && |
1896 | (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 || |
1897 | VT == MVT::v16i8)) || |
1898 | (Subtarget->hasMVEFloatOps() && |
1899 | (VT == MVT::v2f64 || VT == MVT::v4f32 || VT == MVT::v8f16))) |
1900 | return MVT::getVectorVT(MVT::i1, VT.getVectorElementCount()); |
1901 | return VT.changeVectorElementTypeToInteger(); |
1902 | } |
1903 | |
1904 | /// getRegClassFor - Return the register class that should be used for the |
1905 | /// specified value type. |
1906 | const TargetRegisterClass * |
1907 | ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const { |
1908 | (void)isDivergent; |
1909 | // Map v4i64 to QQ registers but do not make the type legal. Similarly map |
1910 | // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to |
1911 | // load / store 4 to 8 consecutive NEON D registers, or 2 to 4 consecutive |
1912 | // MVE Q registers. |
1913 | if (Subtarget->hasNEON()) { |
1914 | if (VT == MVT::v4i64) |
1915 | return &ARM::QQPRRegClass; |
1916 | if (VT == MVT::v8i64) |
1917 | return &ARM::QQQQPRRegClass; |
1918 | } |
1919 | if (Subtarget->hasMVEIntegerOps()) { |
1920 | if (VT == MVT::v4i64) |
1921 | return &ARM::MQQPRRegClass; |
1922 | if (VT == MVT::v8i64) |
1923 | return &ARM::MQQQQPRRegClass; |
1924 | } |
1925 | return TargetLowering::getRegClassFor(VT); |
1926 | } |
1927 | |
1928 | // memcpy, and other memory intrinsics, typically try to use LDM/STM if the
1929 | // source/dest is aligned and the copy size is large enough. We therefore want |
1930 | // to align such objects passed to memory intrinsics. |
1931 | bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, |
1932 | Align &PrefAlign) const { |
1933 | if (!isa<MemIntrinsic>(CI)) |
1934 | return false; |
1935 | MinSize = 8; |
1936 | // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1 |
1937 | // cycle faster than 4-byte aligned LDM. |
1938 | PrefAlign = |
1939 | (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? Align(8) : Align(4)); |
1940 | return true; |
1941 | } |
1942 | |
1943 | // Create a fast isel object. |
1944 | FastISel * |
1945 | ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, |
1946 | const TargetLibraryInfo *libInfo) const { |
1947 | return ARM::createFastISel(funcInfo, libInfo); |
1948 | } |
1949 | |
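/// getSchedulingPreference - Prefer ILP scheduling for nodes that produce
/// floating-point or vector values, or whose machine instruction has a long
/// result latency; otherwise schedule for register pressure.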
1950 | Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { |
1951 | unsigned NumVals = N->getNumValues(); |
1952 | if (!NumVals) |
1953 | return Sched::RegPressure; |
1954 | |
1955 | for (unsigned i = 0; i != NumVals; ++i) { |
1956 | EVT VT = N->getValueType(i); |
1957 | if (VT == MVT::Glue || VT == MVT::Other) |
1958 | continue; |
1959 | if (VT.isFloatingPoint() || VT.isVector()) |
1960 | return Sched::ILP; |
1961 | } |
1962 | |
1963 | if (!N->isMachineOpcode()) |
1964 | return Sched::RegPressure; |
1965 | |
1966 | // Loads are scheduled for latency even if the instruction itinerary
1967 | // is not available. |
1968 | const TargetInstrInfo *TII = Subtarget->getInstrInfo(); |
1969 | const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); |
1970 | |
1971 | if (MCID.getNumDefs() == 0) |
1972 | return Sched::RegPressure; |
1973 | if (!Itins->isEmpty() && |
1974 | Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2) |
1975 | return Sched::ILP; |
1976 | |
1977 | return Sched::RegPressure; |
1978 | } |
1979 | |
1980 | //===----------------------------------------------------------------------===// |
1981 | // Lowering Code |
1982 | //===----------------------------------------------------------------------===// |
1983 | |
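// isSRL16 / isSRA16 / isSHL16 - Match a logical right shift, arithmetic
// right shift, or left shift of a value by a constant 16.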
1984 | static bool isSRL16(const SDValue &Op) { |
1985 | if (Op.getOpcode() != ISD::SRL) |
1986 | return false; |
1987 | if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1))) |
1988 | return Const->getZExtValue() == 16; |
1989 | return false; |
1990 | } |
1991 | |
1992 | static bool isSRA16(const SDValue &Op) { |
1993 | if (Op.getOpcode() != ISD::SRA) |
1994 | return false; |
1995 | if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1))) |
1996 | return Const->getZExtValue() == 16; |
1997 | return false; |
1998 | } |
1999 | |
2000 | static bool isSHL16(const SDValue &Op) { |
2001 | if (Op.getOpcode() != ISD::SHL) |
2002 | return false; |
2003 | if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1))) |
2004 | return Const->getZExtValue() == 16; |
2005 | return false; |
2006 | } |
2007 | |
2008 | // Check for a signed 16-bit value. We special case SRA because it makes it |
2009 | // simpler when also looking for SRAs that aren't sign extending a
2010 | // smaller value. Without the check, we'd need to take extra care with |
2011 | // checking order for some operations. |
2012 | static bool isS16(const SDValue &Op, SelectionDAG &DAG) { |
2013 | if (isSRA16(Op)) |
2014 | return isSHL16(Op.getOperand(0)); |
2015 | return DAG.ComputeNumSignBits(Op) == 17; |
2016 | } |
2017 | |
2018 | /// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC |
2019 | static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) { |
2020 | switch (CC) { |
2021 | default: llvm_unreachable("Unknown condition code!")::llvm::llvm_unreachable_internal("Unknown condition code!", "llvm/lib/Target/ARM/ARMISelLowering.cpp" , 2021); |
2022 | case ISD::SETNE: return ARMCC::NE; |
2023 | case ISD::SETEQ: return ARMCC::EQ; |
2024 | case ISD::SETGT: return ARMCC::GT; |
2025 | case ISD::SETGE: return ARMCC::GE; |
2026 | case ISD::SETLT: return ARMCC::LT; |
2027 | case ISD::SETLE: return ARMCC::LE; |
2028 | case ISD::SETUGT: return ARMCC::HI; |
2029 | case ISD::SETUGE: return ARMCC::HS; |
2030 | case ISD::SETULT: return ARMCC::LO; |
2031 | case ISD::SETULE: return ARMCC::LS; |
2032 | } |
2033 | } |
2034 | |
2035 | /// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC. |
2036 | static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, |
2037 | ARMCC::CondCodes &CondCode2) { |
2038 | CondCode2 = ARMCC::AL; |
2039 | switch (CC) { |
2040 | default: llvm_unreachable("Unknown FP condition!")::llvm::llvm_unreachable_internal("Unknown FP condition!", "llvm/lib/Target/ARM/ARMISelLowering.cpp" , 2040); |
2041 | case ISD::SETEQ: |
2042 | case ISD::SETOEQ: CondCode = ARMCC::EQ; break; |
2043 | case ISD::SETGT: |
2044 | case ISD::SETOGT: CondCode = ARMCC::GT; break; |
2045 | case ISD::SETGE: |
2046 | case ISD::SETOGE: CondCode = ARMCC::GE; break; |
2047 | case ISD::SETOLT: CondCode = ARMCC::MI; break; |
2048 | case ISD::SETOLE: CondCode = ARMCC::LS; break; |
2049 | case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break; |
2050 | case ISD::SETO: CondCode = ARMCC::VC; break; |
2051 | case ISD::SETUO: CondCode = ARMCC::VS; break; |
2052 | case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break; |
2053 | case ISD::SETUGT: CondCode = ARMCC::HI; break; |
2054 | case ISD::SETUGE: CondCode = ARMCC::PL; break; |
2055 | case ISD::SETLT: |
2056 | case ISD::SETULT: CondCode = ARMCC::LT; break; |
2057 | case ISD::SETLE: |
2058 | case ISD::SETULE: CondCode = ARMCC::LE; break; |
2059 | case ISD::SETNE: |
2060 | case ISD::SETUNE: CondCode = ARMCC::NE; break; |
2061 | } |
2062 | } |
2063 | |
2064 | //===----------------------------------------------------------------------===// |
2065 | // Calling Convention Implementation |
2066 | //===----------------------------------------------------------------------===// |
2067 | |
2068 | /// getEffectiveCallingConv - Get the effective calling convention, taking into |
2069 | /// account presence of floating point hardware and calling convention |
2070 | /// limitations, such as support for variadic functions. |
2071 | CallingConv::ID |
2072 | ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC, |
2073 | bool isVarArg) const { |
2074 | switch (CC) { |
2075 | default: |
2076 | report_fatal_error("Unsupported calling convention"); |
2077 | case CallingConv::ARM_AAPCS: |
2078 | case CallingConv::ARM_APCS: |
2079 | case CallingConv::GHC: |
2080 | case CallingConv::CFGuard_Check: |
2081 | return CC; |
2082 | case CallingConv::PreserveMost: |
2083 | return CallingConv::PreserveMost; |
2084 | case CallingConv::ARM_AAPCS_VFP: |
2085 | case CallingConv::Swift: |
2086 | case CallingConv::SwiftTail: |
2087 | return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP; |
2088 | case CallingConv::C: |
2089 | case CallingConv::Tail: |
2090 | if (!Subtarget->isAAPCS_ABI()) |
2091 | return CallingConv::ARM_APCS; |
2092 | else if (Subtarget->hasFPRegs() && !Subtarget->isThumb1Only() && |
2093 | getTargetMachine().Options.FloatABIType == FloatABI::Hard && |
2094 | !isVarArg) |
2095 | return CallingConv::ARM_AAPCS_VFP; |
2096 | else |
2097 | return CallingConv::ARM_AAPCS; |
2098 | case CallingConv::Fast: |
2099 | case CallingConv::CXX_FAST_TLS: |
2100 | if (!Subtarget->isAAPCS_ABI()) { |
2101 | if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && !isVarArg) |
2102 | return CallingConv::Fast; |
2103 | return CallingConv::ARM_APCS; |
2104 | } else if (Subtarget->hasVFP2Base() && |
2105 | !Subtarget->isThumb1Only() && !isVarArg) |
2106 | return CallingConv::ARM_AAPCS_VFP; |
2107 | else |
2108 | return CallingConv::ARM_AAPCS; |
2109 | } |
2110 | } |
2111 | |
2112 | CCAssignFn *ARMTargetLowering::CCAssignFnForCall(CallingConv::ID CC, |
2113 | bool isVarArg) const { |
2114 | return CCAssignFnForNode(CC, false, isVarArg); |
2115 | } |
2116 | |
2117 | CCAssignFn *ARMTargetLowering::CCAssignFnForReturn(CallingConv::ID CC, |
2118 | bool isVarArg) const { |
2119 | return CCAssignFnForNode(CC, true, isVarArg); |
2120 | } |
2121 | |
2122 | /// CCAssignFnForNode - Selects the correct CCAssignFn for the given |
2123 | /// CallingConvention. |
2124 | CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, |
2125 | bool Return, |
2126 | bool isVarArg) const { |
2127 | switch (getEffectiveCallingConv(CC, isVarArg)) { |
2128 | default: |
2129 | report_fatal_error("Unsupported calling convention"); |
2130 | case CallingConv::ARM_APCS: |
2131 | return (Return ? RetCC_ARM_APCS : CC_ARM_APCS); |
2132 | case CallingConv::ARM_AAPCS: |
2133 | return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); |
2134 | case CallingConv::ARM_AAPCS_VFP: |
2135 | return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); |
2136 | case CallingConv::Fast: |
2137 | return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS); |
2138 | case CallingConv::GHC: |
2139 | return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC); |
2140 | case CallingConv::PreserveMost: |
2141 | return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); |
2142 | case CallingConv::CFGuard_Check: |
2143 | return (Return ? RetCC_ARM_AAPCS : CC_ARM_Win32_CFGuard_Check); |
2144 | } |
2145 | } |
2146 | |
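/// MoveToHPR - Convert a value that was passed in the wider location type
/// LocVT (e.g. i32 or f32) into the half-precision value type ValVT, using
/// VMOVhr when full fp16 is available and a truncate/bitcast sequence
/// otherwise.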
2147 | SDValue ARMTargetLowering::MoveToHPR(const SDLoc &dl, SelectionDAG &DAG, |
2148 | MVT LocVT, MVT ValVT, SDValue Val) const { |
2149 | Val = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocVT.getSizeInBits()), |
2150 | Val); |
2151 | if (Subtarget->hasFullFP16()) { |
2152 | Val = DAG.getNode(ARMISD::VMOVhr, dl, ValVT, Val); |
2153 | } else { |
2154 | Val = DAG.getNode(ISD::TRUNCATE, dl, |
2155 | MVT::getIntegerVT(ValVT.getSizeInBits()), Val); |
2156 | Val = DAG.getNode(ISD::BITCAST, dl, ValVT, Val); |
2157 | } |
2158 | return Val; |
2159 | } |
2160 | |
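/// MoveFromHPR - Widen a half-precision value of type ValVT back into the
/// location type LocVT used for passing it, using VMOVrh when full fp16 is
/// available and a bitcast/zero-extend sequence otherwise.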
2161 | SDValue ARMTargetLowering::MoveFromHPR(const SDLoc &dl, SelectionDAG &DAG, |
2162 | MVT LocVT, MVT ValVT, |
2163 | SDValue Val) const { |
2164 | if (Subtarget->hasFullFP16()) { |
2165 | Val = DAG.getNode(ARMISD::VMOVrh, dl, |
2166 | MVT::getIntegerVT(LocVT.getSizeInBits()), Val); |
2167 | } else { |
2168 | Val = DAG.getNode(ISD::BITCAST, dl, |
2169 | MVT::getIntegerVT(ValVT.getSizeInBits()), Val); |
2170 | Val = DAG.getNode(ISD::ZERO_EXTEND, dl, |
2171 | MVT::getIntegerVT(LocVT.getSizeInBits()), Val); |
2172 | } |
2173 | return DAG.getNode(ISD::BITCAST, dl, LocVT, Val); |
2174 | } |
2175 | |
2176 | /// LowerCallResult - Lower the result values of a call into the |
2177 | /// appropriate copies out of appropriate physical registers. |
2178 | SDValue ARMTargetLowering::LowerCallResult( |
2179 | SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, |
2180 | const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, |
2181 | SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn, |
2182 | SDValue ThisVal) const { |
2183 | // Assign locations to each value returned by this call. |
2184 | SmallVector<CCValAssign, 16> RVLocs; |
2185 | CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, |
2186 | *DAG.getContext()); |
2187 | CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg)); |
2188 | |
2189 | // Copy all of the result registers out of their specified physreg. |
2190 | for (unsigned i = 0; i != RVLocs.size(); ++i) { |
2191 | CCValAssign VA = RVLocs[i]; |
2192 | |
2193 | // Pass 'this' value directly from the argument to return value, to avoid |
2194 | // reg unit interference |
2195 | if (i == 0 && isThisReturn) { |
2196 | assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
2197 | "unexpected return calling convention register assignment");
2198 | InVals.push_back(ThisVal); |
2199 | continue; |
2200 | } |
2201 | |
2202 | SDValue Val; |
2203 | if (VA.needsCustom() && |
2204 | (VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2f64)) { |
2205 | // Handle f64 or half of a v2f64. |
2206 | SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, |
2207 | InFlag); |
2208 | Chain = Lo.getValue(1); |
2209 | InFlag = Lo.getValue(2); |
2210 | VA = RVLocs[++i]; // skip ahead to next loc |
2211 | SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, |
2212 | InFlag); |
2213 | Chain = Hi.getValue(1); |
2214 | InFlag = Hi.getValue(2); |
2215 | if (!Subtarget->isLittle()) |
2216 | std::swap (Lo, Hi); |
2217 | Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); |
2218 | |
2219 | if (VA.getLocVT() == MVT::v2f64) { |
2220 | SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); |
2221 | Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, |
2222 | DAG.getConstant(0, dl, MVT::i32)); |
2223 | |
2224 | VA = RVLocs[++i]; // skip ahead to next loc |
2225 | Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); |
2226 | Chain = Lo.getValue(1); |
2227 | InFlag = Lo.getValue(2); |
2228 | VA = RVLocs[++i]; // skip ahead to next loc |
2229 | Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); |
2230 | Chain = Hi.getValue(1); |
2231 | InFlag = Hi.getValue(2); |
2232 | if (!Subtarget->isLittle()) |
2233 | std::swap (Lo, Hi); |
2234 | Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); |
2235 | Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, |
2236 | DAG.getConstant(1, dl, MVT::i32)); |
2237 | } |
2238 | } else { |
2239 | Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(), |
2240 | InFlag); |
2241 | Chain = Val.getValue(1); |
2242 | InFlag = Val.getValue(2); |
2243 | } |
2244 | |
2245 | switch (VA.getLocInfo()) { |
2246 | default: llvm_unreachable("Unknown loc info!");
2247 | case CCValAssign::Full: break; |
2248 | case CCValAssign::BCvt: |
2249 | Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val); |
2250 | break; |
2251 | } |
2252 | |
2253 | // f16 arguments have their size extended to 4 bytes and passed as if they |
2254 | // had been copied to the LSBs of a 32-bit register. |
2255 | // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI) |
2256 | if (VA.needsCustom() && |
2257 | (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) |
2258 | Val = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Val); |
2259 | |
2260 | InVals.push_back(Val); |
2261 | } |
2262 | |
2263 | return Chain; |
2264 | } |
2265 | |
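/// computeAddrForCallArg - Compute the address and MachinePointerInfo for an
/// outgoing call argument assigned to the stack. Tail calls store into fixed
/// frame objects offset by SPDiff; other calls store at SP plus the
/// argument's stack offset.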
2266 | std::pair<SDValue, MachinePointerInfo> ARMTargetLowering::computeAddrForCallArg( |
2267 | const SDLoc &dl, SelectionDAG &DAG, const CCValAssign &VA, SDValue StackPtr, |
2268 | bool IsTailCall, int SPDiff) const { |
2269 | SDValue DstAddr; |
2270 | MachinePointerInfo DstInfo; |
2271 | int32_t Offset = VA.getLocMemOffset(); |
2272 | MachineFunction &MF = DAG.getMachineFunction(); |
2273 | |
2274 | if (IsTailCall) { |
2275 | Offset += SPDiff; |
2276 | auto PtrVT = getPointerTy(DAG.getDataLayout()); |
2277 | int Size = VA.getLocVT().getFixedSizeInBits() / 8; |
2278 | int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true); |
2279 | DstAddr = DAG.getFrameIndex(FI, PtrVT); |
2280 | DstInfo = |
2281 | MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI); |
2282 | } else { |
2283 | SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl); |
2284 | DstAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), |
2285 | StackPtr, PtrOff); |
2286 | DstInfo = |
2287 | MachinePointerInfo::getStack(DAG.getMachineFunction(), Offset); |
2288 | } |
2289 | |
2290 | return std::make_pair(DstAddr, DstInfo); |
2291 | } |
2292 | |
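/// PassF64ArgInRegs - Split an f64 argument (or one half of a v2f64) into two
/// i32 halves with VMOVRRD and pass them in the assigned register(s); if the
/// second half is assigned to memory, store it to its stack slot instead.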
2293 | void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG, |
2294 | SDValue Chain, SDValue &Arg, |
2295 | RegsToPassVector &RegsToPass, |
2296 | CCValAssign &VA, CCValAssign &NextVA, |
2297 | SDValue &StackPtr, |
2298 | SmallVectorImpl<SDValue> &MemOpChains, |
2299 | bool IsTailCall, |
2300 | int SPDiff) const { |
2301 | SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, |
2302 | DAG.getVTList(MVT::i32, MVT::i32), Arg); |
2303 | unsigned id = Subtarget->isLittle() ? 0 : 1; |
2304 | RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id))); |
2305 | |
2306 | if (NextVA.isRegLoc()) |
2307 | RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id))); |
2308 | else { |
2309 | assert(NextVA.isMemLoc());
2310 | if (!StackPtr.getNode()) |
2311 | StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, |
2312 | getPointerTy(DAG.getDataLayout())); |
2313 | |
2314 | SDValue DstAddr; |
2315 | MachinePointerInfo DstInfo; |
2316 | std::tie(DstAddr, DstInfo) = |
2317 | computeAddrForCallArg(dl, DAG, NextVA, StackPtr, IsTailCall, SPDiff); |
2318 | MemOpChains.push_back( |
2319 | DAG.getStore(Chain, dl, fmrrd.getValue(1 - id), DstAddr, DstInfo)); |
2320 | } |
2321 | } |
2322 | |
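// Return true if the calling convention guarantees the call will be tail-call
// optimized: fastcc with GuaranteedTailCallOpt, or the Tail / SwiftTail
// conventions.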
2323 | static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) { |
2324 | return (CC == CallingConv::Fast && GuaranteeTailCalls) || |
2325 | CC == CallingConv::Tail || CC == CallingConv::SwiftTail; |
2326 | } |
2327 | |
2328 | /// LowerCall - Lowering a call into a callseq_start <- |
2329 | /// ARMISD:CALL <- callseq_end chain. Also add input and output parameter |
2330 | /// nodes. |
2331 | SDValue |
2332 | ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, |
2333 | SmallVectorImpl<SDValue> &InVals) const { |
2334 | SelectionDAG &DAG = CLI.DAG; |
2335 | SDLoc &dl = CLI.DL; |
2336 | SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; |
2337 | SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; |
2338 | SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; |
2339 | SDValue Chain = CLI.Chain; |
2340 | SDValue Callee = CLI.Callee; |
2341 | bool &isTailCall = CLI.IsTailCall; |
2342 | CallingConv::ID CallConv = CLI.CallConv; |
2343 | bool doesNotRet = CLI.DoesNotReturn; |
2344 | bool isVarArg = CLI.IsVarArg; |
2345 | |
2346 | MachineFunction &MF = DAG.getMachineFunction(); |
2347 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); |
2348 | MachineFunction::CallSiteInfo CSInfo; |
2349 | bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); |
2350 | bool isThisReturn = false; |
2351 | bool isCmseNSCall = false; |
2352 | bool isSibCall = false; |
2353 | bool PreferIndirect = false; |
2354 | bool GuardWithBTI = false; |
2355 | |
2356 | // Lower 'returns_twice' calls to a pseudo-instruction. |
2357 | if (CLI.CB && CLI.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) && |
2358 | !Subtarget->noBTIAtReturnTwice()) |
2359 | GuardWithBTI = AFI->branchTargetEnforcement(); |
2360 | |
2361 | // Determine whether this is a non-secure function call. |
2362 | if (CLI.CB && CLI.CB->getAttributes().hasFnAttr("cmse_nonsecure_call")) |
2363 | isCmseNSCall = true; |
2364 | |
2365 | // Disable tail calls if they're not supported. |
2366 | if (!Subtarget->supportsTailCall()) |
2367 | isTailCall = false; |
2368 | |
2369 | // For both the non-secure calls and the returns from a CMSE entry function, |
2370 | // the function needs to do some extra work afte r the call, or before the |
2371 | // return, respectively, thus it cannot end with atail call |
2372 | if (isCmseNSCall || AFI->isCmseNSEntryFunction()) |
2373 | isTailCall = false; |
2374 | |
2375 | if (isa<GlobalAddressSDNode>(Callee)) { |
2376 | // If we're optimizing for minimum size and the function is called three or |
2377 | // more times in this block, we can improve codesize by calling indirectly |
2378 | // as BLXr has a 16-bit encoding. |
2379 | auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal(); |
2380 | if (CLI.CB) { |
2381 | auto *BB = CLI.CB->getParent(); |
2382 | PreferIndirect = Subtarget->isThumb() && Subtarget->hasMinSize() && |
2383 | count_if(GV->users(), [&BB](const User *U) { |
2384 | return isa<Instruction>(U) && |
2385 | cast<Instruction>(U)->getParent() == BB; |
2386 | }) > 2; |
2387 | } |
2388 | } |
2389 | if (isTailCall) { |
2390 | // Check if it's really possible to do a tail call. |
2391 | isTailCall = IsEligibleForTailCallOptimization( |
2392 | Callee, CallConv, isVarArg, isStructRet, |
2393 | MF.getFunction().hasStructRetAttr(), Outs, OutVals, Ins, DAG, |
2394 | PreferIndirect); |
2395 | |
2396 | if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt && |
2397 | CallConv != CallingConv::Tail && CallConv != CallingConv::SwiftTail) |
2398 | isSibCall = true; |
2399 | |
2400 | // We don't support GuaranteedTailCallOpt for ARM, only automatically |
2401 | // detected sibcalls. |
2402 | if (isTailCall) |
2403 | ++NumTailCalls; |
2404 | } |
2405 | |
2406 | if (!isTailCall && CLI.CB && CLI.CB->isMustTailCall()) |
2407 | report_fatal_error("failed to perform tail call elimination on a call " |
2408 | "site marked musttail"); |
2409 | // Analyze operands of the call, assigning locations to each operand. |
2410 | SmallVector<CCValAssign, 16> ArgLocs; |
2411 | CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, |
2412 | *DAG.getContext()); |
2413 | CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg)); |
2414 | |
2415 | // Get a count of how many bytes are to be pushed on the stack. |
2416 | unsigned NumBytes = CCInfo.getNextStackOffset(); |
2417 | |
2418 | // SPDiff is the byte offset of the call's argument area from the callee's. |
2419 | // Stores to callee stack arguments will be placed in FixedStackSlots offset |
2420 | // by this amount for a tail call. In a sibling call it must be 0 because the |
2421 | // caller will deallocate the entire stack and the callee still expects its |
2422 | // arguments to begin at SP+0. Completely unused for non-tail calls. |
2423 | int SPDiff = 0; |
2424 | |
2425 | if (isTailCall && !isSibCall) { |
2426 | auto FuncInfo = MF.getInfo<ARMFunctionInfo>(); |
2427 | unsigned NumReusableBytes = FuncInfo->getArgumentStackSize(); |
2428 | |
2429 | // Since callee will pop argument stack as a tail call, we must keep the |
2430 | // popped size 16-byte aligned. |
2431 | Align StackAlign = DAG.getDataLayout().getStackAlignment(); |
2432 | NumBytes = alignTo(NumBytes, StackAlign); |
2433 | |
2434 | // SPDiff will be negative if this tail call requires more space than we |
2435 | // would automatically have in our incoming argument space. Positive if we |
2436 | // can actually shrink the stack. |
2437 | SPDiff = NumReusableBytes - NumBytes; |
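     | // E.g. if the caller's incoming argument area is 8 bytes
     | // (NumReusableBytes == 8) and this tail call needs 24 bytes of stack
     | // arguments (NumBytes == 24), SPDiff is -16 and 16 extra bytes must be
     | // reserved below.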
2438 | |
2439 | // If this call requires more stack than we have available from |
2440 | // LowerFormalArguments, tell FrameLowering to reserve space for it. |
2441 | if (SPDiff < 0 && AFI->getArgRegsSaveSize() < (unsigned)-SPDiff) |
2442 | AFI->setArgRegsSaveSize(-SPDiff); |
2443 | } |
2444 | |
2445 | if (isSibCall) { |
2446 | // For sibling tail calls, memory operands are available in our caller's stack. |
2447 | NumBytes = 0; |
2448 | } else { |
2449 | // Adjust the stack pointer for the new arguments... |
2450 | // These operations are automatically eliminated by the prolog/epilog pass |
2451 | Chain = DAG.getCALLSEQ_START(Chain, isTailCall ? 0 : NumBytes, 0, dl); |
2452 | } |
2453 | |
2454 | SDValue StackPtr = |
2455 | DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout())); |
2456 | |
2457 | RegsToPassVector RegsToPass; |
2458 | SmallVector<SDValue, 8> MemOpChains; |
2459 | |
2460 | // During a tail call, stores to the argument area must happen after all of |
2461 | // the function's incoming arguments have been loaded because they may alias. |
2462 | // This is done by folding in a TokenFactor from LowerFormalArguments, but |
2463 | // there's no point in doing so repeatedly so this tracks whether that's |
2464 | // happened yet. |
2465 | bool AfterFormalArgLoads = false; |
2466 | |
2467 | // Walk the register/memloc assignments, inserting copies/loads. In the case |
2468 | // of tail call optimization, arguments are handled later. |
2469 | for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); |
2470 | i != e; |
2471 | ++i, ++realArgIdx) { |
2472 | CCValAssign &VA = ArgLocs[i]; |
2473 | SDValue Arg = OutVals[realArgIdx]; |
2474 | ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; |
2475 | bool isByVal = Flags.isByVal(); |
2476 | |
2477 | // Promote the value if needed. |
2478 | switch (VA.getLocInfo()) { |
2479 | default: llvm_unreachable("Unknown loc info!");
2480 | case CCValAssign::Full: break; |
2481 | case CCValAssign::SExt: |
2482 | Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); |
2483 | break; |
2484 | case CCValAssign::ZExt: |
2485 | Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); |
2486 | break; |
2487 | case CCValAssign::AExt: |
2488 | Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); |
2489 | break; |
2490 | case CCValAssign::BCvt: |
2491 | Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); |
2492 | break; |
2493 | } |
2494 | |
2495 | if (isTailCall && VA.isMemLoc() && !AfterFormalArgLoads) { |
2496 | Chain = DAG.getStackArgumentTokenFactor(Chain); |
2497 | AfterFormalArgLoads = true; |
2498 | } |
2499 | |
2500 | // f16 arguments have their size extended to 4 bytes and passed as if they |
2501 | // had been copied to the LSBs of a 32-bit register. |
2502 | // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI) |
2503 | if (VA.needsCustom() && |
2504 | (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) { |
2505 | Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg); |
2506 | } else { |
2507 | // f16 arguments could have been extended prior to argument lowering. |
2508 | // Mask the arguments if this is a CMSE nonsecure call.
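     | // Clearing the unused top bits avoids leaking stale register contents
     | // from the secure state to the non-secure callee.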
2509 | auto ArgVT = Outs[realArgIdx].ArgVT; |
2510 | if (isCmseNSCall && (ArgVT == MVT::f16)) { |
2511 | auto LocBits = VA.getLocVT().getSizeInBits(); |
2512 | auto MaskValue = APInt::getLowBitsSet(LocBits, ArgVT.getSizeInBits()); |
2513 | SDValue Mask = |
2514 | DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits)); |
2515 | Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg); |
2516 | Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask); |
2517 | Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); |
2518 | } |
2519 | } |
2520 | |
2521 | // f64 and v2f64 might be passed in i32 pairs and must be split into pieces |
2522 | if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) { |
2523 | SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, |
2524 | DAG.getConstant(0, dl, MVT::i32)); |
2525 | SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, |
2526 | DAG.getConstant(1, dl, MVT::i32)); |
2527 | |
2528 | PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass, VA, ArgLocs[++i], |
2529 | StackPtr, MemOpChains, isTailCall, SPDiff); |
2530 | |
2531 | VA = ArgLocs[++i]; // skip ahead to next loc |
2532 | if (VA.isRegLoc()) { |
2533 | PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass, VA, ArgLocs[++i], |
2534 | StackPtr, MemOpChains, isTailCall, SPDiff); |
2535 | } else { |
2536 | assert(VA.isMemLoc());
2537 | SDValue DstAddr; |
2538 | MachinePointerInfo DstInfo; |
2539 | std::tie(DstAddr, DstInfo) = |
2540 | computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff); |
2541 | MemOpChains.push_back(DAG.getStore(Chain, dl, Op1, DstAddr, DstInfo)); |
2542 | } |
2543 | } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) { |
2544 | PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i], |
2545 | StackPtr, MemOpChains, isTailCall, SPDiff); |
2546 | } else if (VA.isRegLoc()) { |
2547 | if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() && |
2548 | Outs[0].VT == MVT::i32) { |
2549 | assert(VA.getLocVT() == MVT::i32 &&
2550 | "unexpected calling convention register assignment");
2551 | assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
2552 | "unexpected use of 'returned'");
2553 | isThisReturn = true; |
2554 | } |
2555 | const TargetOptions &Options = DAG.getTarget().Options; |
2556 | if (Options.EmitCallSiteInfo) |
2557 | CSInfo.emplace_back(VA.getLocReg(), i); |
2558 | RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); |
2559 | } else if (isByVal) { |
2560 | assert(VA.isMemLoc());
2561 | unsigned offset = 0; |
2562 | |
2563 | // True if this byval aggregate will be split between registers |
2564 | // and memory. |
2565 | unsigned ByValArgsCount = CCInfo.getInRegsParamsCount(); |
2566 | unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed(); |
2567 | |
2568 | if (CurByValIdx < ByValArgsCount) { |
2569 | |
2570 | unsigned RegBegin, RegEnd; |
2571 | CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd); |
2572 | |
2573 | EVT PtrVT = |
2574 | DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); |
2575 | unsigned int i, j; |
2576 | for (i = 0, j = RegBegin; j < RegEnd; i++, j++) { |
2577 | SDValue Const = DAG.getConstant(4*i, dl, MVT::i32); |
2578 | SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); |
2579 | SDValue Load = |
2580 | DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo(), |
2581 | DAG.InferPtrAlign(AddArg)); |
2582 | MemOpChains.push_back(Load.getValue(1)); |
2583 | RegsToPass.push_back(std::make_pair(j, Load)); |
2584 | } |
2585 | |
2586 | // If the parameter size exceeds the register area, the "offset" value
2587 | // helps us calculate the stack slot for the remaining part properly.
2588 | offset = RegEnd - RegBegin; |
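     | // E.g. a 24-byte byval assigned registers r2..r3 gives offset == 2
     | // (8 bytes in registers); the remaining 16 bytes are handled by the
     | // COPY_STRUCT_BYVAL node below.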
2589 | |
2590 | CCInfo.nextInRegsParam(); |
2591 | } |
2592 | |
2593 | if (Flags.getByValSize() > 4*offset) { |
2594 | auto PtrVT = getPointerTy(DAG.getDataLayout()); |
2595 | SDValue Dst; |
2596 | MachinePointerInfo DstInfo; |
2597 | std::tie(Dst, DstInfo) = |
2598 | computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff); |
2599 | SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl); |
2600 | SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset); |
2601 | SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl, |
2602 | MVT::i32); |
2603 | SDValue AlignNode = |
2604 | DAG.getConstant(Flags.getNonZeroByValAlign().value(), dl, MVT::i32); |
2605 | |
2606 | SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); |
2607 | SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode}; |
2608 | MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs, |
2609 | Ops)); |
2610 | } |
2611 | } else { |
2612 | assert(VA.isMemLoc());
2613 | SDValue DstAddr; |
2614 | MachinePointerInfo DstInfo; |
2615 | std::tie(DstAddr, DstInfo) = |
2616 | computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff); |
2617 | |
2618 | SDValue Store = DAG.getStore(Chain, dl, Arg, DstAddr, DstInfo); |
2619 | MemOpChains.push_back(Store); |
2620 | } |
2621 | } |
2622 | |
2623 | if (!MemOpChains.empty()) |
2624 | Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); |
2625 | |
2626 | // Build a sequence of copy-to-reg nodes chained together with token chain |
2627 | // and flag operands which copy the outgoing args into the appropriate regs. |
2628 | SDValue InFlag; |
2629 | for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { |
2630 | Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, |
2631 | RegsToPass[i].second, InFlag); |
2632 | InFlag = Chain.getValue(1); |
2633 | } |
2634 | |
2635 | // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every |
2636 | // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol |
2637 | // node so that legalize doesn't hack it. |
2638 | bool isDirect = false; |
2639 | |
2640 | const TargetMachine &TM = getTargetMachine(); |
2641 | const Module *Mod = MF.getFunction().getParent(); |
2642 | const GlobalValue *GVal = nullptr; |
2643 | if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) |
2644 | GVal = G->getGlobal(); |
2645 | bool isStub = |
2646 | !TM.shouldAssumeDSOLocal(*Mod, GVal) && Subtarget->isTargetMachO(); |
2647 | |
2648 | bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass()); |
2649 | bool isLocalARMFunc = false; |
2650 | auto PtrVt = getPointerTy(DAG.getDataLayout()); |
2651 | |
2652 | if (Subtarget->genLongCalls()) { |
2653 | assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
2654 | "long-calls codegen is not position independent!");
2655 | // Handle a global address or an external symbol. If it's not one of |
2656 | // those, the target's already in a register, so we don't need to do |
2657 | // anything extra. |
2658 | if (isa<GlobalAddressSDNode>(Callee)) { |
2659 | // When generating execute-only code we use a movw/movt pair.
2660 | // Currently execute-only is only available for architectures that
2661 | // support movw/movt, so we are safe to assume that.
2662 | if (Subtarget->genExecuteOnly()) { |
2663 | assert(Subtarget->useMovt() &&
2664 | "long-calls with execute-only requires movt and movw!");
2665 | ++NumMovwMovt; |
2666 | Callee = DAG.getNode(ARMISD::Wrapper, dl, PtrVt, |
2667 | DAG.getTargetGlobalAddress(GVal, dl, PtrVt)); |
2668 | } else { |
2669 | // Create a constant pool entry for the callee address |
2670 | unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); |
2671 | ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create( |
2672 | GVal, ARMPCLabelIndex, ARMCP::CPValue, 0); |
2673 | |
2674 | // Get the address of the callee into a register |
2675 | SDValue Addr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4)); |
2676 | Addr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Addr); |
2677 | Callee = DAG.getLoad( |
2678 | PtrVt, dl, DAG.getEntryNode(), Addr, |
2679 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); |
2680 | } |
2681 | } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) { |
2682 | const char *Sym = S->getSymbol(); |
2683 | |
2684 | // When generating execute-only code we use a movw/movt pair.
2685 | // Currently execute-only is only available for architectures that
2686 | // support movw/movt, so we are safe to assume that.
2687 | if (Subtarget->genExecuteOnly()) { |
2688 | assert(Subtarget->useMovt() &&
2689 | "long-calls with execute-only requires movt and movw!");
2690 | ++NumMovwMovt; |
2691 | Callee = DAG.getNode(ARMISD::Wrapper, dl, PtrVt, |
2692 | DAG.getTargetExternalSymbol(Sym, PtrVt));
2693 | } else { |
2694 | // Create a constant pool entry for the callee address |
2695 | unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); |
2696 | ARMConstantPoolValue *CPV = ARMConstantPoolSymbol::Create( |
2697 | *DAG.getContext(), Sym, ARMPCLabelIndex, 0); |
2698 | |
2699 | // Get the address of the callee into a register |
2700 | SDValue Addr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4)); |
2701 | Addr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Addr); |
2702 | Callee = DAG.getLoad( |
2703 | PtrVt, dl, DAG.getEntryNode(), Addr, |
2704 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); |
2705 | } |
2706 | } |
2707 | } else if (isa<GlobalAddressSDNode>(Callee)) { |
2708 | if (!PreferIndirect) { |
2709 | isDirect = true; |
2710 | bool isDef = GVal->isStrongDefinitionForLinker(); |
2711 | |
2712 | // ARM call to a local ARM function is predicable. |
2713 | isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking); |
2714 | // tBX takes a register source operand. |
2715 | if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { |
2716 | assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
2717 | Callee = DAG.getNode( |
2718 | ARMISD::WrapperPIC, dl, PtrVt, |
2719 | DAG.getTargetGlobalAddress(GVal, dl, PtrVt, 0, ARMII::MO_NONLAZY)); |
2720 | Callee = DAG.getLoad( |
2721 | PtrVt, dl, DAG.getEntryNode(), Callee, |
2722 | MachinePointerInfo::getGOT(DAG.getMachineFunction()), MaybeAlign(), |
2723 | MachineMemOperand::MODereferenceable | |
2724 | MachineMemOperand::MOInvariant); |
2725 | } else if (Subtarget->isTargetCOFF()) { |
2726 | assert(Subtarget->isTargetWindows() &&
2727 | "Windows is the only supported COFF target");
2728 | unsigned TargetFlags = ARMII::MO_NO_FLAG; |
2729 | if (GVal->hasDLLImportStorageClass()) |
2730 | TargetFlags = ARMII::MO_DLLIMPORT; |
2731 | else if (!TM.shouldAssumeDSOLocal(*GVal->getParent(), GVal)) |
2732 | TargetFlags = ARMII::MO_COFFSTUB; |
2733 | Callee = DAG.getTargetGlobalAddress(GVal, dl, PtrVt, /*offset=*/0, |
2734 | TargetFlags); |
2735 | if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB)) |
2736 | Callee = |
2737 | DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), |
2738 | DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee), |
2739 | MachinePointerInfo::getGOT(DAG.getMachineFunction())); |
2740 | } else { |
2741 | Callee = DAG.getTargetGlobalAddress(GVal, dl, PtrVt, 0, 0); |
2742 | } |
2743 | } |
2744 | } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { |
2745 | isDirect = true; |
2746 | // tBX takes a register source operand. |
2747 | const char *Sym = S->getSymbol(); |
2748 | if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { |
2749 | unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); |
2750 | ARMConstantPoolValue *CPV = |
2751 | ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym, |
2752 | ARMPCLabelIndex, 4); |
2753 | SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4)); |
2754 | CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); |
2755 | Callee = DAG.getLoad( |
2756 | PtrVt, dl, DAG.getEntryNode(), CPAddr, |
2757 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); |
2758 | SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); |
2759 | Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel); |
2760 | } else { |
2761 | Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0); |
2762 | } |
2763 | } |
2764 | |
2765 | if (isCmseNSCall) { |
2766 | assert(!isARMFunc && !isDirect &&
2767 | "Cannot handle call to ARM function or direct call");
2768 | if (NumBytes > 0) { |
2769 | DiagnosticInfoUnsupported Diag(DAG.getMachineFunction().getFunction(), |
2770 | "call to non-secure function would " |
2771 | "require passing arguments on stack", |
2772 | dl.getDebugLoc()); |
2773 | DAG.getContext()->diagnose(Diag); |
2774 | } |
2775 | if (isStructRet) { |
2776 | DiagnosticInfoUnsupported Diag( |
2777 | DAG.getMachineFunction().getFunction(), |
2778 | "call to non-secure function would return value through pointer", |
2779 | dl.getDebugLoc()); |
2780 | DAG.getContext()->diagnose(Diag); |
2781 | } |
2782 | } |
2783 | |
2784 | // FIXME: handle tail calls differently. |
2785 | unsigned CallOpc; |
2786 | if (Subtarget->isThumb()) { |
2787 | if (GuardWithBTI) |
2788 | CallOpc = ARMISD::t2CALL_BTI; |
2789 | else if (isCmseNSCall) |
2790 | CallOpc = ARMISD::tSECALL; |
2791 | else if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps()) |
2792 | CallOpc = ARMISD::CALL_NOLINK; |
2793 | else |
2794 | CallOpc = ARMISD::CALL; |
2795 | } else { |
2796 | if (!isDirect && !Subtarget->hasV5TOps()) |
2797 | CallOpc = ARMISD::CALL_NOLINK; |
2798 | else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() && |
2799 | // Emit regular call when code size is the priority |
2800 | !Subtarget->hasMinSize()) |
2801 | // "mov lr, pc; b _foo" to avoid confusing the RSP |
2802 | CallOpc = ARMISD::CALL_NOLINK; |
2803 | else |
2804 | CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL; |
2805 | } |
2806 | |
2807 | // We don't usually want to end the call-sequence here because we would tidy |
2808 | // the frame up *after* the call, however in the ABI-changing tail-call case |
2809 | // we've carefully laid out the parameters so that when sp is reset they'll be |
2810 | // in the correct location. |
2811 | if (isTailCall && !isSibCall) { |
2812 | Chain = DAG.getCALLSEQ_END(Chain, 0, 0, InFlag, dl); |
2813 | InFlag = Chain.getValue(1); |
2814 | } |
2815 | |
2816 | std::vector<SDValue> Ops; |
2817 | Ops.push_back(Chain); |
2818 | Ops.push_back(Callee); |
2819 | |
2820 | if (isTailCall) { |
2821 | Ops.push_back(DAG.getTargetConstant(SPDiff, dl, MVT::i32)); |
2822 | } |
2823 | |
2824 | // Add argument registers to the end of the list so that they are known live |
2825 | // into the call. |
2826 | for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) |
2827 | Ops.push_back(DAG.getRegister(RegsToPass[i].first, |
2828 | RegsToPass[i].second.getValueType())); |
2829 | |
2830 | // Add a register mask operand representing the call-preserved registers. |
2831 | const uint32_t *Mask; |
2832 | const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo(); |
2833 | if (isThisReturn) { |
2834 | // For 'this' returns, use the R0-preserving mask if applicable |
2835 | Mask = ARI->getThisReturnPreservedMask(MF, CallConv); |
2836 | if (!Mask) { |
2837 | // Set isThisReturn to false if the calling convention is not one that |
2838 | // allows 'returned' to be modeled in this way, so LowerCallResult does |
2839 | // not try to pass 'this' straight through |
2840 | isThisReturn = false; |
2841 | Mask = ARI->getCallPreservedMask(MF, CallConv); |
2842 | } |
2843 | } else |
2844 | Mask = ARI->getCallPreservedMask(MF, CallConv); |
2845 | |
2846 | assert(Mask && "Missing call preserved mask for calling convention");
2847 | Ops.push_back(DAG.getRegisterMask(Mask)); |
2848 | |
2849 | if (InFlag.getNode()) |
2850 | Ops.push_back(InFlag); |
2851 | |
2852 | SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); |
2853 | if (isTailCall) { |
2854 | MF.getFrameInfo().setHasTailCall(); |
2855 | SDValue Ret = DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops); |
2856 | DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo)); |
2857 | return Ret; |
2858 | } |
2859 | |
2860 | // Returns a chain and a flag for retval copy to use. |
2861 | Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops); |
2862 | DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); |
2863 | InFlag = Chain.getValue(1); |
2864 | DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo)); |
2865 | |
2866 | // If we're guaranteeing tail-calls will be honoured, the callee must |
2867 | // pop its own argument stack on return. But this call is *not* a tail call so |
2868 | // we need to undo that after it returns to restore the status-quo. |
2869 | bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt; |
2870 | uint64_t CalleePopBytes = |
2871 | canGuaranteeTCO(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : -1ULL; |
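     | // E.g. under -tailcallopt with a fastcc callee and NumBytes == 20, the
     | // callee is expected to pop 32 bytes (20 aligned up to 16) on return.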
2872 | |
2873 | Chain = DAG.getCALLSEQ_END(Chain, NumBytes, CalleePopBytes, InFlag, dl); |
2874 | if (!Ins.empty()) |
2875 | InFlag = Chain.getValue(1); |
2876 | |
2877 | // Handle result values, copying them out of physregs into vregs that we |
2878 | // return. |
2879 | return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG, |
2880 | InVals, isThisReturn, |
2881 | isThisReturn ? OutVals[0] : SDValue()); |
2882 | } |
2883 | |
2884 | /// HandleByVal - Every parameter *after* a byval parameter is passed |
2885 | /// on the stack. Remember the next parameter register to allocate, |
2886 | /// and then confiscate the rest of the parameter registers to ensure
2887 | /// this. |
2888 | void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size, |
2889 | Align Alignment) const { |
2890 | // Byval (as with any stack) slots are always at least 4 byte aligned. |
2891 | Alignment = std::max(Alignment, Align(4)); |
2892 | |
2893 | unsigned Reg = State->AllocateReg(GPRArgRegs); |
2894 | if (!Reg) |
2895 | return; |
2896 | |
2897 | unsigned AlignInRegs = Alignment.value() / 4; |
2898 | unsigned Waste = (ARM::R4 - Reg) % AlignInRegs; |
2899 | for (unsigned i = 0; i < Waste; ++i) |
2900 | Reg = State->AllocateReg(GPRArgRegs); |
2901 | |
2902 | if (!Reg) |
2903 | return; |
2904 | |
2905 | unsigned Excess = 4 * (ARM::R4 - Reg); |
2906 | |
2907 | // Special case when NSAA != SP and the parameter size is greater than
2908 | // the size of all remaining GPR regs. In that case we can't split the
2909 | // parameter, we must send it to the stack. We also must set NCRN to R4,
2910 | // so waste all remaining registers.
2911 | const unsigned NSAAOffset = State->getNextStackOffset(); |
2912 | if (NSAAOffset != 0 && Size > Excess) { |
2913 | while (State->AllocateReg(GPRArgRegs)) |
2914 | ; |
2915 | return; |
2916 | } |
2917 | |
2918 | // The first register for the byval parameter is the first register that
2919 | // wasn't allocated before this method call, i.e. "Reg".
2920 | // If the parameter is small enough to fit in the range [Reg, r4), then
2921 | // the end (one past the last) register is Reg + param-size-in-regs;
2922 | // otherwise the parameter is split between registers and the stack, and
2923 | // the end register is r4.
2924 | unsigned ByValRegBegin = Reg; |
2925 | unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4); |
2926 | State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd); |
2927 | // Note, the first register was already allocated at the beginning of this
2928 | // function; allocate the remaining registers we need.
2929 | for (unsigned i = Reg + 1; i != ByValRegEnd; ++i) |
2930 | State->AllocateReg(GPRArgRegs); |
2931 | // A byval parameter that is split between registers and memory needs its |
2932 | // size truncated here. |
2933 | // In the case where the entire structure fits in registers, we set the |
2934 | // size in memory to zero. |
2935 | Size = std::max<int>(Size - Excess, 0); |
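     | // E.g. a 16-byte byval whose first free register is r2 (with no stack
     | // arguments allocated yet) is recorded as [r2, r4), Excess is 8, and
     | // Size becomes 8 bytes to be passed on the stack.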
2936 | } |
2937 | |
2938 | /// MatchingStackOffset - Return true if the given stack call argument is |
2939 | /// already available in the same position (relatively) of the caller's |
2940 | /// incoming argument stack. |
2941 | static |
2942 | bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, |
2943 | MachineFrameInfo &MFI, const MachineRegisterInfo *MRI, |
2944 | const TargetInstrInfo *TII) { |
2945 | unsigned Bytes = Arg.getValueSizeInBits() / 8; |
2946 | int FI = std::numeric_limits<int>::max(); |
2947 | if (Arg.getOpcode() == ISD::CopyFromReg) { |
2948 | Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg(); |
2949 | if (!VR.isVirtual()) |
2950 | return false; |
2951 | MachineInstr *Def = MRI->getVRegDef(VR); |
2952 | if (!Def) |
2953 | return false; |
2954 | if (!Flags.isByVal()) { |
2955 | if (!TII->isLoadFromStackSlot(*Def, FI)) |
2956 | return false; |
2957 | } else { |
2958 | return false; |
2959 | } |
2960 | } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) { |
2961 | if (Flags.isByVal()) |
2962 | // ByVal argument is passed in as a pointer but it's now being |
2963 | // dereferenced. e.g. |
2964 | // define @foo(%struct.X* %A) { |
2965 | // tail call @bar(%struct.X* byval %A) |
2966 | // } |
2967 | return false; |
2968 | SDValue Ptr = Ld->getBasePtr(); |
2969 | FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr); |
2970 | if (!FINode) |
2971 | return false; |
2972 | FI = FINode->getIndex(); |
2973 | } else |
2974 | return false; |
2975 | |
2976 | assert(FI != std::numeric_limits<int>::max());
2977 | if (!MFI.isFixedObjectIndex(FI)) |
2978 | return false; |
2979 | return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI); |
2980 | } |
2981 | |
2982 | /// IsEligibleForTailCallOptimization - Check whether the call is eligible |
2983 | /// for tail call optimization. Targets which want to do tail call |
2984 | /// optimization should implement this function. |
2985 | bool ARMTargetLowering::IsEligibleForTailCallOptimization( |
2986 | SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg, |
2987 | bool isCalleeStructRet, bool isCallerStructRet, |
2988 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
2989 | const SmallVectorImpl<SDValue> &OutVals, |
2990 | const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG, |
2991 | const bool isIndirect) const { |
2992 | MachineFunction &MF = DAG.getMachineFunction(); |
2993 | const Function &CallerF = MF.getFunction(); |
2994 | CallingConv::ID CallerCC = CallerF.getCallingConv(); |
2995 | |
2996 | assert(Subtarget->supportsTailCall());
2997 | |
2998 | // Indirect tail calls cannot be optimized for Thumb1 if the args |
2999 | // to the call take up r0-r3. The reason is that there are no legal registers |
3000 | // left to hold the pointer to the function to be called. |
3001 | // Similarly, if the function uses return address sign and authentication, |
3002 | // r12 is needed to hold the PAC and is not available to hold the callee |
3003 | // address. |
3004 | if (Outs.size() >= 4 && |
3005 | (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect)) { |
3006 | if (Subtarget->isThumb1Only()) |
3007 | return false; |
3008 | // Conservatively assume the function spills LR. |
3009 | if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(true)) |
3010 | return false; |
3011 | } |
3012 | |
3013 | // Look for obvious safe cases to perform tail call optimization that do not |
3014 | // require ABI changes. This is what gcc calls sibcall. |
3015 | |
3016 | // Exception-handling functions need a special set of instructions to indicate |
3017 | // a return to the hardware. Tail-calling another function would probably |
3018 | // break this. |
3019 | if (CallerF.hasFnAttribute("interrupt")) |
3020 | return false; |
3021 | |
3022 | if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt)) |
3023 | return CalleeCC == CallerCC; |
3024 | |
3025 | // Also avoid sibcall optimization if either caller or callee uses struct |
3026 | // return semantics. |
3027 | if (isCalleeStructRet || isCallerStructRet) |
3028 | return false; |
3029 | |
3030 | // Externally-defined functions with weak linkage should not be |
3031 | // tail-called on ARM when the OS does not support dynamic |
3032 | // pre-emption of symbols, as the AAELF spec requires normal calls |
3033 | // to undefined weak functions to be replaced with a NOP or jump to the |
3034 | // next instruction. The behaviour of branch instructions in this |
3035 | // situation (as used for tail calls) is implementation-defined, so we |
3036 | // cannot rely on the linker replacing the tail call with a return. |
3037 | if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { |
3038 | const GlobalValue *GV = G->getGlobal(); |
3039 | const Triple &TT = getTargetMachine().getTargetTriple(); |
3040 | if (GV->hasExternalWeakLinkage() && |
3041 | (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO())) |
3042 | return false; |
3043 | } |
3044 | |
3045 | // Check that the call results are passed in the same way. |
3046 | LLVMContext &C = *DAG.getContext(); |
3047 | if (!CCState::resultsCompatible( |
3048 | getEffectiveCallingConv(CalleeCC, isVarArg), |
3049 | getEffectiveCallingConv(CallerCC, CallerF.isVarArg()), MF, C, Ins, |
3050 | CCAssignFnForReturn(CalleeCC, isVarArg), |
3051 | CCAssignFnForReturn(CallerCC, CallerF.isVarArg()))) |
3052 | return false; |
3053 | // The callee has to preserve all registers the caller needs to preserve. |
3054 | const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo(); |
3055 | const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); |
3056 | if (CalleeCC != CallerCC) { |
3057 | const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); |
3058 | if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) |
3059 | return false; |
3060 | } |
3061 | |
3062 | // If Caller's vararg or byval argument has been split between registers and |
3063 | // stack, do not perform tail call, since part of the argument is in caller's |
3064 | // local frame. |
3065 | const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>(); |
3066 | if (AFI_Caller->getArgRegsSaveSize()) |
3067 | return false; |
3068 | |
3069 | // If the callee takes no arguments then go on to check the results of the |
3070 | // call. |
3071 | if (!Outs.empty()) { |
3072 | // Check if stack adjustment is needed. For now, do not do this if any |
3073 | // argument is passed on the stack. |
3074 | SmallVector<CCValAssign, 16> ArgLocs; |
3075 | CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C); |
3076 | CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg)); |
3077 | if (CCInfo.getNextStackOffset()) { |
3078 | // Check if the arguments are already laid out in the right way as |
3079 | // the caller's fixed stack objects. |
3080 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
3081 | const MachineRegisterInfo *MRI = &MF.getRegInfo(); |
3082 | const TargetInstrInfo *TII = Subtarget->getInstrInfo(); |
3083 | for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); |
3084 | i != e; |
3085 | ++i, ++realArgIdx) { |
3086 | CCValAssign &VA = ArgLocs[i]; |
3087 | EVT RegVT = VA.getLocVT(); |
3088 | SDValue Arg = OutVals[realArgIdx]; |
3089 | ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; |
3090 | if (VA.getLocInfo() == CCValAssign::Indirect) |
3091 | return false; |
3092 | if (VA.needsCustom() && (RegVT == MVT::f64 || RegVT == MVT::v2f64)) { |
3093 | // f64 and vector types are split into multiple registers or |
3094 | // register/stack-slot combinations. The types will not match |
3095 | // the registers; give up on memory f64 refs until we figure |
3096 | // out what to do about this. |
3097 | if (!VA.isRegLoc()) |
3098 | return false; |
3099 | if (!ArgLocs[++i].isRegLoc()) |
3100 | return false; |
3101 | if (RegVT == MVT::v2f64) { |
3102 | if (!ArgLocs[++i].isRegLoc()) |
3103 | return false; |
3104 | if (!ArgLocs[++i].isRegLoc()) |
3105 | return false; |
3106 | } |
3107 | } else if (!VA.isRegLoc()) { |
3108 | if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, |
3109 | MFI, MRI, TII)) |
3110 | return false; |
3111 | } |
3112 | } |
3113 | } |
3114 | |
3115 | const MachineRegisterInfo &MRI = MF.getRegInfo(); |
3116 | if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals)) |
3117 | return false; |
3118 | } |
3119 | |
3120 | return true; |
3121 | } |
3122 | |
3123 | bool |
3124 | ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv, |
3125 | MachineFunction &MF, bool isVarArg, |
3126 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
3127 | LLVMContext &Context) const { |
3128 | SmallVector<CCValAssign, 16> RVLocs; |
3129 | CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); |
3130 | return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg)); |
3131 | } |
3132 | |
3133 | static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps, |
3134 | const SDLoc &DL, SelectionDAG &DAG) { |
3135 | const MachineFunction &MF = DAG.getMachineFunction(); |
3136 | const Function &F = MF.getFunction(); |
3137 | |
3138 | StringRef IntKind = F.getFnAttribute("interrupt").getValueAsString(); |
3139 | |
3140 | // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset |
3141 | // version of the "preferred return address". These offsets affect the return |
3142 | // instruction if this is a return from PL1 without hypervisor extensions. |
3143 | // IRQ/FIQ: +4 "subs pc, lr, #4" |
3144 | // SWI: 0 "subs pc, lr, #0" |
3145 | // ABORT: +4 "subs pc, lr, #4" |
3146 | // UNDEF: +4/+2 "subs pc, lr, #0" |
3147 | // UNDEF varies depending on where the exception came from ARM or Thumb |
3148 | // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0. |
3149 | |
3150 | int64_t LROffset; |
3151 | if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" || |
3152 | IntKind == "ABORT") |
3153 | LROffset = 4; |
3154 | else if (IntKind == "SWI" || IntKind == "UNDEF") |
3155 | LROffset = 0; |
3156 | else |
3157 | report_fatal_error("Unsupported interrupt attribute. If present, value " |
3158 | "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF"); |
3159 | |
3160 | RetOps.insert(RetOps.begin() + 1, |
3161 | DAG.getConstant(LROffset, DL, MVT::i32, false)); |
3162 | |
3163 | return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps); |
3164 | } |
3165 | |
3166 | SDValue |
3167 | ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, |
3168 | bool isVarArg, |
3169 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
3170 | const SmallVectorImpl<SDValue> &OutVals, |
3171 | const SDLoc &dl, SelectionDAG &DAG) const { |
3172 | // CCValAssign - represent the assignment of the return value to a location. |
3173 | SmallVector<CCValAssign, 16> RVLocs; |
3174 | |
3175 | // CCState - Info about the registers and stack slots. |
3176 | CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, |
3177 | *DAG.getContext()); |
3178 | |
3179 | // Analyze outgoing return values. |
3180 | CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg)); |
3181 | |
3182 | SDValue Flag; |
3183 | SmallVector<SDValue, 4> RetOps; |
3184 | RetOps.push_back(Chain); // Operand #0 = Chain (updated below) |
3185 | bool isLittleEndian = Subtarget->isLittle(); |
3186 | |
3187 | MachineFunction &MF = DAG.getMachineFunction(); |
3188 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); |
3189 | AFI->setReturnRegsCount(RVLocs.size()); |
3190 | |
3191 | // Report error if cmse entry function returns structure through first ptr arg. |
3192 | if (AFI->isCmseNSEntryFunction() && MF.getFunction().hasStructRetAttr()) { |
3193 | // Note: using an empty SDLoc(), as the first line of the function is a |
3194 | // better place to report than the last line. |
3195 | DiagnosticInfoUnsupported Diag( |
3196 | DAG.getMachineFunction().getFunction(), |
3197 | "secure entry function would return value through pointer", |
3198 | SDLoc().getDebugLoc()); |
3199 | DAG.getContext()->diagnose(Diag); |
3200 | } |
3201 | |
3202 | // Copy the result values into the output registers. |
3203 | for (unsigned i = 0, realRVLocIdx = 0; |
3204 | i != RVLocs.size(); |
3205 | ++i, ++realRVLocIdx) { |
3206 | CCValAssign &VA = RVLocs[i]; |
3207 | assert(VA.isRegLoc() && "Can only return in registers!");
3208 | |
3209 | SDValue Arg = OutVals[realRVLocIdx]; |
3210 | bool ReturnF16 = false; |
3211 | |
3212 | if (Subtarget->hasFullFP16() && Subtarget->isTargetHardFloat()) { |
3213 | // Half-precision return values can be returned like this: |
3214 | // |
3215 | // t11 f16 = fadd ... |
3216 | // t12: i16 = bitcast t11 |
3217 | // t13: i32 = zero_extend t12 |
3218 | // t14: f32 = bitcast t13 <~~~~~~~ Arg |
3219 | // |
3220 | // to avoid code generation for bitcasts, we simply set Arg to the node |
3221 | // that produces the f16 value, t11 in this case. |
3222 | // |
3223 | if (Arg.getValueType() == MVT::f32 && Arg.getOpcode() == ISD::BITCAST) { |
3224 | SDValue ZE = Arg.getOperand(0); |
3225 | if (ZE.getOpcode() == ISD::ZERO_EXTEND && ZE.getValueType() == MVT::i32) { |
3226 | SDValue BC = ZE.getOperand(0); |
3227 | if (BC.getOpcode() == ISD::BITCAST && BC.getValueType() == MVT::i16) { |
3228 | Arg = BC.getOperand(0); |
3229 | ReturnF16 = true; |
3230 | } |
3231 | } |
3232 | } |
3233 | } |
3234 | |
3235 | switch (VA.getLocInfo()) { |
3236 | default: llvm_unreachable("Unknown loc info!");
3237 | case CCValAssign::Full: break; |
3238 | case CCValAssign::BCvt: |
3239 | if (!ReturnF16) |
3240 | Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); |
3241 | break; |
3242 | } |
3243 | |
3244 | // Mask f16 arguments if this is a CMSE nonsecure entry. |
3245 | auto RetVT = Outs[realRVLocIdx].ArgVT; |
3246 | if (AFI->isCmseNSEntryFunction() && (RetVT == MVT::f16)) { |
3247 | if (VA.needsCustom() && VA.getValVT() == MVT::f16) { |
3248 | Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg); |
3249 | } else { |
3250 | auto LocBits = VA.getLocVT().getSizeInBits(); |
3251 | auto MaskValue = APInt::getLowBitsSet(LocBits, RetVT.getSizeInBits()); |
3252 | SDValue Mask = |
3253 | DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits)); |
3254 | Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg); |
3255 | Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask); |
3256 | Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); |
3257 | } |
3258 | } |
3259 | |
3260 | if (VA.needsCustom() && |
3261 | (VA.getLocVT() == MVT::v2f64 || VA.getLocVT() == MVT::f64)) { |
3262 | if (VA.getLocVT() == MVT::v2f64) { |
3263 | // Extract the first half and return it in two registers. |
3264 | SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, |
3265 | DAG.getConstant(0, dl, MVT::i32)); |
3266 | SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl, |
3267 | DAG.getVTList(MVT::i32, MVT::i32), Half); |
3268 | |
3269 | Chain = |
3270 | DAG.getCopyToReg(Chain, dl, VA.getLocReg(), |
3271 | HalfGPRs.getValue(isLittleEndian ? 0 : 1), Flag); |
3272 | Flag = Chain.getValue(1); |
3273 | RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); |
3274 | VA = RVLocs[++i]; // skip ahead to next loc |
3275 | Chain = |
3276 | DAG.getCopyToReg(Chain, dl, VA.getLocReg(), |
3277 | HalfGPRs.getValue(isLittleEndian ? 1 : 0), Flag); |
3278 | Flag = Chain.getValue(1); |
3279 | RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); |
3280 | VA = RVLocs[++i]; // skip ahead to next loc |
3281 | |
3282 | // Extract the 2nd half and fall through to handle it as an f64 value. |
3283 | Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, |
3284 | DAG.getConstant(1, dl, MVT::i32)); |
3285 | } |
3286 | // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is |
3287 | // available. |
3288 | SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, |
3289 | DAG.getVTList(MVT::i32, MVT::i32), Arg); |
3290 | Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), |
3291 | fmrrd.getValue(isLittleEndian ? 0 : 1), Flag); |
3292 | Flag = Chain.getValue(1); |
3293 | RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); |
3294 | VA = RVLocs[++i]; // skip ahead to next loc |
3295 | Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), |
3296 | fmrrd.getValue(isLittleEndian ? 1 : 0), Flag); |
3297 | } else |
3298 | Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); |
3299 | |
3300 | // Guarantee that all emitted copies are stuck together (glued), so
3301 | // nothing can be scheduled between them.
3302 | Flag = Chain.getValue(1); |
3303 | RetOps.push_back(DAG.getRegister( |
3304 | VA.getLocReg(), ReturnF16 ? Arg.getValueType() : VA.getLocVT())); |
3305 | } |
3306 | const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo(); |
3307 | const MCPhysReg *I = |
3308 | TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction()); |
3309 | if (I) { |
3310 | for (; *I; ++I) { |
3311 | if (ARM::GPRRegClass.contains(*I)) |
3312 | RetOps.push_back(DAG.getRegister(*I, MVT::i32)); |
3313 | else if (ARM::DPRRegClass.contains(*I)) |
3314 | RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64))); |
3315 | else |
3316 | llvm_unreachable("Unexpected register class in CSRsViaCopy!")::llvm::llvm_unreachable_internal("Unexpected register class in CSRsViaCopy!" , "llvm/lib/Target/ARM/ARMISelLowering.cpp", 3316); |
3317 | } |
3318 | } |
3319 | |
3320 | // Update chain and glue. |
3321 | RetOps[0] = Chain; |
3322 | if (Flag.getNode()) |
3323 | RetOps.push_back(Flag); |
3324 | |
3325 | // CPUs which aren't M-class use a special sequence to return from |
3326 | // exceptions (roughly, any instruction setting pc and cpsr simultaneously, |
3327 | // though we use "subs pc, lr, #N"). |
3328 | // |
3329 | // M-class CPUs actually use a normal return sequence with a special |
3330 | // (hardware-provided) value in LR, so the normal code path works. |
3331 | if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt") && |
3332 | !Subtarget->isMClass()) { |
3333 | if (Subtarget->isThumb1Only()) |
3334 | report_fatal_error("interrupt attribute is not supported in Thumb1"); |
3335 | return LowerInterruptReturn(RetOps, dl, DAG); |
3336 | } |
3337 | |
3338 | ARMISD::NodeType RetNode = AFI->isCmseNSEntryFunction() ? ARMISD::SERET_FLAG : |
3339 | ARMISD::RET_FLAG; |
3340 | return DAG.getNode(RetNode, dl, MVT::Other, RetOps); |
3341 | } |
3342 | |
3343 | bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { |
3344 | if (N->getNumValues() != 1) |
3345 | return false; |
3346 | if (!N->hasNUsesOfValue(1, 0)) |
3347 | return false; |
3348 | |
3349 | SDValue TCChain = Chain; |
3350 | SDNode *Copy = *N->use_begin(); |
3351 | if (Copy->getOpcode() == ISD::CopyToReg) { |
3352 | // If the copy has a glue operand, we conservatively assume it isn't safe to |
3353 | // perform a tail call. |
3354 | if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue) |
3355 | return false; |
3356 | TCChain = Copy->getOperand(0); |
3357 | } else if (Copy->getOpcode() == ARMISD::VMOVRRD) { |
3358 | SDNode *VMov = Copy; |
3359 | // f64 returned in a pair of GPRs. |
3360 | SmallPtrSet<SDNode*, 2> Copies; |
3361 | for (SDNode *U : VMov->uses()) { |
3362 | if (U->getOpcode() != ISD::CopyToReg) |
3363 | return false; |
3364 | Copies.insert(U); |
3365 | } |
3366 | if (Copies.size() > 2) |
3367 | return false; |
3368 | |
3369 | for (SDNode *U : VMov->uses()) { |
3370 | SDValue UseChain = U->getOperand(0); |
3371 | if (Copies.count(UseChain.getNode())) |
3372 | // Second CopyToReg |
3373 | Copy = U; |
3374 | else { |
3375 | // We are at the top of this chain. |
3376 | // If the copy has a glue operand, we conservatively assume it |
3377 | // isn't safe to perform a tail call. |
3378 | if (U->getOperand(U->getNumOperands() - 1).getValueType() == MVT::Glue) |
3379 | return false; |
3380 | // First CopyToReg |
3381 | TCChain = UseChain; |
3382 | } |
3383 | } |
3384 | } else if (Copy->getOpcode() == ISD::BITCAST) { |
3385 | // f32 returned in a single GPR. |
3386 | if (!Copy->hasOneUse()) |
3387 | return false; |
3388 | Copy = *Copy->use_begin(); |
3389 | if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0)) |
3390 | return false; |
3391 | // If the copy has a glue operand, we conservatively assume it isn't safe to |
3392 | // perform a tail call. |
3393 | if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue) |
3394 | return false; |
3395 | TCChain = Copy->getOperand(0); |
3396 | } else { |
3397 | return false; |
3398 | } |
3399 | |
3400 | bool HasRet = false; |
3401 | for (const SDNode *U : Copy->uses()) { |
3402 | if (U->getOpcode() != ARMISD::RET_FLAG && |
3403 | U->getOpcode() != ARMISD::INTRET_FLAG) |
3404 | return false; |
3405 | HasRet = true; |
3406 | } |
3407 | |
3408 | if (!HasRet) |
3409 | return false; |
3410 | |
3411 | Chain = TCChain; |
3412 | return true; |
3413 | } |
3414 | |
3415 | bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { |
3416 | if (!Subtarget->supportsTailCall()) |
3417 | return false; |
3418 | |
3419 | if (!CI->isTailCall()) |
3420 | return false; |
3421 | |
3422 | return true; |
3423 | } |
3424 | |
3425 | // Writing a 64-bit value, so we need to split it into two 32-bit values
3426 | // first and pass the low and high parts through.
3427 | static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) { |
3428 | SDLoc DL(Op); |
3429 | SDValue WriteValue = Op->getOperand(2); |
3430 | |
3431 | // This function is only supposed to be called for i64 type argument. |
3432 | assert(WriteValue.getValueType() == MVT::i64
3433 | && "LowerWRITE_REGISTER called for non-i64 type argument.");
3434 | |
3435 | SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue, |
3436 | DAG.getConstant(0, DL, MVT::i32)); |
3437 | SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue, |
3438 | DAG.getConstant(1, DL, MVT::i32)); |
3439 | SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi }; |
3440 | return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops); |
3441 | } |
3442 | |
3443 | // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as |
3444 | // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is |
3445 | // one of the above mentioned nodes. It has to be wrapped because otherwise |
3446 | // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only |
3447 | // be used to form addressing mode. These wrapped nodes will be selected |
3448 | // into MOVi. |
3449 | SDValue ARMTargetLowering::LowerConstantPool(SDValue Op, |
3450 | SelectionDAG &DAG) const { |
3451 | EVT PtrVT = Op.getValueType(); |
3452 | // FIXME there is no actual debug info here |
3453 | SDLoc dl(Op); |
3454 | ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); |
3455 | SDValue Res; |
3456 | |
3457 | // When generating execute-only code Constant Pools must be promoted to the |
3458 | // global data section. It's a bit ugly that we can't share them across basic |
3459 | // blocks, but this way we guarantee that execute-only behaves correctly with
3460 | // position-independent addressing modes. |
3461 | if (Subtarget->genExecuteOnly()) { |
3462 | auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>(); |
3463 | auto T = const_cast<Type*>(CP->getType()); |
3464 | auto C = const_cast<Constant*>(CP->getConstVal()); |
3465 | auto M = const_cast<Module*>(DAG.getMachineFunction(). |
3466 | getFunction().getParent()); |
3467 | auto GV = new GlobalVariable( |
3468 | *M, T, /*isConstant=*/true, GlobalVariable::InternalLinkage, C, |
3469 | Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" + |
3470 | Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" + |
3471 | Twine(AFI->createPICLabelUId()) |
3472 | ); |
3473 | SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV), |
3474 | dl, PtrVT); |
3475 | return LowerGlobalAddress(GA, DAG); |
3476 | } |
3477 | |
3478 | // The 16-bit ADR instruction can only encode offsets that are multiples of 4, |
3479 | // so we need to align to at least 4 bytes when we don't have 32-bit ADR. |
3480 | Align CPAlign = CP->getAlign(); |
3481 | if (Subtarget->isThumb1Only()) |
3482 | CPAlign = std::max(CPAlign, Align(4)); |
3483 | if (CP->isMachineConstantPoolEntry()) |
3484 | Res = |
3485 | DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CPAlign); |
3486 | else |
3487 | Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CPAlign); |
3488 | return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res); |
3489 | } |
3490 | |
3491 | unsigned ARMTargetLowering::getJumpTableEncoding() const { |
3492 | return MachineJumpTableInfo::EK_Inline; |
3493 | } |
3494 | |
3495 | SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, |
3496 | SelectionDAG &DAG) const { |
3497 | MachineFunction &MF = DAG.getMachineFunction(); |
3498 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); |
3499 | unsigned ARMPCLabelIndex = 0; |
3500 | SDLoc DL(Op); |
3501 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); |
3502 | const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); |
3503 | SDValue CPAddr; |
3504 | bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI(); |
3505 | if (!IsPositionIndependent) { |
3506 | CPAddr = DAG.getTargetConstantPool(BA, PtrVT, Align(4)); |
3507 | } else { |
3508 | unsigned PCAdj = Subtarget->isThumb() ? 4 : 8; |
3509 | ARMPCLabelIndex = AFI->createPICLabelUId(); |
3510 | ARMConstantPoolValue *CPV = |
3511 | ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex, |
3512 | ARMCP::CPBlockAddress, PCAdj); |
3513 | CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4)); |
3514 | } |
3515 | CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr); |
3516 | SDValue Result = DAG.getLoad( |
3517 | PtrVT, DL, DAG.getEntryNode(), CPAddr, |
3518 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); |
3519 | if (!IsPositionIndependent) |
3520 | return Result; |
3521 | SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32); |
3522 | return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel); |
3523 | } |
3524 | |
3525 | /// Convert a TLS address reference into the correct sequence of loads |
3526 | /// and calls to compute the variable's address for Darwin, and return an |
3527 | /// SDValue containing the final node. |
3528 | |
3529 | /// Darwin only has one TLS scheme which must be capable of dealing with the |
3530 | /// fully general situation, in the worst case. This means: |
3531 | /// + "extern __thread" declaration. |
3532 | /// + Defined in a possibly unknown dynamic library. |
3533 | /// |
3534 | /// The general system is that each __thread variable has a [3 x i32] descriptor |
3535 | /// which contains information used by the runtime to calculate the address. The |
3536 | /// only part of this the compiler needs to know about is the first word, which |
3537 | /// contains a function pointer that must be called with the address of the |
3538 | /// entire descriptor in "r0". |
3539 | /// |
3540 | /// Since this descriptor may be in a different unit, in general access must |
3541 | /// proceed along the usual ARM rules. A common sequence to produce is: |
3542 | /// |
3543 | /// movw rT1, :lower16:_var$non_lazy_ptr |
3544 | /// movt rT1, :upper16:_var$non_lazy_ptr |
3545 | /// ldr r0, [rT1] |
3546 | /// ldr rT2, [r0] |
3547 | /// blx rT2 |
3548 | /// [...address now in r0...] |
3549 | SDValue |
3550 | ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op, |
3551 | SelectionDAG &DAG) const { |
3552 | assert(Subtarget->isTargetDarwin() &&
3553 | "This function expects a Darwin target");
3554 | SDLoc DL(Op); |
3555 | |
3556 | // First step is to get the address of the actual global symbol. This is where
3557 | // the TLS descriptor lives. |
3558 | SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG); |
3559 | |
3560 | // The first entry in the descriptor is a function pointer that we must call |
3561 | // to obtain the address of the variable. |
3562 | SDValue Chain = DAG.getEntryNode(); |
3563 | SDValue FuncTLVGet = DAG.getLoad( |
3564 | MVT::i32, DL, Chain, DescAddr, |
3565 | MachinePointerInfo::getGOT(DAG.getMachineFunction()), Align(4), |
3566 | MachineMemOperand::MONonTemporal | MachineMemOperand::MODereferenceable | |
3567 | MachineMemOperand::MOInvariant); |
3568 | Chain = FuncTLVGet.getValue(1); |
3569 | |
3570 | MachineFunction &F = DAG.getMachineFunction(); |
3571 | MachineFrameInfo &MFI = F.getFrameInfo(); |
3572 | MFI.setAdjustsStack(true); |
3573 | |
3574 | // TLS calls preserve all registers except those that absolutely must be |
3575 | // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be |
3576 | // silly). |
3577 | auto TRI = |
3578 | getTargetMachine().getSubtargetImpl(F.getFunction())->getRegisterInfo(); |
3579 | auto ARI = static_cast<const ARMRegisterInfo *>(TRI); |
3580 | const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction()); |
3581 | |
3582 | // Finally, we can make the call. This is just a degenerate version of a |
3583 | // normal ARM call node: r0 takes the address of the descriptor, and
3584 | // returns the address of the variable in this thread. |
3585 | Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue()); |
3586 | Chain = |
3587 | DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue), |
3588 | Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32), |
3589 | DAG.getRegisterMask(Mask), Chain.getValue(1)); |
3590 | return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1)); |
3591 | } |
3592 | |
3593 | SDValue |
3594 | ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op, |
3595 | SelectionDAG &DAG) const { |
3596 | assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
3597 | |
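| // Roughly, the sequence built below is (register names illustrative):
| //   mrc   p15, #0, rTEB, c13, c0, #2    ; current TEB
| //   ldr   rArr, [rTEB, #0x2c]           ; ThreadLocalStoragePointer
| //   ldr   rIdx, [<_tls_index>]          ; this module's TLS index
| //   ldr   rTLS, [rArr, rIdx, lsl #2]    ; this thread's TLS block
| //   add   r0, rTLS, rOff                ; rOff = SECREL offset of the variable,
| //                                       ;        loaded from the constant pool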
3598 | SDValue Chain = DAG.getEntryNode(); |
3599 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); |
3600 | SDLoc DL(Op); |
3601 | |
3602 | // Load the current TEB (thread environment block) |
3603 | SDValue Ops[] = {Chain, |
3604 | DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32), |
3605 | DAG.getTargetConstant(15, DL, MVT::i32), |
3606 | DAG.getTargetConstant(0, DL, MVT::i32), |
3607 | DAG.getTargetConstant(13, DL, MVT::i32), |
3608 | DAG.getTargetConstant(0, DL, MVT::i32), |
3609 | DAG.getTargetConstant(2, DL, MVT::i32)}; |
3610 | SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, |
3611 | DAG.getVTList(MVT::i32, MVT::Other), Ops); |
3612 | |
3613 | SDValue TEB = CurrentTEB.getValue(0); |
3614 | Chain = CurrentTEB.getValue(1); |
3615 | |
3616 | // Load the ThreadLocalStoragePointer from the TEB |
3617 | // A pointer to the TLS array is located at offset 0x2c from the TEB. |
3618 | SDValue TLSArray = |
3619 | DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL)); |
3620 | TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo()); |
3621 | |
3622 | // The pointer to the thread's TLS data area is stored in the TLS array at
3623 | // the slot given by the TLS index scaled by 4.
3624 | |
3625 | // Load the TLS index from the C runtime |
3626 | SDValue TLSIndex = |
3627 | DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG); |
3628 | TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex); |
3629 | TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo()); |
3630 | |
3631 | SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex, |
3632 | DAG.getConstant(2, DL, MVT::i32)); |
3633 | SDValue TLS = DAG.getLoad(PtrVT, DL, Chain, |
3634 | DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot), |
3635 | MachinePointerInfo()); |
3636 | |
3637 | // Get the offset of the start of the .tls section (section base) |
3638 | const auto *GA = cast<GlobalAddressSDNode>(Op); |
3639 | auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL); |
3640 | SDValue Offset = DAG.getLoad( |
3641 | PtrVT, DL, Chain, |
3642 | DAG.getNode(ARMISD::Wrapper, DL, MVT::i32, |
3643 | DAG.getTargetConstantPool(CPV, PtrVT, Align(4))), |
3644 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); |
3645 | |
3646 | return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset); |
3647 | } |
3648 | |
3649 | // Lower ISD::GlobalTLSAddress using the "general dynamic" model |
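| // Roughly, for a PIC function this emits (register names illustrative):
| //   ldr   r0, .LCPI_x          ; TLSGD constant-pool entry for the variable
| //   add   r0, pc               ; ARMISD::PIC_ADD
| //   bl    __tls_get_addr       ; returns the variable's address in r0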
3650 | SDValue |
3651 | ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, |
3652 | SelectionDAG &DAG) const { |
3653 | SDLoc dl(GA); |
3654 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); |
3655 | unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; |
3656 | MachineFunction &MF = DAG.getMachineFunction(); |
3657 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); |
3658 | unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); |
3659 | ARMConstantPoolValue *CPV = |
3660 | ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex, |
3661 | ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true); |
3662 | SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, Align(4)); |
3663 | Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); |
3664 | Argument = DAG.getLoad( |
3665 | PtrVT, dl, DAG.getEntryNode(), Argument, |
3666 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); |
3667 | SDValue Chain = Argument.getValue(1); |
3668 | |
3669 | SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); |
3670 | Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel); |
3671 | |
3672 | // call __tls_get_addr. |
3673 | ArgListTy Args; |
3674 | ArgListEntry Entry; |
3675 | Entry.Node = Argument; |
3676 | Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext()); |
3677 | Args.push_back(Entry); |
3678 | |
3679 | // FIXME: is there useful debug info available here? |
3680 | TargetLowering::CallLoweringInfo CLI(DAG); |
3681 | CLI.setDebugLoc(dl).setChain(Chain).setLibCallee( |
3682 | CallingConv::C, Type::getInt32Ty(*DAG.getContext()), |
3683 | DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args)); |
3684 | |
3685 | std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); |
3686 | return CallResult.first; |
3687 | } |
3688 | |
3689 | // Lower ISD::GlobalTLSAddress using the "initial exec" or |
3690 | // "local exec" model. |
3691 | SDValue |
3692 | ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, |
3693 | SelectionDAG &DAG, |
3694 | TLSModel::Model model) const { |
3695 | const GlobalValue *GV = GA->getGlobal(); |
3696 | SDLoc dl(GA); |
3697 | SDValue Offset; |
3698 | SDValue Chain = DAG.getEntryNode(); |
3699 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); |
3700 | // Get the Thread Pointer |
3701 | SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); |
3702 | |
3703 | if (model == TLSModel::InitialExec) { |
3704 | MachineFunction &MF = DAG.getMachineFunction(); |
3705 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); |
3706 | unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); |
3707 | // Initial exec model. |
3708 | unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; |
3709 | ARMConstantPoolValue *CPV = |
3710 | ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex, |
3711 | ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF, |
3712 | true); |
3713 | Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4)); |
3714 | Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); |
3715 | Offset = DAG.getLoad( |
3716 | PtrVT, dl, Chain, Offset, |
3717 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); |
3718 | Chain = Offset.getValue(1); |
3719 | |
3720 | SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); |
3721 | Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel); |
3722 | |
3723 | Offset = DAG.getLoad( |
3724 | PtrVT, dl, Chain, Offset, |
3725 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); |
3726 | } else { |
3727 | // local exec model |
3728 | assert(model == TLSModel::LocalExec);
3729 | ARMConstantPoolValue *CPV = |
3730 | ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF); |
3731 | Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4)); |
3732 | Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); |
3733 | Offset = DAG.getLoad( |
3734 | PtrVT, dl, Chain, Offset, |
3735 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); |
3736 | } |
3737 | |
3738 | // The address of the thread local variable is the sum of the thread
3739 | // pointer and the offset of the variable.
3740 | return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset); |
3741 | } |
3742 | |
3743 | SDValue |
3744 | ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { |
3745 | GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); |
3746 | if (DAG.getTarget().useEmulatedTLS()) |
3747 | return LowerToTLSEmulatedModel(GA, DAG); |
3748 | |
3749 | if (Subtarget->isTargetDarwin()) |
3750 | return LowerGlobalTLSAddressDarwin(Op, DAG); |
3751 | |
3752 | if (Subtarget->isTargetWindows()) |
3753 | return LowerGlobalTLSAddressWindows(Op, DAG); |
3754 | |
3755 | // TODO: implement the "local dynamic" model |
3756 | assert(Subtarget->isTargetELF() && "Only ELF implemented here");
3757 | TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal()); |
3758 | |
3759 | switch (model) { |
3760 | case TLSModel::GeneralDynamic: |
3761 | case TLSModel::LocalDynamic: |
3762 | return LowerToTLSGeneralDynamicModel(GA, DAG); |
3763 | case TLSModel::InitialExec: |
3764 | case TLSModel::LocalExec: |
3765 | return LowerToTLSExecModels(GA, DAG, model); |
3766 | } |
3767 | llvm_unreachable("bogus TLS model");
3768 | } |
3769 | |
3770 | /// Return true if all users of V are within function F, looking through |
3771 | /// ConstantExprs. |
3772 | static bool allUsersAreInFunction(const Value *V, const Function *F) { |
3773 | SmallVector<const User*,4> Worklist(V->users()); |
3774 | while (!Worklist.empty()) { |
3775 | auto *U = Worklist.pop_back_val(); |
3776 | if (isa<ConstantExpr>(U)) { |
3777 | append_range(Worklist, U->users()); |
3778 | continue; |
3779 | } |
3780 | |
3781 | auto *I = dyn_cast<Instruction>(U); |
3782 | if (!I || I->getParent()->getParent() != F) |
3783 | return false; |
3784 | } |
3785 | return true; |
3786 | } |
3787 | |
3788 | static SDValue promoteToConstantPool(const ARMTargetLowering *TLI, |
3789 | const GlobalValue *GV, SelectionDAG &DAG, |
3790 | EVT PtrVT, const SDLoc &dl) { |
3791 | // If we're creating a pool entry for a constant global with unnamed address, |
3792 | // and the global is small enough, we can emit it inline into the constant pool |
3793 | // to save ourselves an indirection. |
3794 | // |
3795 | // This is a win if the constant is only used in one function (so it doesn't |
3796 | // need to be duplicated) or duplicating the constant wouldn't increase code |
3797 | // size (implying the constant is no larger than 4 bytes). |
3798 | const Function &F = DAG.getMachineFunction().getFunction(); |
3799 | |
3800 | // We rely on this decision to inline being idempotent and unrelated to the
3801 | // use-site. We know that if we inline a variable at one use site, we'll
3802 | // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
3803 | // doesn't know about this optimization, so bail out if it's enabled;
3804 | // otherwise we could decide to inline here (and thus never emit the GV) but
3805 | // still require the GV from fast-isel generated code.
3806 | if (!EnableConstpoolPromotion || |
3807 | DAG.getMachineFunction().getTarget().Options.EnableFastISel) |
3808 | return SDValue(); |
3809 | |
3810 | auto *GVar = dyn_cast<GlobalVariable>(GV); |
3811 | if (!GVar || !GVar->hasInitializer() || |
3812 | !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() || |
3813 | !GVar->hasLocalLinkage()) |
3814 | return SDValue(); |
3815 | |
3816 | // If we inline a value that contains relocations, we move the relocations |
3817 | // from .data to .text. This is not allowed in position-independent code. |
3818 | auto *Init = GVar->getInitializer(); |
3819 | if ((TLI->isPositionIndependent() || TLI->getSubtarget()->isROPI()) && |
3820 | Init->needsDynamicRelocation()) |
3821 | return SDValue(); |
3822 | |
3823 | // The constant islands pass can only really deal with alignment requests
3824 | // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote
3825 | // any type requiring alignment greater than 4 bytes. We also can only
3826 | // promote constants that are multiples of 4 bytes in size or are paddable
3827 | // to a multiple of 4. Currently we only try to pad constants that are
3828 | // strings, for simplicity.
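| // For example, a 7-byte string initializer has Size == 7 and
| // RequiredPadding == 1, so it is padded with one NUL byte to PaddedSize == 8;
| // a 12-byte initializer has RequiredPadding == 4, meaning no padding is
| // needed.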
3829 | auto *CDAInit = dyn_cast<ConstantDataArray>(Init); |
3830 | unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType()); |
3831 | Align PrefAlign = DAG.getDataLayout().getPreferredAlign(GVar); |
3832 | unsigned RequiredPadding = 4 - (Size % 4); |
3833 | bool PaddingPossible = |
3834 | RequiredPadding == 4 || (CDAInit && CDAInit->isString()); |
3835 | if (!PaddingPossible || PrefAlign > 4 || Size > ConstpoolPromotionMaxSize || |
3836 | Size == 0) |
3837 | return SDValue(); |
3838 | |
3839 | unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding); |
3840 | MachineFunction &MF = DAG.getMachineFunction(); |
3841 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); |
3842 | |
3843 | // We can't bloat the constant pool too much, else the ConstantIslands pass |
3844 | // may fail to converge. If we haven't promoted this global yet (it may have |
3845 | // multiple uses), and promoting it would increase the constant pool size
3846 | // (Size > 4), ensure we have space to do so up to ConstpoolPromotionMaxTotal.
3847 | if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4) |
3848 | if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >= |
3849 | ConstpoolPromotionMaxTotal) |
3850 | return SDValue(); |
3851 | |
3852 | // This is only valid if all users are in a single function; we can't clone |
3853 | // the constant in general. The LLVM IR unnamed_addr allows merging |
3854 | // constants, but not cloning them. |
3855 | // |
3856 | // We could potentially allow cloning if we could prove all uses of the |
3857 | // constant in the current function don't care about the address, like |
3858 | // printf format strings. But that isn't implemented for now. |
3859 | if (!allUsersAreInFunction(GVar, &F)) |
3860 | return SDValue(); |
3861 | |
3862 | // We're going to inline this global. Pad it out if needed. |
3863 | if (RequiredPadding != 4) { |
3864 | StringRef S = CDAInit->getAsString(); |
3865 | |
3866 | SmallVector<uint8_t,16> V(S.size()); |
3867 | std::copy(S.bytes_begin(), S.bytes_end(), V.begin()); |
3868 | while (RequiredPadding--) |
3869 | V.push_back(0); |
3870 | Init = ConstantDataArray::get(*DAG.getContext(), V); |
3871 | } |
3872 | |
3873 | auto CPVal = ARMConstantPoolConstant::Create(GVar, Init); |
3874 | SDValue CPAddr = DAG.getTargetConstantPool(CPVal, PtrVT, Align(4)); |
3875 | if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) { |
3876 | AFI->markGlobalAsPromotedToConstantPool(GVar); |
3877 | AFI->setPromotedConstpoolIncrease(AFI->getPromotedConstpoolIncrease() + |
3878 | PaddedSize - 4); |
3879 | } |
3880 | ++NumConstpoolPromoted; |
3881 | return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); |
3882 | } |
3883 | |
3884 | bool ARMTargetLowering::isReadOnly(const GlobalValue *GV) const { |
3885 | if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) |
3886 | if (!(GV = GA->getAliaseeObject())) |
3887 | return false; |
3888 | if (const auto *V = dyn_cast<GlobalVariable>(GV)) |
3889 | return V->isConstant(); |
3890 | return isa<Function>(GV); |
3891 | } |
3892 | |
3893 | SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op, |
3894 | SelectionDAG &DAG) const { |
3895 | switch (Subtarget->getTargetTriple().getObjectFormat()) { |
3896 | default: llvm_unreachable("unknown object format");
3897 | case Triple::COFF: |
3898 | return LowerGlobalAddressWindows(Op, DAG); |
3899 | case Triple::ELF: |
3900 | return LowerGlobalAddressELF(Op, DAG); |
3901 | case Triple::MachO: |
3902 | return LowerGlobalAddressDarwin(Op, DAG); |
3903 | } |
3904 | } |
3905 | |
3906 | SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, |
3907 | SelectionDAG &DAG) const { |
3908 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); |
3909 | SDLoc dl(Op); |
3910 | const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); |
3911 | const TargetMachine &TM = getTargetMachine(); |
3912 | bool IsRO = isReadOnly(GV); |
3913 | |
3914 | // Call promoteToConstantPool only if we are not generating an execute-only (XO) text section.
3915 | if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly()) |
3916 | if (SDValue V = promoteToConstantPool(this, GV, DAG, PtrVT, dl)) |
3917 | return V; |
3918 | |
3919 | if (isPositionIndependent()) { |
3920 | bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV); |
3921 | SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, |
3922 | UseGOT_PREL ? ARMII::MO_GOT : 0); |
3923 | SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G); |
3924 | if (UseGOT_PREL) |
3925 | Result = |
3926 | DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result, |
3927 | MachinePointerInfo::getGOT(DAG.getMachineFunction())); |
3928 | return Result; |
3929 | } else if (Subtarget->isROPI() && IsRO) { |
3930 | // PC-relative. |
3931 | SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT); |
3932 | SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G); |
3933 | return Result; |
3934 | } else if (Subtarget->isRWPI() && !IsRO) { |
3935 | // SB-relative. |
3936 | SDValue RelAddr; |
3937 | if (Subtarget->useMovt()) { |
3938 | ++NumMovwMovt; |
3939 | SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL); |
3940 | RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G); |
3941 | } else { // use literal pool for address constant |
3942 | ARMConstantPoolValue *CPV = |
3943 | ARMConstantPoolConstant::Create(GV, ARMCP::SBREL); |
3944 | SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4)); |
3945 | CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); |
3946 | RelAddr = DAG.getLoad( |
3947 | PtrVT, dl, DAG.getEntryNode(), CPAddr, |
3948 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); |
3949 | } |
3950 | SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT); |
3951 | SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr); |
3952 | return Result; |
3953 | } |
3954 | |
3955 | // If we have T2 ops, we can materialize the address directly via movt/movw |
3956 | // pair. This is always cheaper. |
3957 | if (Subtarget->useMovt()) { |
3958 | ++NumMovwMovt; |
3959 | // FIXME: Once remat is capable of dealing with instructions with register |
3960 | // operands, expand this into two nodes. |
3961 | return DAG.getNode(ARMISD::Wrapper, dl, PtrVT, |
3962 | DAG.getTargetGlobalAddress(GV, dl, PtrVT)); |
3963 | } else { |
3964 | SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, Align(4)); |
3965 | CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); |
3966 | return DAG.getLoad( |
3967 | PtrVT, dl, DAG.getEntryNode(), CPAddr, |
3968 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); |
3969 | } |
3970 | } |
3971 | |
3972 | SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, |
3973 | SelectionDAG &DAG) const { |
3974 | assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3975 | "ROPI/RWPI not currently supported for Darwin");
3976 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); |
3977 | SDLoc dl(Op); |
3978 | const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); |
3979 | |
3980 | if (Subtarget->useMovt()) |
3981 | ++NumMovwMovt; |
3982 | |
3983 | // FIXME: Once remat is capable of dealing with instructions with register |
3984 | // operands, expand this into multiple nodes |
3985 | unsigned Wrapper = |
3986 | isPositionIndependent() ? ARMISD::WrapperPIC : ARMISD::Wrapper; |
3987 | |
3988 | SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY); |
3989 | SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G); |
3990 | |
3991 | if (Subtarget->isGVIndirectSymbol(GV)) |
3992 | Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result, |
3993 | MachinePointerInfo::getGOT(DAG.getMachineFunction())); |
3994 | return Result; |
3995 | } |
3996 | |
3997 | SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op, |
3998 | SelectionDAG &DAG) const { |
3999 | assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
4000 | assert(Subtarget->useMovt() &&
4001 | "Windows on ARM expects to use movw/movt");
4002 | assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
4003 | "ROPI/RWPI not currently supported for Windows");
4004 | |
4005 | const TargetMachine &TM = getTargetMachine(); |
4006 | const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); |
4007 | ARMII::TOF TargetFlags = ARMII::MO_NO_FLAG; |
4008 | if (GV->hasDLLImportStorageClass()) |
4009 | TargetFlags = ARMII::MO_DLLIMPORT; |
4010 | else if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) |
4011 | TargetFlags = ARMII::MO_COFFSTUB; |
4012 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); |
4013 | SDValue Result; |
4014 | SDLoc DL(Op); |
4015 | |
4016 | ++NumMovwMovt; |
4017 | |
4018 | // FIXME: Once remat is capable of dealing with instructions with register |
4019 | // operands, expand this into two nodes. |
4020 | Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, |
4021 | DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*offset=*/0, |
4022 | TargetFlags)); |
4023 | if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB)) |
4024 | Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result, |
4025 | MachinePointerInfo::getGOT(DAG.getMachineFunction())); |
4026 | return Result; |
4027 | } |
4028 | |
4029 | SDValue |
4030 | ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const { |
4031 | SDLoc dl(Op); |
4032 | SDValue Val = DAG.getConstant(0, dl, MVT::i32); |
4033 | return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, |
4034 | DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0), |
4035 | Op.getOperand(1), Val); |
4036 | } |
4037 | |
4038 | SDValue |
4039 | ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const { |
4040 | SDLoc dl(Op); |
4041 | return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0), |
4042 | Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32)); |
4043 | } |
4044 | |
4045 | SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, |
4046 | SelectionDAG &DAG) const { |
4047 | SDLoc dl(Op); |
4048 | return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other, |
4049 | Op.getOperand(0)); |
4050 | } |
4051 | |
4052 | SDValue ARMTargetLowering::LowerINTRINSIC_VOID( |
4053 | SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const { |
4054 | unsigned IntNo = |
4055 | cast<ConstantSDNode>( |
4056 | Op.getOperand(Op.getOperand(0).getValueType() == MVT::Other)) |
4057 | ->getZExtValue(); |
4058 | switch (IntNo) { |
4059 | default: |
4060 | return SDValue(); // Don't custom lower most intrinsics. |
4061 | case Intrinsic::arm_gnu_eabi_mcount: { |
4062 | MachineFunction &MF = DAG.getMachineFunction(); |
4063 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); |
4064 | SDLoc dl(Op); |
4065 | SDValue Chain = Op.getOperand(0); |
4066 | // call "\01__gnu_mcount_nc" |
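| // __gnu_mcount_nc expects the caller's return address on top of the stack,
| // so the (t)BL_PUSHLR pseudo-instructions used below push LR before making
| // the call.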
4067 | const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo(); |
4068 | const uint32_t *Mask = |
4069 | ARI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C); |
4070 | assert(Mask && "Missing call preserved mask for calling convention");
4071 | // Mark LR as an implicit live-in.
4072 | Register Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32)); |
4073 | SDValue ReturnAddress = |
4074 | DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, PtrVT); |
4075 | constexpr EVT ResultTys[] = {MVT::Other, MVT::Glue}; |
4076 | SDValue Callee = |
4077 | DAG.getTargetExternalSymbol("\01__gnu_mcount_nc", PtrVT, 0); |
4078 | SDValue RegisterMask = DAG.getRegisterMask(Mask); |
4079 | if (Subtarget->isThumb()) |
4080 | return SDValue( |
4081 | DAG.getMachineNode( |
4082 | ARM::tBL_PUSHLR, dl, ResultTys, |
4083 | {ReturnAddress, DAG.getTargetConstant(ARMCC::AL, dl, PtrVT), |
4084 | DAG.getRegister(0, PtrVT), Callee, RegisterMask, Chain}), |
4085 | 0); |
4086 | return SDValue( |
4087 | DAG.getMachineNode(ARM::BL_PUSHLR, dl, ResultTys, |
4088 | {ReturnAddress, Callee, RegisterMask, Chain}), |
4089 | 0); |
4090 | } |
4091 | } |
4092 | } |
4093 | |
4094 | SDValue |
4095 | ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, |
4096 | const ARMSubtarget *Subtarget) const { |
4097 | unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); |
4098 | SDLoc dl(Op); |
4099 | switch (IntNo) { |
4100 | default: return SDValue(); // Don't custom lower most intrinsics. |
4101 | case Intrinsic::thread_pointer: { |
4102 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); |
4103 | return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); |
4104 | } |
4105 | case Intrinsic::arm_cls: { |
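| // cls(x) == ctlz(((x ^ (x >> 31)) << 1) | 1): XOR-ing with the sign
| // extension turns leading copies of the sign bit into zeros; the shift and
| // OR exclude the sign bit from the count while keeping the value non-zero.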
4106 | const SDValue &Operand = Op.getOperand(1); |
4107 | const EVT VTy = Op.getValueType(); |
4108 | SDValue SRA = |
4109 | DAG.getNode(ISD::SRA, dl, VTy, Operand, DAG.getConstant(31, dl, VTy)); |
4110 | SDValue XOR = DAG.getNode(ISD::XOR, dl, VTy, SRA, Operand); |
4111 | SDValue SHL = |
4112 | DAG.getNode(ISD::SHL, dl, VTy, XOR, DAG.getConstant(1, dl, VTy)); |
4113 | SDValue OR = |
4114 | DAG.getNode(ISD::OR, dl, VTy, SHL, DAG.getConstant(1, dl, VTy)); |
4115 | SDValue Result = DAG.getNode(ISD::CTLZ, dl, VTy, OR); |
4116 | return Result; |
4117 | } |
4118 | case Intrinsic::arm_cls64: { |
4119 | // cls(x) = if cls(hi(x)) != 31 then cls(hi(x)) |
4120 | // else 31 + clz(if hi(x) == 0 then lo(x) else not(lo(x))) |
4121 | const SDValue &Operand = Op.getOperand(1); |
4122 | const EVT VTy = Op.getValueType(); |
4123 | |
4124 | SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VTy, Operand, |
4125 | DAG.getConstant(1, dl, VTy)); |
4126 | SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VTy, Operand, |
4127 | DAG.getConstant(0, dl, VTy)); |
4128 | SDValue Constant0 = DAG.getConstant(0, dl, VTy); |
4129 | SDValue Constant1 = DAG.getConstant(1, dl, VTy); |
4130 | SDValue Constant31 = DAG.getConstant(31, dl, VTy); |
4131 | SDValue SRAHi = DAG.getNode(ISD::SRA, dl, VTy, Hi, Constant31); |
4132 | SDValue XORHi = DAG.getNode(ISD::XOR, dl, VTy, SRAHi, Hi); |
4133 | SDValue SHLHi = DAG.getNode(ISD::SHL, dl, VTy, XORHi, Constant1); |
4134 | SDValue ORHi = DAG.getNode(ISD::OR, dl, VTy, SHLHi, Constant1); |
4135 | SDValue CLSHi = DAG.getNode(ISD::CTLZ, dl, VTy, ORHi); |
4136 | SDValue CheckLo = |
4137 | DAG.getSetCC(dl, MVT::i1, CLSHi, Constant31, ISD::CondCode::SETEQ); |
4138 | SDValue HiIsZero = |
4139 | DAG.getSetCC(dl, MVT::i1, Hi, Constant0, ISD::CondCode::SETEQ); |
4140 | SDValue AdjustedLo = |
4141 | DAG.getSelect(dl, VTy, HiIsZero, Lo, DAG.getNOT(dl, Lo, VTy)); |
4142 | SDValue CLZAdjustedLo = DAG.getNode(ISD::CTLZ, dl, VTy, AdjustedLo); |
4143 | SDValue Result = |
4144 | DAG.getSelect(dl, VTy, CheckLo, |
4145 | DAG.getNode(ISD::ADD, dl, VTy, CLZAdjustedLo, Constant31), CLSHi); |
4146 | return Result; |
4147 | } |
4148 | case Intrinsic::eh_sjlj_lsda: { |
4149 | MachineFunction &MF = DAG.getMachineFunction(); |
4150 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); |
4151 | unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); |
4152 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); |
4153 | SDValue CPAddr; |
4154 | bool IsPositionIndependent = isPositionIndependent(); |
4155 | unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0; |
4156 | ARMConstantPoolValue *CPV = |
4157 | ARMConstantPoolConstant::Create(&MF.getFunction(), ARMPCLabelIndex, |
4158 | ARMCP::CPLSDA, PCAdj); |
4159 | CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4)); |
4160 | CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); |
4161 | SDValue Result = DAG.getLoad( |
4162 | PtrVT, dl, DAG.getEntryNode(), CPAddr, |
4163 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); |
4164 | |
4165 | if (IsPositionIndependent) { |
4166 | SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); |
4167 | Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); |
4168 | } |
4169 | return Result; |
4170 | } |
4171 | case Intrinsic::arm_neon_vabs: |
4172 | return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(), |
4173 | Op.getOperand(1)); |
4174 | case Intrinsic::arm_neon_vmulls: |
4175 | case Intrinsic::arm_neon_vmullu: { |
4176 | unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls) |
4177 | ? ARMISD::VMULLs : ARMISD::VMULLu; |
4178 | return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), |
4179 | Op.getOperand(1), Op.getOperand(2)); |
4180 | } |
4181 | case Intrinsic::arm_neon_vminnm: |
4182 | case Intrinsic::arm_neon_vmaxnm: { |
4183 | unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm) |
4184 | ? ISD::FMINNUM : ISD::FMAXNUM; |
4185 | return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), |
4186 | Op.getOperand(1), Op.getOperand(2)); |
4187 | } |
4188 | case Intrinsic::arm_neon_vminu: |
4189 | case Intrinsic::arm_neon_vmaxu: { |
4190 | if (Op.getValueType().isFloatingPoint()) |
4191 | return SDValue(); |
4192 | unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu) |
4193 | ? ISD::UMIN : ISD::UMAX; |
4194 | return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), |
4195 | Op.getOperand(1), Op.getOperand(2)); |
4196 | } |
4197 | case Intrinsic::arm_neon_vmins: |
4198 | case Intrinsic::arm_neon_vmaxs: { |
4199 | // v{min,max}s is overloaded between signed integers and floats. |
4200 | if (!Op.getValueType().isFloatingPoint()) { |
4201 | unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins) |
4202 | ? ISD::SMIN : ISD::SMAX; |
4203 | return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), |
4204 | Op.getOperand(1), Op.getOperand(2)); |
4205 | } |
4206 | unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins) |
4207 | ? ISD::FMINIMUM : ISD::FMAXIMUM; |
4208 | return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), |
4209 | Op.getOperand(1), Op.getOperand(2)); |
4210 | } |
4211 | case Intrinsic::arm_neon_vtbl1: |
4212 | return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(), |
4213 | Op.getOperand(1), Op.getOperand(2)); |
4214 | case Intrinsic::arm_neon_vtbl2: |
4215 | return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(), |
4216 | Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); |
4217 | case Intrinsic::arm_mve_pred_i2v: |
4218 | case Intrinsic::arm_mve_pred_v2i: |
4219 | return DAG.getNode(ARMISD::PREDICATE_CAST, SDLoc(Op), Op.getValueType(), |
4220 | Op.getOperand(1)); |
4221 | case Intrinsic::arm_mve_vreinterpretq: |
4222 | return DAG.getNode(ARMISD::VECTOR_REG_CAST, SDLoc(Op), Op.getValueType(), |
4223 | Op.getOperand(1)); |
4224 | case Intrinsic::arm_mve_lsll: |
4225 | return DAG.getNode(ARMISD::LSLL, SDLoc(Op), Op->getVTList(), |
4226 | Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); |
4227 | case Intrinsic::arm_mve_asrl: |
4228 | return DAG.getNode(ARMISD::ASRL, SDLoc(Op), Op->getVTList(), |
4229 | Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); |
4230 | } |
4231 | } |
4232 | |
4233 | static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, |
4234 | const ARMSubtarget *Subtarget) { |
4235 | SDLoc dl(Op); |
4236 | ConstantSDNode *SSIDNode = cast<ConstantSDNode>(Op.getOperand(2)); |
4237 | auto SSID = static_cast<SyncScope::ID>(SSIDNode->getZExtValue()); |
4238 | if (SSID == SyncScope::SingleThread) |
4239 | return Op; |
4240 | |
4241 | if (!Subtarget->hasDataBarrier()) { |
4242 | // Some ARMv6 cpus can support data barriers with an mcr instruction. |
4243 | // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get |
4244 | // here. |
4245 | assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
4246 | "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
4247 | return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0), |
4248 | DAG.getConstant(0, dl, MVT::i32)); |
4249 | } |
4250 | |
4251 | ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1)); |
4252 | AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue()); |
4253 | ARM_MB::MemBOpt Domain = ARM_MB::ISH; |
4254 | if (Subtarget->isMClass()) { |
4255 | // Only a full system barrier exists in the M-class architectures. |
4256 | Domain = ARM_MB::SY; |
4257 | } else if (Subtarget->preferISHSTBarriers() && |
4258 | Ord == AtomicOrdering::Release) { |
4259 | // Swift happens to implement ISHST barriers in a way that's compatible with |
4260 | // Release semantics but weaker than ISH so we'd be fools not to use |
4261 | // it. Beware: other processors probably don't! |
4262 | Domain = ARM_MB::ISHST; |
4263 | } |
4264 | |
4265 | return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0), |
4266 | DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32), |
4267 | DAG.getConstant(Domain, dl, MVT::i32)); |
4268 | } |
4269 | |
4270 | static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG, |
4271 | const ARMSubtarget *Subtarget) { |
4272 | // ARM pre-v5TE and Thumb1 do not have preload instructions.
4273 | if (!(Subtarget->isThumb2() || |
4274 | (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps()))) |
4275 | // Just preserve the chain. |
4276 | return Op.getOperand(0); |
4277 | |
4278 | SDLoc dl(Op); |
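| // Operand 2 is the rw flag of llvm.prefetch (0 = read, 1 = write), so invert
| // it to get an "is read" flag.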
4279 | unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1; |
4280 | if (!isRead && |
4281 | (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension())) |
4282 | // ARMv7 with MP extension has PLDW. |
4283 | return Op.getOperand(0); |
4284 | |
4285 | unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue(); |
4286 | if (Subtarget->isThumb()) { |
4287 | // Invert the bits. |
4288 | isRead = ~isRead & 1; |
4289 | isData = ~isData & 1; |
4290 | } |
4291 | |
4292 | return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0), |
4293 | Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32), |
4294 | DAG.getConstant(isData, dl, MVT::i32)); |
4295 | } |
4296 | |
4297 | static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { |
4298 | MachineFunction &MF = DAG.getMachineFunction(); |
4299 | ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>(); |
4300 | |
4301 | // vastart just stores the address of the VarArgsFrameIndex slot into the |
4302 | // memory location argument. |
4303 | SDLoc dl(Op); |
4304 | EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); |
4305 | SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); |
4306 | const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); |
4307 | return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), |
4308 | MachinePointerInfo(SV)); |
4309 | } |
4310 | |
4311 | SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, |
4312 | CCValAssign &NextVA, |
4313 | SDValue &Root, |
4314 | SelectionDAG &DAG, |
4315 | const SDLoc &dl) const { |
4316 | MachineFunction &MF = DAG.getMachineFunction(); |
4317 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); |
4318 | |
4319 | const TargetRegisterClass *RC; |
4320 | if (AFI->isThumb1OnlyFunction()) |
4321 | RC = &ARM::tGPRRegClass; |
4322 | else |
4323 | RC = &ARM::GPRRegClass; |
4324 | |
4325 | // Transform the arguments stored in physical registers into virtual ones. |
4326 | Register Reg = MF.addLiveIn(VA.getLocReg(), RC); |
4327 | SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); |
4328 | |
4329 | SDValue ArgValue2; |
4330 | if (NextVA.isMemLoc()) { |
4331 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
4332 | int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true); |
4333 | |
4334 | // Create load node to retrieve arguments from the stack. |
4335 | SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); |
4336 | ArgValue2 = DAG.getLoad( |
4337 | MVT::i32, dl, Root, FIN, |
4338 | MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)); |
4339 | } else { |
4340 | Reg = MF.addLiveIn(NextVA.getLocReg(), RC); |
4341 | ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); |
4342 | } |
4343 | if (!Subtarget->isLittle()) |
4344 | std::swap (ArgValue, ArgValue2); |
4345 | return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2); |
4346 | } |
4347 | |
4348 | // The remaining GPRs hold either the beginning of variable-argument |
4349 | // data, or the beginning of an aggregate passed by value (usually |
4350 | // byval). Either way, we allocate stack slots adjacent to the data |
4351 | // provided by our caller, and store the unallocated registers there. |
4352 | // If this is a variadic function, the va_list pointer will begin with |
4353 | // these values; otherwise, this reassembles a (byval) structure that |
4354 | // was split between registers and memory. |
4355 | // Return: the frame index the registers were stored into.
4356 | int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, |
4357 | const SDLoc &dl, SDValue &Chain, |
4358 | const Value *OrigArg, |
4359 | unsigned InRegsParamRecordIdx, |
4360 | int ArgOffset, unsigned ArgSize) const { |
4361 | // Currently, two use cases are possible:
4362 | // Case #1. Non-var-args function, and we meet the first byval parameter.
4363 | // Set up the first unallocated register as the first byval register;
4364 | // eat all remaining registers
4365 | // (these two actions are performed by the HandleByVal method).
4366 | // Then, here, we initialize the stack frame with
4367 | // "store-reg" instructions.
4368 | // Case #2. Var-args function that doesn't contain byval parameters.
4369 | // The same: eat all remaining unallocated registers and
4370 | // initialize the stack frame.
4371 | |
4372 | MachineFunction &MF = DAG.getMachineFunction(); |
4373 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
4374 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); |
4375 | unsigned RBegin, REnd; |
4376 | if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) { |
4377 | CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd); |
4378 | } else { |
4379 | unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs); |
4380 | RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx]; |
4381 | REnd = ARM::R4; |
4382 | } |
4383 | |
4384 | if (REnd != RBegin) |
4385 | ArgOffset = -4 * (ARM::R4 - RBegin); |
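| // For example, if RBegin is r2 and REnd is r4, r2 and r3 get stored and the
| // fixed object below is created at offset -8, directly adjacent to the
| // arguments our caller passed on the stack.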
4386 | |
4387 | auto PtrVT = getPointerTy(DAG.getDataLayout()); |
4388 | int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false); |
4389 | SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT); |
4390 | |
4391 | SmallVector<SDValue, 4> MemOps; |
4392 | const TargetRegisterClass *RC = |
4393 | AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass; |
4394 | |
4395 | for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) { |
4396 | Register VReg = MF.addLiveIn(Reg, RC); |
4397 | SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); |
4398 | SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, |
4399 | MachinePointerInfo(OrigArg, 4 * i)); |
4400 | MemOps.push_back(Store); |
4401 | FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT)); |
4402 | } |
4403 | |
4404 | if (!MemOps.empty()) |
4405 | Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); |
4406 | return FrameIndex; |
4407 | } |
4408 | |
4409 | // Set up the stack frame that the va_list pointer will start from.
4410 | void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, |
4411 | const SDLoc &dl, SDValue &Chain, |
4412 | unsigned ArgOffset, |
4413 | unsigned TotalArgRegsSaveSize, |
4414 | bool ForceMutable) const { |
4415 | MachineFunction &MF = DAG.getMachineFunction(); |
4416 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); |
4417 | |
4418 | // Try to store any remaining integer argument regs |
4419 | // to their spots on the stack so that they may be loaded by dereferencing |
4420 | // the result of va_next. |
4421 | // If there are no registers to be stored, just point the address past the
4422 | // last argument passed via the stack.
4423 | int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr, |
4424 | CCInfo.getInRegsParamsCount(), |
4425 | CCInfo.getNextStackOffset(), |
4426 | std::max(4U, TotalArgRegsSaveSize)); |
4427 | AFI->setVarArgsFrameIndex(FrameIndex); |
4428 | } |
4429 | |
4430 | bool ARMTargetLowering::splitValueIntoRegisterParts( |
4431 | SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, |
4432 | unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const { |
4433 | EVT ValueVT = Val.getValueType(); |
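| // An f16/bf16 value passed or returned in an f32 register lives in the low
| // 16 bits of that register: bitcast to i16, any-extend to i32, then bitcast
| // the widened integer to f32.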
4434 | if ((ValueVT == MVT::f16 || ValueVT == MVT::bf16) && PartVT == MVT::f32) { |
4435 | unsigned ValueBits = ValueVT.getSizeInBits(); |
4436 | unsigned PartBits = PartVT.getSizeInBits(); |
4437 | Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(ValueBits), Val); |
4438 | Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::getIntegerVT(PartBits), Val); |
4439 | Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); |
4440 | Parts[0] = Val; |
4441 | return true; |
4442 | } |
4443 | return false; |
4444 | } |
4445 | |
4446 | SDValue ARMTargetLowering::joinRegisterPartsIntoValue( |
4447 | SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, |
4448 | MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const { |
4449 | if ((ValueVT == MVT::f16 || ValueVT == MVT::bf16) && PartVT == MVT::f32) { |
4450 | unsigned ValueBits = ValueVT.getSizeInBits(); |
4451 | unsigned PartBits = PartVT.getSizeInBits(); |
4452 | SDValue Val = Parts[0]; |
4453 | |
4454 | Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(PartBits), Val); |
4455 | Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::getIntegerVT(ValueBits), Val); |
4456 | Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); |
4457 | return Val; |
4458 | } |
4459 | return SDValue(); |
4460 | } |
4461 | |
4462 | SDValue ARMTargetLowering::LowerFormalArguments( |
4463 | SDValue Chain, CallingConv::ID CallConv, bool isVarArg, |
4464 | const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, |
4465 | SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { |
4466 | MachineFunction &MF = DAG.getMachineFunction(); |
4467 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
4468 | |
4469 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); |
4470 | |
4471 | // Assign locations to all of the incoming arguments. |
4472 | SmallVector<CCValAssign, 16> ArgLocs; |
4473 | CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, |
4474 | *DAG.getContext()); |
4475 | CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg)); |
4476 | |
4477 | SmallVector<SDValue, 16> ArgValues; |
4478 | SDValue ArgValue; |
4479 | Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin(); |
4480 | unsigned CurArgIdx = 0; |
4481 | |
4482 | // Initially ArgRegsSaveSize is zero. |
4483 | // Then we increase this value each time we meet a byval parameter.
4484 | // We also increase this value in the case of a varargs function.
4485 | AFI->setArgRegsSaveSize(0); |
4486 | |
4487 | // Calculate the amount of stack space that we need to allocate to store |
4488 | // byval and variadic arguments that are passed in registers. |
4489 | // We need to know this before we allocate the first byval or variadic |
4490 | // argument, as they will be allocated a stack slot below the CFA (Canonical |
4491 | // Frame Address, the stack pointer at entry to the function). |
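| // For example, if the lowest register holding byval or variadic data is r2,
| // ArgRegBegin becomes r2 and TotalArgRegsSaveSize is 8, covering r2 and r3.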
4492 | unsigned ArgRegBegin = ARM::R4; |
4493 | for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { |
4494 | if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount()) |
4495 | break; |
4496 | |
4497 | CCValAssign &VA = ArgLocs[i]; |
4498 | unsigned Index = VA.getValNo(); |
4499 | ISD::ArgFlagsTy Flags = Ins[Index].Flags; |
4500 | if (!Flags.isByVal()) |
4501 | continue; |
4502 | |
4503 | assert(VA.isMemLoc() && "unexpected byval pointer in reg");
4504 | unsigned RBegin, REnd; |
4505 | CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd); |
4506 | ArgRegBegin = std::min(ArgRegBegin, RBegin); |
4507 | |
4508 | CCInfo.nextInRegsParam(); |
4509 | } |
4510 | CCInfo.rewindByValRegsInfo(); |
4511 | |
4512 | int lastInsIndex = -1; |
4513 | if (isVarArg && MFI.hasVAStart()) { |
4514 | unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs); |
4515 | if (RegIdx != std::size(GPRArgRegs)) |
4516 | ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]); |
4517 | } |
4518 | |
4519 | unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin); |
4520 | AFI->setArgRegsSaveSize(TotalArgRegsSaveSize); |
4521 | auto PtrVT = getPointerTy(DAG.getDataLayout()); |
4522 | |
4523 | for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { |
4524 | CCValAssign &VA = ArgLocs[i]; |
4525 | if (Ins[VA.getValNo()].isOrigArg()) { |
4526 | std::advance(CurOrigArg, |
4527 | Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx); |
4528 | CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex(); |
4529 | } |
4530 | // Arguments stored in registers. |
4531 | if (VA.isRegLoc()) { |
4532 | EVT RegVT = VA.getLocVT(); |
4533 | |
4534 | if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) { |
4535 | // f64 and vector types are split up into multiple registers or |
4536 | // combinations of registers and stack slots. |
4537 | SDValue ArgValue1 = |
4538 | GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); |
4539 | VA = ArgLocs[++i]; // skip ahead to next loc |
4540 | SDValue ArgValue2; |
4541 | if (VA.isMemLoc()) { |
4542 | int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true); |
4543 | SDValue FIN = DAG.getFrameIndex(FI, PtrVT); |
4544 | ArgValue2 = DAG.getLoad( |
4545 | MVT::f64, dl, Chain, FIN, |
4546 | MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)); |
4547 | } else { |
4548 | ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); |
4549 | } |
4550 | ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); |
4551 | ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue, |
4552 | ArgValue1, DAG.getIntPtrConstant(0, dl)); |
4553 | ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue, |
4554 | ArgValue2, DAG.getIntPtrConstant(1, dl)); |
4555 | } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) { |
4556 | ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); |
4557 | } else { |
4558 | const TargetRegisterClass *RC; |
4559 | |
4560 | if (RegVT == MVT::f16 || RegVT == MVT::bf16) |
4561 | RC = &ARM::HPRRegClass; |
4562 | else if (RegVT == MVT::f32) |
4563 | RC = &ARM::SPRRegClass; |
4564 | else if (RegVT == MVT::f64 || RegVT == MVT::v4f16 || |
4565 | RegVT == MVT::v4bf16) |
4566 | RC = &ARM::DPRRegClass; |
4567 | else if (RegVT == MVT::v2f64 || RegVT == MVT::v8f16 || |
4568 | RegVT == MVT::v8bf16) |
4569 | RC = &ARM::QPRRegClass; |
4570 | else if (RegVT == MVT::i32) |
4571 | RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass |
4572 | : &ARM::GPRRegClass; |
4573 | else |
4574 | llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
4575 | |
4576 | // Transform the arguments in physical registers into virtual ones. |
4577 | Register Reg = MF.addLiveIn(VA.getLocReg(), RC); |
4578 | ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); |
4579 | |
4580 | // If this value is passed in r0 and has the returned attribute (e.g. |
4581 | // C++ 'structors), record this fact for later use. |
4582 | if (VA.getLocReg() == ARM::R0 && Ins[VA.getValNo()].Flags.isReturned()) { |
4583 | AFI->setPreservesR0(); |
4584 | } |
4585 | } |
4586 | |
4587 | // If this is an 8 or 16-bit value, it is really passed promoted |
4588 | // to 32 bits. Insert an assert[sz]ext to capture this, then |
4589 | // truncate to the right size. |
4590 | switch (VA.getLocInfo()) { |
4591 | default: llvm_unreachable("Unknown loc info!");
4592 | case CCValAssign::Full: break; |
4593 | case CCValAssign::BCvt: |
4594 | ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue); |
4595 | break; |
4596 | case CCValAssign::SExt: |
4597 | ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue, |
4598 | DAG.getValueType(VA.getValVT())); |
4599 | ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); |
4600 | break; |
4601 | case CCValAssign::ZExt: |
4602 | ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, |
4603 | DAG.getValueType(VA.getValVT())); |
4604 | ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); |
4605 | break; |
4606 | } |
4607 | |
4608 | // f16 arguments have their size extended to 4 bytes and passed as if they |
4609 | // had been copied to the LSBs of a 32-bit register. |
4610 | // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI) |
4611 | if (VA.needsCustom() && |
4612 | (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) |
4613 | ArgValue = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), ArgValue); |
4614 | |
4615 | InVals.push_back(ArgValue); |
4616 | } else { // !VA.isRegLoc()
4617 | // Only arguments passed on the stack should make it here. |
4618 | assert(VA.isMemLoc());
4619 | assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
4620 | |
4621 | int index = VA.getValNo(); |
4622 | |
4623 | // Some Ins[] entries become multiple ArgLoc[] entries. |
4624 | // Process them only once. |
4625 | if (index != lastInsIndex) |
4626 | { |
4627 | ISD::ArgFlagsTy Flags = Ins[index].Flags; |
4628 | // FIXME: For now, all byval parameter objects are marked mutable. |
4629 | // This can be changed with more analysis. |
4630 | // In case of tail call optimization, mark all arguments mutable, since
4631 | // they could be overwritten by the lowering of arguments in case of a
4632 | // tail call.
4633 | if (Flags.isByVal()) { |
4634 | assert(Ins[index].isOrigArg() &&
4635 |        "Byval arguments cannot be implicit");
4636 | unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed(); |
4637 | |
4638 | int FrameIndex = StoreByValRegs( |
4639 | CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex, |
4640 | VA.getLocMemOffset(), Flags.getByValSize()); |
4641 | InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT)); |
4642 | CCInfo.nextInRegsParam(); |
4643 | } else { |
4644 | unsigned FIOffset = VA.getLocMemOffset(); |
4645 | int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits()/8, |
4646 | FIOffset, true); |
4647 | |
4648 | // Create load nodes to retrieve arguments from the stack. |
4649 | SDValue FIN = DAG.getFrameIndex(FI, PtrVT); |
4650 | InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, |
4651 | MachinePointerInfo::getFixedStack( |
4652 | DAG.getMachineFunction(), FI))); |
4653 | } |
4654 | lastInsIndex = index; |
4655 | } |
4656 | } |
4657 | } |
4658 | |
4659 | // varargs |
4660 | if (isVarArg && MFI.hasVAStart()) { |
4661 | VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getNextStackOffset(), |
4662 | TotalArgRegsSaveSize); |
4663 | if (AFI->isCmseNSEntryFunction()) { |
4664 | DiagnosticInfoUnsupported Diag( |
4665 | DAG.getMachineFunction().getFunction(), |
4666 | "secure entry function must not be variadic", dl.getDebugLoc()); |
4667 | DAG.getContext()->diagnose(Diag); |
4668 | } |
4669 | } |
4670 | |
4671 | unsigned StackArgSize = CCInfo.getNextStackOffset(); |
4672 | bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt; |
4673 | if (canGuaranteeTCO(CallConv, TailCallOpt)) { |
4674 | // The only way to guarantee a tail call is if the callee restores its |
4675 | // argument area, but it must also keep the stack aligned when doing so. |
4676 | const DataLayout &DL = DAG.getDataLayout(); |
4677 | StackArgSize = alignTo(StackArgSize, DL.getStackAlignment()); |
4678 | |
4679 | AFI->setArgumentStackToRestore(StackArgSize); |
4680 | } |
4681 | AFI->setArgumentStackSize(StackArgSize); |
4682 | |
4683 | if (CCInfo.getNextStackOffset() > 0 && AFI->isCmseNSEntryFunction()) { |
4684 | DiagnosticInfoUnsupported Diag( |
4685 | DAG.getMachineFunction().getFunction(), |
4686 | "secure entry function requires arguments on stack", dl.getDebugLoc()); |
4687 | DAG.getContext()->diagnose(Diag); |
4688 | } |
4689 | |
4690 | return Chain; |
4691 | } |
4692 | |
4693 | /// isFloatingPointZero - Return true if this is +0.0. |
4694 | static bool isFloatingPointZero(SDValue Op) { |
4695 | if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) |
4696 | return CFP->getValueAPF().isPosZero(); |
4697 | else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) { |
4698 | // Maybe this has already been legalized into the constant pool? |
4699 | if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) { |
4700 | SDValue WrapperOp = Op.getOperand(1).getOperand(0); |
4701 | if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp)) |
4702 | if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) |
4703 | return CFP->getValueAPF().isPosZero(); |
4704 | } |
4705 | } else if (Op->getOpcode() == ISD::BITCAST && |
4706 | Op->getValueType(0) == MVT::f64) { |
4707 | // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64) |
4708 | // created by LowerConstantFP(). |
4709 | SDValue BitcastOp = Op->getOperand(0); |
4710 | if (BitcastOp->getOpcode() == ARMISD::VMOVIMM && |
4711 | isNullConstant(BitcastOp->getOperand(0))) |
4712 | return true; |
4713 | } |
4714 | return false; |
4715 | } |
4716 | |
4717 | /// Returns an appropriate ARM CMP (cmp) and corresponding condition code for
4718 | /// the given operands. |
4719 | SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, |
4720 | SDValue &ARMcc, SelectionDAG &DAG, |
4721 | const SDLoc &dl) const { |
4722 | if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) { |
4723 | unsigned C = RHSC->getZExtValue(); |
4724 | if (!isLegalICmpImmediate((int32_t)C)) { |
4725 | // Constant does not fit, try adjusting it by one. |
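     | // For example (illustrative): "x < 0x1001" cannot encode 0x1001 as a cmp
     | // immediate in ARM mode, but "x <= 0x1000" expresses the same condition and
     | // 0x1000 is encodable, so the condition and constant are adjusted together.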
4726 | switch (CC) { |
4727 | default: break; |
4728 | case ISD::SETLT: |
4729 | case ISD::SETGE: |
4730 | if (C != 0x80000000 && isLegalICmpImmediate(C-1)) { |
4731 | CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT; |
4732 | RHS = DAG.getConstant(C - 1, dl, MVT::i32); |
4733 | } |
4734 | break; |
4735 | case ISD::SETULT: |
4736 | case ISD::SETUGE: |
4737 | if (C != 0 && isLegalICmpImmediate(C-1)) { |
4738 | CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT; |
4739 | RHS = DAG.getConstant(C - 1, dl, MVT::i32); |
4740 | } |
4741 | break; |
4742 | case ISD::SETLE: |
4743 | case ISD::SETGT: |
4744 | if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) { |
4745 | CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE; |
4746 | RHS = DAG.getConstant(C + 1, dl, MVT::i32); |
4747 | } |
4748 | break; |
4749 | case ISD::SETULE: |
4750 | case ISD::SETUGT: |
4751 | if (C != 0xffffffff && isLegalICmpImmediate(C+1)) { |
4752 | CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE; |
4753 | RHS = DAG.getConstant(C + 1, dl, MVT::i32); |
4754 | } |
4755 | break; |
4756 | } |
4757 | } |
4758 | } else if ((ARM_AM::getShiftOpcForNode(LHS.getOpcode()) != ARM_AM::no_shift) && |
4759 | (ARM_AM::getShiftOpcForNode(RHS.getOpcode()) == ARM_AM::no_shift)) { |
4760 | // In ARM and Thumb-2, the compare instructions can shift their second |
4761 | // operand. |
4762 | CC = ISD::getSetCCSwappedOperands(CC); |
4763 | std::swap(LHS, RHS); |
4764 | } |
4765 | |
4766 | // Thumb1 has very limited immediate modes, so turning an "and" into a |
4767 | // shift can save multiple instructions. |
4768 | // |
4769 | // If we have (x & C1), and C1 is an appropriate mask, we can transform it |
4770 | // into "((x << n) >> n)". But that isn't necessarily profitable on its |
4771 | // own. If it's the operand to an unsigned comparison with an immediate, |
4772 | // we can eliminate one of the shifts: we transform |
4773 | // "((x << n) >> n) == C2" to "(x << n) == (C2 << n)". |
4774 | // |
4775 | // We avoid transforming cases which aren't profitable due to encoding |
4776 | // details: |
4777 | // |
4778 | // 1. C2 fits into the immediate field of a cmp, and the transformed version |
4779 | // would not; in that case, we're essentially trading one immediate load for |
4780 | // another. |
4781 | // 2. C1 is 255 or 65535, so we can use uxtb or uxth. |
4782 | // 3. C2 is zero; we have other code for this special case. |
4783 | // |
4784 | // FIXME: Figure out profitability for Thumb2; we usually can't save an |
4785 | // instruction, since the AND is always one instruction anyway, but we could |
4786 | // use narrow instructions in some cases. |
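     | // Illustrative example (Thumb1): "(x & 0xfff) == 0x400" uses a mask that is
     | // neither 255 nor 65535 and an RHS that does not fit in 8 bits anyway, so it
     | // is rewritten as "(x << 20) == (0x400 << 20)", leaving a single shift
     | // feeding the compare instead of a shift pair (or a materialised mask).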
4787 | if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::AND && |
4788 | LHS->hasOneUse() && isa<ConstantSDNode>(LHS.getOperand(1)) && |
4789 | LHS.getValueType() == MVT::i32 && isa<ConstantSDNode>(RHS) && |
4790 | !isSignedIntSetCC(CC)) { |
4791 | unsigned Mask = cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue(); |
4792 | auto *RHSC = cast<ConstantSDNode>(RHS.getNode()); |
4793 | uint64_t RHSV = RHSC->getZExtValue(); |
4794 | if (isMask_32(Mask) && (RHSV & ~Mask) == 0 && Mask != 255 && Mask != 65535) { |
4795 | unsigned ShiftBits = llvm::countl_zero(Mask); |
4796 | if (RHSV && (RHSV > 255 || (RHSV << ShiftBits) <= 255)) { |
4797 | SDValue ShiftAmt = DAG.getConstant(ShiftBits, dl, MVT::i32); |
4798 | LHS = DAG.getNode(ISD::SHL, dl, MVT::i32, LHS.getOperand(0), ShiftAmt); |
4799 | RHS = DAG.getConstant(RHSV << ShiftBits, dl, MVT::i32); |
4800 | } |
4801 | } |
4802 | } |
4803 | |
4804 | // The specific comparison "(x<<c) > 0x80000000U" can be optimized to a |
4805 | // single "lsls x, c+1". The shift sets the "C" and "Z" flags the same |
4806 | // way a cmp would. |
4807 | // FIXME: Add support for ARM/Thumb2; this would need isel patterns, and |
4808 | // some tweaks to the heuristics for the previous and->shift transform. |
4809 | // FIXME: Optimize cases where the LHS isn't a shift. |
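     | // Illustrative example: "(x << 3) > 0x80000000U" becomes "lsls r, x, #4";
     | // the HI condition (C set, Z clear) then holds exactly when the original
     | // unsigned comparison would have been true.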
4810 | if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::SHL && |
4811 | isa<ConstantSDNode>(RHS) && |
4812 | cast<ConstantSDNode>(RHS)->getZExtValue() == 0x80000000U && |
4813 | CC == ISD::SETUGT && isa<ConstantSDNode>(LHS.getOperand(1)) && |
4814 | cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() < 31) { |
4815 | unsigned ShiftAmt = |
4816 | cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() + 1; |
4817 | SDValue Shift = DAG.getNode(ARMISD::LSLS, dl, |
4818 | DAG.getVTList(MVT::i32, MVT::i32), |
4819 | LHS.getOperand(0), |
4820 | DAG.getConstant(ShiftAmt, dl, MVT::i32)); |
4821 | SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR, |
4822 | Shift.getValue(1), SDValue()); |
4823 | ARMcc = DAG.getConstant(ARMCC::HI, dl, MVT::i32); |
4824 | return Chain.getValue(1); |
4825 | } |
4826 | |
4827 | ARMCC::CondCodes CondCode = IntCCToARMCC(CC); |
4828 | |
4829 | // If the RHS is a constant zero then the V (overflow) flag will never be |
4830 | // set. This can allow us to simplify GE to PL or LT to MI, which can be |
4831 | // simpler for other passes (like the peephole optimiser) to deal with. |
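     | // For example, with a zero RHS "cmp r0, #0; bge" and "cmp r0, #0; bpl"
     | // behave identically: subtracting zero can never set V, so GE (N == V)
     | // degenerates to PL (N clear).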
4832 | if (isNullConstant(RHS)) { |
4833 | switch (CondCode) { |
4834 | default: break; |
4835 | case ARMCC::GE: |
4836 | CondCode = ARMCC::PL; |
4837 | break; |
4838 | case ARMCC::LT: |
4839 | CondCode = ARMCC::MI; |
4840 | break; |
4841 | } |
4842 | } |
4843 | |
4844 | ARMISD::NodeType CompareType; |
4845 | switch (CondCode) { |
4846 | default: |
4847 | CompareType = ARMISD::CMP; |
4848 | break; |
4849 | case ARMCC::EQ: |
4850 | case ARMCC::NE: |
4851 | // Uses only Z Flag |
4852 | CompareType = ARMISD::CMPZ; |
4853 | break; |
4854 | } |
4855 | ARMcc = DAG.getConstant(CondCode, dl, MVT::i32); |
4856 | return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS); |
4857 | } |
4858 | |
4859 | /// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
4860 | SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, |
4861 | SelectionDAG &DAG, const SDLoc &dl, |
4862 | bool Signaling) const { |
4863 | assert(Subtarget->hasFP64() || RHS.getValueType() != MVT::f64);
4864 | SDValue Cmp; |
4865 | if (!isFloatingPointZero(RHS)) |
4866 | Cmp = DAG.getNode(Signaling ? ARMISD::CMPFPE : ARMISD::CMPFP, |
4867 | dl, MVT::Glue, LHS, RHS); |
4868 | else |
4869 | Cmp = DAG.getNode(Signaling ? ARMISD::CMPFPEw0 : ARMISD::CMPFPw0, |
4870 | dl, MVT::Glue, LHS); |
4871 | return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp); |
4872 | } |
4873 | |
4874 | /// duplicateCmp - Glue values can have only one use, so this function |
4875 | /// duplicates a comparison node. |
4876 | SDValue |
4877 | ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const { |
4878 | unsigned Opc = Cmp.getOpcode(); |
4879 | SDLoc DL(Cmp); |
4880 | if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ) |
4881 | return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); |
4882 | |
4883 | assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
4884 | Cmp = Cmp.getOperand(0); |
4885 | Opc = Cmp.getOpcode(); |
4886 | if (Opc == ARMISD::CMPFP) |
4887 | Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); |
4888 | else { |
4889 | assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
4890 | Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0)); |
4891 | } |
4892 | return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp); |
4893 | } |
4894 | |
4895 | // This function returns three things: the arithmetic computation itself |
4896 | // (Value), a comparison (OverflowCmp), and a condition code (ARMcc). The |
4897 | // comparison and the condition code define the case in which the arithmetic |
4898 | // computation *does not* overflow. |
4899 | std::pair<SDValue, SDValue> |
4900 | ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG, |
4901 | SDValue &ARMcc) const { |
4902 | assert(Op.getValueType() == MVT::i32 && "Unsupported value type");
4903 | |
4904 | SDValue Value, OverflowCmp; |
4905 | SDValue LHS = Op.getOperand(0); |
4906 | SDValue RHS = Op.getOperand(1); |
4907 | SDLoc dl(Op); |
4908 | |
4909 | // FIXME: We are currently always generating CMPs because we don't support |
4910 | // generating CMN through the backend. This is not as good as the natural |
4911 | // CMP case because it causes a register dependency and cannot be folded |
4912 | // later. |
4913 | |
4914 | switch (Op.getOpcode()) { |
4915 | default: |
4916 | llvm_unreachable("Unknown overflow instruction!");
4917 | case ISD::SADDO: |
4918 | ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32); |
4919 | Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS); |
4920 | OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS); |
4921 | break; |
4922 | case ISD::UADDO: |
4923 | ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32); |
4924 | // We use ADDC here to correspond to its use in LowerUnsignedALUO. |
4925 | // We do not use it in the USUBO case as Value may not be used. |
4926 | Value = DAG.getNode(ARMISD::ADDC, dl, |
4927 | DAG.getVTList(Op.getValueType(), MVT::i32), LHS, RHS) |
4928 | .getValue(0); |
4929 | OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS); |
4930 | break; |
4931 | case ISD::SSUBO: |
4932 | ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32); |
4933 | Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS); |
4934 | OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS); |
4935 | break; |
4936 | case ISD::USUBO: |
4937 | ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32); |
4938 | Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS); |
4939 | OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS); |
4940 | break; |
4941 | case ISD::UMULO: |
4942 | // We generate a UMUL_LOHI and then check if the high word is 0. |
4943 | ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32); |
4944 | Value = DAG.getNode(ISD::UMUL_LOHI, dl, |
4945 | DAG.getVTList(Op.getValueType(), Op.getValueType()), |
4946 | LHS, RHS); |
4947 | OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1), |
4948 | DAG.getConstant(0, dl, MVT::i32)); |
4949 | Value = Value.getValue(0); // We only want the low 32 bits for the result. |
4950 | break; |
4951 | case ISD::SMULO: |
4952 | // We generate a SMUL_LOHI and then check if all the bits of the high word |
4953 | // are the same as the sign bit of the low word. |
4954 | ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32); |
4955 | Value = DAG.getNode(ISD::SMUL_LOHI, dl, |
4956 | DAG.getVTList(Op.getValueType(), Op.getValueType()), |
4957 | LHS, RHS); |
4958 | OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1), |
4959 | DAG.getNode(ISD::SRA, dl, Op.getValueType(), |
4960 | Value.getValue(0), |
4961 | DAG.getConstant(31, dl, MVT::i32))); |
4962 | Value = Value.getValue(0); // We only want the low 32 bits for the result. |
4963 | break; |
4964 | } // switch (...) |
4965 | |
4966 | return std::make_pair(Value, OverflowCmp); |
4967 | } |
4968 | |
4969 | SDValue |
4970 | ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const { |
4971 | // Let legalize expand this if it isn't a legal type yet. |
4972 | if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType())) |
4973 | return SDValue(); |
4974 | |
4975 | SDValue Value, OverflowCmp; |
4976 | SDValue ARMcc; |
4977 | std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc); |
4978 | SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); |
4979 | SDLoc dl(Op); |
4980 | // We use 0 and 1 as false and true values. |
4981 | SDValue TVal = DAG.getConstant(1, dl, MVT::i32); |
4982 | SDValue FVal = DAG.getConstant(0, dl, MVT::i32); |
4983 | EVT VT = Op.getValueType(); |
4984 | |
4985 | SDValue Overflow = DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal, |
4986 | ARMcc, CCR, OverflowCmp); |
4987 | |
4988 | SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); |
4989 | return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow); |
4990 | } |
4991 | |
4992 | static SDValue ConvertBooleanCarryToCarryFlag(SDValue BoolCarry, |
4993 | SelectionDAG &DAG) { |
4994 | SDLoc DL(BoolCarry); |
4995 | EVT CarryVT = BoolCarry.getValueType(); |
4996 | |
4997 | // This converts the boolean value carry into the carry flag by doing |
4998 | // ARMISD::SUBC Carry, 1 |
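     | // (On ARM, subtraction sets C to the inverse of borrow: a boolean carry of 1
     | // gives 1 - 1 with no borrow, so C is set; a boolean 0 borrows and clears C.)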
4999 | SDValue Carry = DAG.getNode(ARMISD::SUBC, DL, |
5000 | DAG.getVTList(CarryVT, MVT::i32), |
5001 | BoolCarry, DAG.getConstant(1, DL, CarryVT)); |
5002 | return Carry.getValue(1); |
5003 | } |
5004 | |
5005 | static SDValue ConvertCarryFlagToBooleanCarry(SDValue Flags, EVT VT, |
5006 | SelectionDAG &DAG) { |
5007 | SDLoc DL(Flags); |
5008 | |
5009 | // Now convert the carry flag into a boolean carry. We do this |
5010 | // using ARMISD::ADDE 0, 0, Carry
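     | // (0 + 0 + C materialises the carry flag as a 0/1 integer value.)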
5011 | return DAG.getNode(ARMISD::ADDE, DL, DAG.getVTList(VT, MVT::i32), |
5012 | DAG.getConstant(0, DL, MVT::i32), |
5013 | DAG.getConstant(0, DL, MVT::i32), Flags); |
5014 | } |
5015 | |
5016 | SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op, |
5017 | SelectionDAG &DAG) const { |
5018 | // Let legalize expand this if it isn't a legal type yet. |
5019 | if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType())) |
5020 | return SDValue(); |
5021 | |
5022 | SDValue LHS = Op.getOperand(0); |
5023 | SDValue RHS = Op.getOperand(1); |
5024 | SDLoc dl(Op); |
5025 | |
5026 | EVT VT = Op.getValueType(); |
5027 | SDVTList VTs = DAG.getVTList(VT, MVT::i32); |
5028 | SDValue Value; |
5029 | SDValue Overflow; |
5030 | switch (Op.getOpcode()) { |
5031 | default: |
5032 | llvm_unreachable("Unknown overflow instruction!");
5033 | case ISD::UADDO: |
5034 | Value = DAG.getNode(ARMISD::ADDC, dl, VTs, LHS, RHS); |
5035 | // Convert the carry flag into a boolean value. |
5036 | Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG); |
5037 | break; |
5038 | case ISD::USUBO: { |
5039 | Value = DAG.getNode(ARMISD::SUBC, dl, VTs, LHS, RHS); |
5040 | // Convert the carry flag into a boolean value. |
5041 | Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG); |
5042 | // ARMISD::SUBC returns 0 when we have to borrow, so turn it into an
5043 | // overflow value by computing 1 - C.
5044 | Overflow = DAG.getNode(ISD::SUB, dl, MVT::i32, |
5045 | DAG.getConstant(1, dl, MVT::i32), Overflow); |
5046 | break; |
5047 | } |
5048 | } |
5049 | |
5050 | return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow); |
5051 | } |
5052 | |
5053 | static SDValue LowerADDSUBSAT(SDValue Op, SelectionDAG &DAG, |
5054 | const ARMSubtarget *Subtarget) { |
5055 | EVT VT = Op.getValueType(); |
5056 | if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP()) |
5057 | return SDValue(); |
5058 | if (!VT.isSimple()) |
5059 | return SDValue(); |
5060 | |
5061 | unsigned NewOpcode; |
5062 | switch (VT.getSimpleVT().SimpleTy) { |
5063 | default: |
5064 | return SDValue(); |
5065 | case MVT::i8: |
5066 | switch (Op->getOpcode()) { |
5067 | case ISD::UADDSAT: |
5068 | NewOpcode = ARMISD::UQADD8b; |
5069 | break; |
5070 | case ISD::SADDSAT: |
5071 | NewOpcode = ARMISD::QADD8b; |
5072 | break; |
5073 | case ISD::USUBSAT: |
5074 | NewOpcode = ARMISD::UQSUB8b; |
5075 | break; |
5076 | case ISD::SSUBSAT: |
5077 | NewOpcode = ARMISD::QSUB8b; |
5078 | break; |
5079 | } |
5080 | break; |
5081 | case MVT::i16: |
5082 | switch (Op->getOpcode()) { |
5083 | case ISD::UADDSAT: |
5084 | NewOpcode = ARMISD::UQADD16b; |
5085 | break; |
5086 | case ISD::SADDSAT: |
5087 | NewOpcode = ARMISD::QADD16b; |
5088 | break; |
5089 | case ISD::USUBSAT: |
5090 | NewOpcode = ARMISD::UQSUB16b; |
5091 | break; |
5092 | case ISD::SSUBSAT: |
5093 | NewOpcode = ARMISD::QSUB16b; |
5094 | break; |
5095 | } |
5096 | break; |
5097 | } |
5098 | |
5099 | SDLoc dl(Op); |
5100 | SDValue Add = |
5101 | DAG.getNode(NewOpcode, dl, MVT::i32, |
5102 | DAG.getSExtOrTrunc(Op->getOperand(0), dl, MVT::i32), |
5103 | DAG.getSExtOrTrunc(Op->getOperand(1), dl, MVT::i32)); |
5104 | return DAG.getNode(ISD::TRUNCATE, dl, VT, Add); |
5105 | } |
5106 | |
5107 | SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { |
5108 | SDValue Cond = Op.getOperand(0); |
5109 | SDValue SelectTrue = Op.getOperand(1); |
5110 | SDValue SelectFalse = Op.getOperand(2); |
5111 | SDLoc dl(Op); |
5112 | unsigned Opc = Cond.getOpcode(); |
5113 | |
5114 | if (Cond.getResNo() == 1 && |
5115 | (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || |
5116 | Opc == ISD::USUBO)) { |
5117 | if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0))) |
5118 | return SDValue(); |
5119 | |
5120 | SDValue Value, OverflowCmp; |
5121 | SDValue ARMcc; |
5122 | std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc); |
5123 | SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); |
5124 | EVT VT = Op.getValueType(); |
5125 | |
5126 | return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, CCR, |
5127 | OverflowCmp, DAG); |
5128 | } |
5129 | |
5130 | // Convert: |
5131 | // |
5132 | // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond) |
5133 | // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond) |
5134 | // |
5135 | if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) { |
5136 | const ConstantSDNode *CMOVTrue = |
5137 | dyn_cast<ConstantSDNode>(Cond.getOperand(0)); |
5138 | const ConstantSDNode *CMOVFalse = |
5139 | dyn_cast<ConstantSDNode>(Cond.getOperand(1)); |
5140 | |
5141 | if (CMOVTrue && CMOVFalse) { |
5142 | unsigned CMOVTrueVal = CMOVTrue->getZExtValue(); |
5143 | unsigned CMOVFalseVal = CMOVFalse->getZExtValue(); |
5144 | |
5145 | SDValue True; |
5146 | SDValue False; |
5147 | if (CMOVTrueVal == 1 && CMOVFalseVal == 0) { |
5148 | True = SelectTrue; |
5149 | False = SelectFalse; |
5150 | } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) { |
5151 | True = SelectFalse; |
5152 | False = SelectTrue; |
5153 | } |
5154 | |
5155 | if (True.getNode() && False.getNode()) { |
5156 | EVT VT = Op.getValueType(); |
5157 | SDValue ARMcc = Cond.getOperand(2); |
5158 | SDValue CCR = Cond.getOperand(3); |
5159 | SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG); |
5160 | assert(True.getValueType() == VT);
5161 | return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG); |
5162 | } |
5163 | } |
5164 | } |
5165 | |
5166 | // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the |
5167 | // undefined bits before doing a full-word comparison with zero. |
5168 | Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond, |
5169 | DAG.getConstant(1, dl, Cond.getValueType())); |
5170 | |
5171 | return DAG.getSelectCC(dl, Cond, |
5172 | DAG.getConstant(0, dl, Cond.getValueType()), |
5173 | SelectTrue, SelectFalse, ISD::SETNE); |
5174 | } |
5175 | |
5176 | static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode, |
5177 | bool &swpCmpOps, bool &swpVselOps) { |
5178 | // Start by selecting the GE condition code for opcodes that return true for |
5179 | // 'equality' |
5180 | if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE || |
5181 | CC == ISD::SETULE || CC == ISD::SETGE || CC == ISD::SETLE) |
5182 | CondCode = ARMCC::GE; |
5183 | |
5184 | // and GT for opcodes that return false for 'equality'. |
5185 | else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT || |
5186 | CC == ISD::SETULT || CC == ISD::SETGT || CC == ISD::SETLT) |
5187 | CondCode = ARMCC::GT; |
5188 | |
5189 | // Since we are constrained to GE/GT, if the opcode contains 'less', we need |
5190 | // to swap the compare operands. |
5191 | if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT || |
5192 | CC == ISD::SETULT || CC == ISD::SETLE || CC == ISD::SETLT) |
5193 | swpCmpOps = true; |
5194 | |
5195 | // Both GT and GE are ordered comparisons, and return false for 'unordered'. |
5196 | // If we have an unordered opcode, we need to swap the operands to the VSEL |
5197 | // instruction (effectively negating the condition). |
5198 | // |
5199 | // This also has the effect of swapping which one of 'less' or 'greater' |
5200 | // returns true, so we also swap the compare operands. It also switches |
5201 | // whether we return true for 'equality', so we compensate by picking the |
5202 | // opposite condition code to our original choice. |
5203 | if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE || |
5204 | CC == ISD::SETUGT) { |
5205 | swpCmpOps = !swpCmpOps; |
5206 | swpVselOps = !swpVselOps; |
5207 | CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT; |
5208 | } |
5209 | |
5210 | // 'ordered' is 'anything but unordered', so use the VS condition code and |
5211 | // swap the VSEL operands. |
5212 | if (CC == ISD::SETO) { |
5213 | CondCode = ARMCC::VS; |
5214 | swpVselOps = true; |
5215 | } |
5216 | |
5217 | // 'unordered or not equal' is 'anything but equal', so use the EQ condition |
5218 | // code and swap the VSEL operands. Also do this if we don't care about the |
5219 | // unordered case. |
5220 | if (CC == ISD::SETUNE || CC == ISD::SETNE) { |
5221 | CondCode = ARMCC::EQ; |
5222 | swpVselOps = true; |
5223 | } |
5224 | } |
5225 | |
5226 | SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal, |
5227 | SDValue TrueVal, SDValue ARMcc, SDValue CCR, |
5228 | SDValue Cmp, SelectionDAG &DAG) const { |
5229 | if (!Subtarget->hasFP64() && VT == MVT::f64) { |
5230 | FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl, |
5231 | DAG.getVTList(MVT::i32, MVT::i32), FalseVal); |
5232 | TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl, |
5233 | DAG.getVTList(MVT::i32, MVT::i32), TrueVal); |
5234 | |
5235 | SDValue TrueLow = TrueVal.getValue(0); |
5236 | SDValue TrueHigh = TrueVal.getValue(1); |
5237 | SDValue FalseLow = FalseVal.getValue(0); |
5238 | SDValue FalseHigh = FalseVal.getValue(1); |
5239 | |
5240 | SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow, |
5241 | ARMcc, CCR, Cmp); |
5242 | SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh, |
5243 | ARMcc, CCR, duplicateCmp(Cmp, DAG)); |
5244 | |
5245 | return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High); |
5246 | } else { |
5247 | return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR, |
5248 | Cmp); |
5249 | } |
5250 | } |
5251 | |
5252 | static bool isGTorGE(ISD::CondCode CC) { |
5253 | return CC == ISD::SETGT || CC == ISD::SETGE; |
5254 | } |
5255 | |
5256 | static bool isLTorLE(ISD::CondCode CC) { |
5257 | return CC == ISD::SETLT || CC == ISD::SETLE; |
5258 | } |
5259 | |
5260 | // See if a conditional (LHS CC RHS ? TrueVal : FalseVal) is lower-saturating. |
5261 | // All of these conditions (and their <= and >= counterparts) will do: |
5262 | // x < k ? k : x |
5263 | // x > k ? x : k |
5264 | // k < x ? x : k |
5265 | // k > x ? k : x |
5266 | static bool isLowerSaturate(const SDValue LHS, const SDValue RHS, |
5267 | const SDValue TrueVal, const SDValue FalseVal, |
5268 | const ISD::CondCode CC, const SDValue K) { |
5269 | return (isGTorGE(CC) && |
5270 | ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))) || |
5271 | (isLTorLE(CC) && |
5272 | ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal))); |
5273 | } |
5274 | |
5275 | // Check if two chained conditionals could be converted into SSAT or USAT. |
5276 | // |
5277 | // SSAT can replace a set of two conditional selectors that bound a number to an |
5278 | // interval of type [k, ~k] when k + 1 is a power of 2. Here are some examples: |
5279 | // |
5280 | // x < -k ? -k : (x > k ? k : x) |
5281 | // x < -k ? -k : (x < k ? x : k) |
5282 | // x > -k ? (x > k ? k : x) : -k |
5283 | // x < k ? (x < -k ? -k : x) : k |
5284 | // etc. |
5285 | // |
5286 | // LLVM canonicalizes these to either a min(max()) or a max(min()) |
5287 | // pattern. This function tries to match one of these and will return a SSAT |
5288 | // node if successful. |
5289 | // |
5290 | // USAT works similarly to SSAT but bounds to the interval [0, k], where k + 1
5291 | // is a power of 2. |
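     | // Illustrative example: clamping a value to [-128, 127] (so k = 127, with
     | // k + 1 a power of two and -128 == ~127) matches this pattern and can be
     | // lowered to a single signed saturate to 8 bits (ssat #8) instead of two
     | // compare-and-select sequences.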
5292 | static SDValue LowerSaturatingConditional(SDValue Op, SelectionDAG &DAG) { |
5293 | EVT VT = Op.getValueType(); |
5294 | SDValue V1 = Op.getOperand(0); |
5295 | SDValue K1 = Op.getOperand(1); |
5296 | SDValue TrueVal1 = Op.getOperand(2); |
5297 | SDValue FalseVal1 = Op.getOperand(3); |
5298 | ISD::CondCode CC1 = cast<CondCodeSDNode>(Op.getOperand(4))->get(); |
5299 | |
5300 | const SDValue Op2 = isa<ConstantSDNode>(TrueVal1) ? FalseVal1 : TrueVal1; |
5301 | if (Op2.getOpcode() != ISD::SELECT_CC) |
5302 | return SDValue(); |
5303 | |
5304 | SDValue V2 = Op2.getOperand(0); |
5305 | SDValue K2 = Op2.getOperand(1); |
5306 | SDValue TrueVal2 = Op2.getOperand(2); |
5307 | SDValue FalseVal2 = Op2.getOperand(3); |
5308 | ISD::CondCode CC2 = cast<CondCodeSDNode>(Op2.getOperand(4))->get(); |
5309 | |
5310 | SDValue V1Tmp = V1; |
5311 | SDValue V2Tmp = V2; |
5312 | |
5313 | // Check that the registers and the constants match a max(min()) or min(max()) |
5314 | // pattern |
5315 | if (V1Tmp != TrueVal1 || V2Tmp != TrueVal2 || K1 != FalseVal1 || |
5316 | K2 != FalseVal2 || |
5317 | !((isGTorGE(CC1) && isLTorLE(CC2)) || (isLTorLE(CC1) && isGTorGE(CC2)))) |
5318 | return SDValue(); |
5319 | |
5320 | // Check that the constant in the lower-bound check is |
5321 | // the opposite of the constant in the upper-bound check |
5322 | // in 1's complement. |
5323 | if (!isa<ConstantSDNode>(K1) || !isa<ConstantSDNode>(K2)) |
5324 | return SDValue(); |
5325 | |
5326 | int64_t Val1 = cast<ConstantSDNode>(K1)->getSExtValue(); |
5327 | int64_t Val2 = cast<ConstantSDNode>(K2)->getSExtValue(); |
5328 | int64_t PosVal = std::max(Val1, Val2); |
5329 | int64_t NegVal = std::min(Val1, Val2); |
5330 | |
5331 | if (!((Val1 > Val2 && isLTorLE(CC1)) || (Val1 < Val2 && isLTorLE(CC2))) || |
5332 | !isPowerOf2_64(PosVal + 1)) |
5333 | return SDValue(); |
5334 | |
5335 | // Handle the difference between USAT (unsigned) and SSAT (signed) |
5336 | // saturation |
5337 | // At this point, PosVal is guaranteed to be positive |
5338 | uint64_t K = PosVal; |
5339 | SDLoc dl(Op); |
5340 | if (Val1 == ~Val2) |
5341 | return DAG.getNode(ARMISD::SSAT, dl, VT, V2Tmp, |
5342 | DAG.getConstant(llvm::countr_one(K), dl, VT)); |
5343 | if (NegVal == 0) |
5344 | return DAG.getNode(ARMISD::USAT, dl, VT, V2Tmp, |
5345 | DAG.getConstant(llvm::countr_one(K), dl, VT)); |
5346 | |
5347 | return SDValue(); |
5348 | } |
5349 | |
5350 | // Check if a condition of the type x < k ? k : x can be converted into a |
5351 | // bit operation instead of conditional moves. |
5352 | // Currently this is allowed given: |
5353 | // - The conditions and values match up |
5354 | // - k is 0 or -1 (all ones) |
5355 | // This function will not check the last condition; that's up to the caller.
5356 | // It returns true if the transformation can be made, and in such case |
5357 | // returns x in V, and k in SatK. |
5358 | static bool isLowerSaturatingConditional(const SDValue &Op, SDValue &V, |
5359 | SDValue &SatK) |
5360 | { |
5361 | SDValue LHS = Op.getOperand(0); |
5362 | SDValue RHS = Op.getOperand(1); |
5363 | ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); |
5364 | SDValue TrueVal = Op.getOperand(2); |
5365 | SDValue FalseVal = Op.getOperand(3); |
5366 | |
5367 | SDValue *K = isa<ConstantSDNode>(LHS) ? &LHS : isa<ConstantSDNode>(RHS) |
5368 | ? &RHS |
5369 | : nullptr; |
5370 | |
5371 | // No constant operation in comparison, early out |
5372 | if (!K) |
5373 | return false; |
5374 | |
5375 | SDValue KTmp = isa<ConstantSDNode>(TrueVal) ? TrueVal : FalseVal; |
5376 | V = (KTmp == TrueVal) ? FalseVal : TrueVal; |
5377 | SDValue VTmp = (K && *K == LHS) ? RHS : LHS; |
5378 | |
5379 | // If the constant in the comparison does not match the constant selected,
5380 | // or the variable compared does not match the variable selected, early out
5381 | if (*K != KTmp || V != VTmp) |
5382 | return false; |
5383 | |
5384 | if (isLowerSaturate(LHS, RHS, TrueVal, FalseVal, CC, *K)) { |
5385 | SatK = *K; |
5386 | return true; |
5387 | } |
5388 | |
5389 | return false; |
5390 | } |
5391 | |
5392 | bool ARMTargetLowering::isUnsupportedFloatingType(EVT VT) const { |
5393 | if (VT == MVT::f32) |
5394 | return !Subtarget->hasVFP2Base(); |
5395 | if (VT == MVT::f64) |
5396 | return !Subtarget->hasFP64(); |
5397 | if (VT == MVT::f16) |
5398 | return !Subtarget->hasFullFP16(); |
5399 | return false; |
5400 | } |
5401 | |
5402 | SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { |
5403 | EVT VT = Op.getValueType(); |
5404 | SDLoc dl(Op); |
5405 | |
5406 | // Try to convert two saturating conditional selects into a single SSAT |
5407 | if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2()) |
5408 | if (SDValue SatValue = LowerSaturatingConditional(Op, DAG)) |
5409 | return SatValue; |
5410 | |
5411 | // Try to convert expressions of the form x < k ? k : x (and similar forms) |
5412 | // into more efficient bit operations, which is possible when k is 0 or -1 |
5413 | // On ARM and Thumb-2 which have flexible operand 2 this will result in |
5414 | // single instructions. On Thumb the shift and the bit operation will be two |
5415 | // instructions. |
5416 | // Only allow this transformation on full-width (32-bit) operations |
5417 | SDValue LowerSatConstant; |
5418 | SDValue SatValue; |
5419 | if (VT == MVT::i32 && |
5420 | isLowerSaturatingConditional(Op, SatValue, LowerSatConstant)) { |
5421 | SDValue ShiftV = DAG.getNode(ISD::SRA, dl, VT, SatValue, |
5422 | DAG.getConstant(31, dl, VT)); |
5423 | if (isNullConstant(LowerSatConstant)) { |
5424 | SDValue NotShiftV = DAG.getNode(ISD::XOR, dl, VT, ShiftV, |
5425 | DAG.getAllOnesConstant(dl, VT)); |
5426 | return DAG.getNode(ISD::AND, dl, VT, SatValue, NotShiftV); |
5427 | } else if (isAllOnesConstant(LowerSatConstant)) |
5428 | return DAG.getNode(ISD::OR, dl, VT, SatValue, ShiftV); |
5429 | } |
5430 | |
5431 | SDValue LHS = Op.getOperand(0); |
5432 | SDValue RHS = Op.getOperand(1); |
5433 | ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); |
5434 | SDValue TrueVal = Op.getOperand(2); |
5435 | SDValue FalseVal = Op.getOperand(3); |
5436 | ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FalseVal); |
5437 | ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TrueVal); |
5438 | |
5439 | if (Subtarget->hasV8_1MMainlineOps() && CFVal && CTVal && |
5440 | LHS.getValueType() == MVT::i32 && RHS.getValueType() == MVT::i32) { |
5441 | unsigned TVal = CTVal->getZExtValue(); |
5442 | unsigned FVal = CFVal->getZExtValue(); |
5443 | unsigned Opcode = 0; |
5444 | |
5445 | if (TVal == ~FVal) { |
5446 | Opcode = ARMISD::CSINV; |
5447 | } else if (TVal == ~FVal + 1) { |
5448 | Opcode = ARMISD::CSNEG; |
5449 | } else if (TVal + 1 == FVal) { |
5450 | Opcode = ARMISD::CSINC; |
5451 | } else if (TVal == FVal + 1) { |
5452 | Opcode = ARMISD::CSINC; |
5453 | std::swap(TrueVal, FalseVal); |
5454 | std::swap(TVal, FVal); |
5455 | CC = ISD::getSetCCInverse(CC, LHS.getValueType()); |
5456 | } |
5457 | |
5458 | if (Opcode) { |
5459 | // If one of the constants is cheaper than another, materialise the |
5460 | // cheaper one and let the csel generate the other. |
5461 | if (Opcode != ARMISD::CSINC && |
5462 | HasLowerConstantMaterializationCost(FVal, TVal, Subtarget)) { |
5463 | std::swap(TrueVal, FalseVal); |
5464 | std::swap(TVal, FVal); |
5465 | CC = ISD::getSetCCInverse(CC, LHS.getValueType()); |
5466 | } |
5467 | |
5468 | // Attempt to use ZR checking TVal is 0, possibly inverting the condition |
5469 | // to get there. CSINC is not invertible like the other two (~(~a) == a,
5470 | // -(-a) == a, but (a+1)+1 != a). |
5471 | if (FVal == 0 && Opcode != ARMISD::CSINC) { |
5472 | std::swap(TrueVal, FalseVal); |
5473 | std::swap(TVal, FVal); |
5474 | CC = ISD::getSetCCInverse(CC, LHS.getValueType()); |
5475 | } |
5476 | |
5477 | // Drops F's value because we can get it by inverting/negating TVal. |
5478 | FalseVal = TrueVal; |
5479 | |
5480 | SDValue ARMcc; |
5481 | SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); |
5482 | EVT VT = TrueVal.getValueType(); |
5483 | return DAG.getNode(Opcode, dl, VT, TrueVal, FalseVal, ARMcc, Cmp); |
5484 | } |
5485 | } |
5486 | |
5487 | if (isUnsupportedFloatingType(LHS.getValueType())) { |
5488 | DAG.getTargetLoweringInfo().softenSetCCOperands( |
5489 | DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS); |
5490 | |
5491 | // If softenSetCCOperands only returned one value, we should compare it to |
5492 | // zero. |
5493 | if (!RHS.getNode()) { |
5494 | RHS = DAG.getConstant(0, dl, LHS.getValueType()); |
5495 | CC = ISD::SETNE; |
5496 | } |
5497 | } |
5498 | |
5499 | if (LHS.getValueType() == MVT::i32) { |
5500 | // Try to generate VSEL on ARMv8. |
5501 | // The VSEL instruction can't use all the usual ARM condition |
5502 | // codes: it only has two bits to select the condition code, so it's |
5503 | // constrained to use only GE, GT, VS and EQ. |
5504 | // |
5505 | // To implement all the various ISD::SETXXX opcodes, we sometimes need to |
5506 | // swap the operands of the previous compare instruction (effectively |
5507 | // inverting the compare condition, swapping 'less' and 'greater') and |
5508 | // sometimes need to swap the operands to the VSEL (which inverts the |
5509 | // condition in the sense of firing whenever the previous condition didn't) |
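     | // For example (illustrative), an i32 SETLT select of f32 values is handled
     | // by inverting the condition to SETGE and swapping the select operands, so
     | // the resulting CMOV can later be matched as a VSEL with the GE condition.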
5510 | if (Subtarget->hasFPARMv8Base() && (TrueVal.getValueType() == MVT::f16 || |
5511 | TrueVal.getValueType() == MVT::f32 || |
5512 | TrueVal.getValueType() == MVT::f64)) { |
5513 | ARMCC::CondCodes CondCode = IntCCToARMCC(CC); |
5514 | if (CondCode == ARMCC::LT || CondCode == ARMCC::LE || |
5515 | CondCode == ARMCC::VC || CondCode == ARMCC::NE) { |
5516 | CC = ISD::getSetCCInverse(CC, LHS.getValueType()); |
5517 | std::swap(TrueVal, FalseVal); |
5518 | } |
5519 | } |
5520 | |
5521 | SDValue ARMcc; |
5522 | SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); |
5523 | SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); |
5524 | // Choose GE over PL, which vsel does not support
5525 | if (cast<ConstantSDNode>(ARMcc)->getZExtValue() == ARMCC::PL) |
5526 | ARMcc = DAG.getConstant(ARMCC::GE, dl, MVT::i32); |
5527 | return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG); |
5528 | } |
5529 | |
5530 | ARMCC::CondCodes CondCode, CondCode2; |
5531 | FPCCToARMCC(CC, CondCode, CondCode2); |
5532 | |
5533 | // Normalize the fp compare. If RHS is zero we prefer to keep it there so we |
5534 | // match CMPFPw0 instead of CMPFP, though we don't do this for f16 because we |
5535 | // must use VSEL (limited condition codes), due to not having conditional f16 |
5536 | // moves. |
5537 | if (Subtarget->hasFPARMv8Base() && |
5538 | !(isFloatingPointZero(RHS) && TrueVal.getValueType() != MVT::f16) && |
5539 | (TrueVal.getValueType() == MVT::f16 || |
5540 | TrueVal.getValueType() == MVT::f32 || |
5541 | TrueVal.getValueType() == MVT::f64)) { |
5542 | bool swpCmpOps = false; |
5543 | bool swpVselOps = false; |
5544 | checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps); |
5545 | |
5546 | if (CondCode == ARMCC::GT || CondCode == ARMCC::GE || |
5547 | CondCode == ARMCC::VS || CondCode == ARMCC::EQ) { |
5548 | if (swpCmpOps) |
5549 | std::swap(LHS, RHS); |
5550 | if (swpVselOps) |
5551 | std::swap(TrueVal, FalseVal); |
5552 | } |
5553 | } |
5554 | |
5555 | SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32); |
5556 | SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); |
5557 | SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); |
5558 | SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG); |
5559 | if (CondCode2 != ARMCC::AL) { |
5560 | SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32); |
5561 | // FIXME: Needs another CMP because flag can have but one use. |
5562 | SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl); |
5563 | Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG); |
5564 | } |
5565 | return Result; |
5566 | } |
5567 | |
5568 | /// canChangeToInt - Given the fp compare operand, return true if it is suitable |
5569 | /// to morph to an integer compare sequence. |
5570 | static bool canChangeToInt(SDValue Op, bool &SeenZero, |
5571 | const ARMSubtarget *Subtarget) { |
5572 | SDNode *N = Op.getNode(); |
5573 | if (!N->hasOneUse()) |
5574 | // Otherwise it requires moving the value from fp to integer registers. |
5575 | return false; |
5576 | if (!N->getNumValues()) |
5577 | return false; |
5578 | EVT VT = Op.getValueType(); |
5579 | if (VT != MVT::f32 && !Subtarget->isFPBrccSlow()) |
5580 | // f32 case is generally profitable. f64 case only makes sense when vcmpe + |
5581 | // vmrs are very slow, e.g. cortex-a8. |
5582 | return false; |
5583 | |
5584 | if (isFloatingPointZero(Op)) { |
5585 | SeenZero = true; |
5586 | return true; |
5587 | } |
5588 | return ISD::isNormalLoad(N); |
5589 | } |
5590 | |
5591 | static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) { |
5592 | if (isFloatingPointZero(Op)) |
5593 | return DAG.getConstant(0, SDLoc(Op), MVT::i32); |
5594 | |
5595 | if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) |
5596 | return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(), |
5597 | Ld->getPointerInfo(), Ld->getAlign(), |
5598 | Ld->getMemOperand()->getFlags()); |
5599 | |
5600 | llvm_unreachable("Unknown VFP cmp argument!");
5601 | } |
5602 | |
5603 | static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, |
5604 | SDValue &RetVal1, SDValue &RetVal2) { |
5605 | SDLoc dl(Op); |
5606 | |
5607 | if (isFloatingPointZero(Op)) { |
5608 | RetVal1 = DAG.getConstant(0, dl, MVT::i32); |
5609 | RetVal2 = DAG.getConstant(0, dl, MVT::i32); |
5610 | return; |
5611 | } |
5612 | |
5613 | if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) { |
5614 | SDValue Ptr = Ld->getBasePtr(); |
5615 | RetVal1 = |
5616 | DAG.getLoad(MVT::i32, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(), |
5617 | Ld->getAlign(), Ld->getMemOperand()->getFlags()); |
5618 | |
5619 | EVT PtrType = Ptr.getValueType(); |
5620 | SDValue NewPtr = DAG.getNode(ISD::ADD, dl, |
5621 | PtrType, Ptr, DAG.getConstant(4, dl, PtrType)); |
5622 | RetVal2 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), NewPtr, |
5623 | Ld->getPointerInfo().getWithOffset(4), |
5624 | commonAlignment(Ld->getAlign(), 4), |
5625 | Ld->getMemOperand()->getFlags()); |
5626 | return; |
5627 | } |
5628 | |
5629 | llvm_unreachable("Unknown VFP cmp argument!");
5630 | } |
5631 | |
5632 | /// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some |
5633 | /// f32 and even f64 comparisons to integer ones. |
5634 | SDValue |
5635 | ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { |
5636 | SDValue Chain = Op.getOperand(0); |
5637 | ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); |
5638 | SDValue LHS = Op.getOperand(2); |
5639 | SDValue RHS = Op.getOperand(3); |
5640 | SDValue Dest = Op.getOperand(4); |
5641 | SDLoc dl(Op); |
5642 | |
5643 | bool LHSSeenZero = false; |
5644 | bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget); |
5645 | bool RHSSeenZero = false; |
5646 | bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget); |
5647 | if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) { |
5648 | // If unsafe fp math optimization is enabled and there are no other uses of |
5649 | // the CMP operands, and the condition code is EQ or NE, we can optimize it |
5650 | // to an integer comparison. |
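     | // The 0x7fffffff mask below clears the sign bit, so a -0.0 operand still
     | // compares equal to the +0.0 that was seen; under unsafe-fp-math the integer
     | // EQ/NE compare then matches the floating-point result.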
5651 | if (CC == ISD::SETOEQ) |
5652 | CC = ISD::SETEQ; |
5653 | else if (CC == ISD::SETUNE) |
5654 | CC = ISD::SETNE; |
5655 | |
5656 | SDValue Mask = DAG.getConstant(0x7fffffff, dl, MVT::i32); |
5657 | SDValue ARMcc; |
5658 | if (LHS.getValueType() == MVT::f32) { |
5659 | LHS = DAG.getNode(ISD::AND, dl, MVT::i32, |
5660 | bitcastf32Toi32(LHS, DAG), Mask); |
5661 | RHS = DAG.getNode(ISD::AND, dl, MVT::i32, |
5662 | bitcastf32Toi32(RHS, DAG), Mask); |
5663 | SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); |
5664 | SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); |
5665 | return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, |
5666 | Chain, Dest, ARMcc, CCR, Cmp); |
5667 | } |
5668 | |
5669 | SDValue LHS1, LHS2; |
5670 | SDValue RHS1, RHS2; |
5671 | expandf64Toi32(LHS, DAG, LHS1, LHS2); |
5672 | expandf64Toi32(RHS, DAG, RHS1, RHS2); |
5673 | LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask); |
5674 | RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask); |
5675 | ARMCC::CondCodes CondCode = IntCCToARMCC(CC); |
5676 | ARMcc = DAG.getConstant(CondCode, dl, MVT::i32); |
5677 | SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); |
5678 | SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest }; |
5679 | return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops); |
5680 | } |
5681 | |
5682 | return SDValue(); |
5683 | } |
5684 | |
5685 | SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { |
5686 | SDValue Chain = Op.getOperand(0); |
5687 | SDValue Cond = Op.getOperand(1); |
5688 | SDValue Dest = Op.getOperand(2); |
5689 | SDLoc dl(Op); |
5690 | |
5691 | // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch |
5692 | // instruction. |
5693 | unsigned Opc = Cond.getOpcode(); |
5694 | bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) && |
5695 | !Subtarget->isThumb1Only(); |
5696 | if (Cond.getResNo() == 1 && |
5697 | (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || |
5698 | Opc == ISD::USUBO || OptimizeMul)) { |
5699 | // Only lower legal XALUO ops. |
5700 | if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0))) |
5701 | return SDValue(); |
5702 | |
5703 | // The actual operation with overflow check. |
5704 | SDValue Value, OverflowCmp; |
5705 | SDValue ARMcc; |
5706 | std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc); |
5707 | |
5708 | // Reverse the condition code. |
5709 | ARMCC::CondCodes CondCode = |
5710 | (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue(); |
5711 | CondCode = ARMCC::getOppositeCondition(CondCode); |
5712 | ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32); |
5713 | SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); |
5714 | |
5715 | return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR, |
5716 | OverflowCmp); |
5717 | } |
5718 | |
5719 | return SDValue(); |
5720 | } |
5721 | |
5722 | SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { |
5723 | SDValue Chain = Op.getOperand(0); |
5724 | ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); |
5725 | SDValue LHS = Op.getOperand(2); |
5726 | SDValue RHS = Op.getOperand(3); |
5727 | SDValue Dest = Op.getOperand(4); |
5728 | SDLoc dl(Op); |
5729 | |
5730 | if (isUnsupportedFloatingType(LHS.getValueType())) { |
5731 | DAG.getTargetLoweringInfo().softenSetCCOperands( |
5732 | DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS); |
5733 | |
5734 | // If softenSetCCOperands only returned one value, we should compare it to |
5735 | // zero. |
5736 | if (!RHS.getNode()) { |
5737 | RHS = DAG.getConstant(0, dl, LHS.getValueType()); |
5738 | CC = ISD::SETNE; |
5739 | } |
5740 | } |
5741 | |
5742 | // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch |
5743 | // instruction. |
5744 | unsigned Opc = LHS.getOpcode(); |
5745 | bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) && |
5746 | !Subtarget->isThumb1Only(); |
5747 | if (LHS.getResNo() == 1 && (isOneConstant(RHS) || isNullConstant(RHS)) && |
5748 | (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || |
5749 | Opc == ISD::USUBO || OptimizeMul) && |
5750 | (CC == ISD::SETEQ || CC == ISD::SETNE)) { |
5751 | // Only lower legal XALUO ops. |
5752 | if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0))) |
5753 | return SDValue(); |
5754 | |
5755 | // The actual operation with overflow check. |
5756 | SDValue Value, OverflowCmp; |
5757 | SDValue ARMcc; |
5758 | std::tie(Value, OverflowCmp) = getARMXALUOOp(LHS.getValue(0), DAG, ARMcc); |
5759 | |
5760 | if ((CC == ISD::SETNE) != isOneConstant(RHS)) { |
5761 | // Reverse the condition code. |
5762 | ARMCC::CondCodes CondCode = |
5763 | (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue(); |
5764 | CondCode = ARMCC::getOppositeCondition(CondCode); |
5765 | ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32); |
5766 | } |
5767 | SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); |
5768 | |
5769 | return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR, |
5770 | OverflowCmp); |
5771 | } |
5772 | |
5773 | if (LHS.getValueType() == MVT::i32) { |
5774 | SDValue ARMcc; |
5775 | SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); |
5776 | SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); |
5777 | return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, |
5778 | Chain, Dest, ARMcc, CCR, Cmp); |
5779 | } |
5780 | |
5781 | if (getTargetMachine().Options.UnsafeFPMath && |
5782 | (CC == ISD::SETEQ || CC == ISD::SETOEQ || |
5783 | CC == ISD::SETNE || CC == ISD::SETUNE)) { |
5784 | if (SDValue Result = OptimizeVFPBrcond(Op, DAG)) |
5785 | return Result; |
5786 | } |
5787 | |
5788 | ARMCC::CondCodes CondCode, CondCode2; |
5789 | FPCCToARMCC(CC, CondCode, CondCode2); |
5790 | |
5791 | SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32); |
5792 | SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); |
5793 | SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); |
5794 | SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); |
5795 | SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp }; |
5796 | SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops); |
5797 | if (CondCode2 != ARMCC::AL) { |
5798 | ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32); |
5799 | SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) }; |
5800 | Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops); |
5801 | } |
5802 | return Res; |
5803 | } |
5804 | |
5805 | SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { |
5806 | SDValue Chain = Op.getOperand(0); |
5807 | SDValue Table = Op.getOperand(1); |
5808 | SDValue Index = Op.getOperand(2); |
5809 | SDLoc dl(Op); |
5810 | |
5811 | EVT PTy = getPointerTy(DAG.getDataLayout()); |
5812 | JumpTableSDNode *JT = cast<JumpTableSDNode>(Table); |
5813 | SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy); |
5814 | Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI); |
5815 | Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, dl, PTy)); |
5816 | SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Index); |
5817 | if (Subtarget->isThumb2() || (Subtarget->hasV8MBaselineOps() && Subtarget->isThumb())) { |
5818 | // Thumb2 and ARMv8-M use a two-level jump. That is, they jump into the jump table,
5819 | // which does another jump to the destination. This also makes it easier |
5820 | // to translate it to TBB / TBH later (Thumb2 only). |
5821 | // FIXME: This might not work if the function is extremely large. |
5822 | return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain, |
5823 | Addr, Op.getOperand(2), JTI); |
5824 | } |
5825 | if (isPositionIndependent() || Subtarget->isROPI()) { |
5826 | Addr = |
5827 | DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr, |
5828 | MachinePointerInfo::getJumpTable(DAG.getMachineFunction())); |
5829 | Chain = Addr.getValue(1); |
5830 | Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Addr); |
5831 | return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI); |
5832 | } else { |
5833 | Addr = |
5834 | DAG.getLoad(PTy, dl, Chain, Addr, |
5835 | MachinePointerInfo::getJumpTable(DAG.getMachineFunction())); |
5836 | Chain = Addr.getValue(1); |
5837 | return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI); |
5838 | } |
5839 | } |
5840 | |
5841 | static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) { |
5842 | EVT VT = Op.getValueType(); |
5843 | SDLoc dl(Op); |
5844 | |
5845 | if (Op.getValueType().getVectorElementType() == MVT::i32) { |
5846 | if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32) |
5847 | return Op; |
5848 | return DAG.UnrollVectorOp(Op.getNode()); |
5849 | } |
5850 | |
5851 | const bool HasFullFP16 = DAG.getSubtarget<ARMSubtarget>().hasFullFP16(); |
5852 | |
5853 | EVT NewTy; |
5854 | const EVT OpTy = Op.getOperand(0).getValueType(); |
5855 | if (OpTy == MVT::v4f32) |
5856 | NewTy = MVT::v4i32; |
5857 | else if (OpTy == MVT::v4f16 && HasFullFP16) |
5858 | NewTy = MVT::v4i16; |
5859 | else if (OpTy == MVT::v8f16 && HasFullFP16) |
5860 | NewTy = MVT::v8i16; |
5861 | else |
5862 | llvm_unreachable("Invalid type for custom lowering!");
5863 | |
5864 | if (VT != MVT::v4i16 && VT != MVT::v8i16) |
5865 | return DAG.UnrollVectorOp(Op.getNode()); |
5866 | |
5867 | Op = DAG.getNode(Op.getOpcode(), dl, NewTy, Op.getOperand(0)); |
5868 | return DAG.getNode(ISD::TRUNCATE, dl, VT, Op); |
5869 | } |
5870 | |
5871 | SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { |
5872 | EVT VT = Op.getValueType(); |
5873 | if (VT.isVector()) |
5874 | return LowerVectorFP_TO_INT(Op, DAG); |
5875 | |
5876 | bool IsStrict = Op->isStrictFPOpcode(); |
5877 | SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0); |
5878 | |
5879 | if (isUnsupportedFloatingType(SrcVal.getValueType())) { |
5880 | RTLIB::Libcall LC; |
5881 | if (Op.getOpcode() == ISD::FP_TO_SINT || |
5882 | Op.getOpcode() == ISD::STRICT_FP_TO_SINT) |
5883 | LC = RTLIB::getFPTOSINT(SrcVal.getValueType(), |
5884 | Op.getValueType()); |
5885 | else |
5886 | LC = RTLIB::getFPTOUINT(SrcVal.getValueType(), |
5887 | Op.getValueType()); |
5888 | SDLoc Loc(Op); |
5889 | MakeLibCallOptions CallOptions; |
5890 | SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue(); |
5891 | SDValue Result; |
5892 | std::tie(Result, Chain) = makeLibCall(DAG, LC, Op.getValueType(), SrcVal, |
5893 | CallOptions, Loc, Chain); |
5894 | return IsStrict ? DAG.getMergeValues({Result, Chain}, Loc) : Result; |
5895 | } |
5896 | |
5897 | // FIXME: Remove this when we have strict fp instruction selection patterns |
5898 | if (IsStrict) { |
5899 | SDLoc Loc(Op); |
5900 | SDValue Result = |
5901 | DAG.getNode(Op.getOpcode() == ISD::STRICT_FP_TO_SINT ? ISD::FP_TO_SINT |
5902 | : ISD::FP_TO_UINT, |
5903 | Loc, Op.getValueType(), SrcVal); |
5904 | return DAG.getMergeValues({Result, Op.getOperand(0)}, Loc); |
5905 | } |
5906 | |
5907 | return Op; |
5908 | } |
5909 | |
5910 | static SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, |
5911 | const ARMSubtarget *Subtarget) { |
5912 | EVT VT = Op.getValueType(); |
5913 | EVT ToVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); |
5914 | EVT FromVT = Op.getOperand(0).getValueType(); |
5915 | |
5916 | if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f32) |
5917 | return Op; |
5918 | if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f64 && |
5919 | Subtarget->hasFP64()) |
5920 | return Op; |
5921 | if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f16 && |
5922 | Subtarget->hasFullFP16()) |
5923 | return Op; |
5924 | if (VT == MVT::v4i32 && ToVT == MVT::i32 && FromVT == MVT::v4f32 && |
5925 | Subtarget->hasMVEFloatOps()) |
5926 | return Op; |
5927 | if (VT == MVT::v8i16 && ToVT == MVT::i16 && FromVT == MVT::v8f16 && |
5928 | Subtarget->hasMVEFloatOps()) |
5929 | return Op; |
5930 | |
5931 | if (FromVT != MVT::v4f32 && FromVT != MVT::v8f16) |
5932 | return SDValue(); |
5933 | |
5934 | SDLoc DL(Op); |
5935 | bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT; |
5936 | unsigned BW = ToVT.getScalarSizeInBits() - IsSigned; |
5937 | SDValue CVT = DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0), |
5938 | DAG.getValueType(VT.getScalarType())); |
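     | // Note (reviewer annotation, not original source): the conversion above saturates to the full element width; the min/max below re-clamp the result into the ToVT range, i.e. [0, 2^BW - 1] for unsigned and [-2^BW, 2^BW - 1] for signed (BW excludes the sign bit).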
5939 | SDValue Max = DAG.getNode(IsSigned ? ISD::SMIN : ISD::UMIN, DL, VT, CVT, |
5940 | DAG.getConstant((1 << BW) - 1, DL, VT)); |
5941 | if (IsSigned) |
5942 | Max = DAG.getNode(ISD::SMAX, DL, VT, Max, |
5943 | DAG.getConstant(-(1 << BW), DL, VT)); |
5944 | return Max; |
5945 | } |
5946 | |
5947 | static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { |
5948 | EVT VT = Op.getValueType(); |
5949 | SDLoc dl(Op); |
5950 | |
5951 | if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) { |
5952 | if (VT.getVectorElementType() == MVT::f32) |
5953 | return Op; |
5954 | return DAG.UnrollVectorOp(Op.getNode()); |
5955 | } |
5956 | |
5957 | assert((Op.getOperand(0).getValueType() == MVT::v4i16 ||
5958 | Op.getOperand(0).getValueType() == MVT::v8i16) &&
5959 | "Invalid type for custom lowering!");
5960 | |
5961 | const bool HasFullFP16 = DAG.getSubtarget<ARMSubtarget>().hasFullFP16(); |
5962 | |
5963 | EVT DestVecType; |
5964 | if (VT == MVT::v4f32) |
5965 | DestVecType = MVT::v4i32; |
5966 | else if (VT == MVT::v4f16 && HasFullFP16) |
5967 | DestVecType = MVT::v4i16; |
5968 | else if (VT == MVT::v8f16 && HasFullFP16) |
5969 | DestVecType = MVT::v8i16; |
5970 | else |
5971 | return DAG.UnrollVectorOp(Op.getNode()); |
5972 | |
5973 | unsigned CastOpc; |
5974 | unsigned Opc; |
5975 | switch (Op.getOpcode()) { |
5976 | default: llvm_unreachable("Invalid opcode!");
5977 | case ISD::SINT_TO_FP: |
5978 | CastOpc = ISD::SIGN_EXTEND; |
5979 | Opc = ISD::SINT_TO_FP; |
5980 | break; |
5981 | case ISD::UINT_TO_FP: |
5982 | CastOpc = ISD::ZERO_EXTEND; |
5983 | Opc = ISD::UINT_TO_FP; |
5984 | break; |
5985 | } |
5986 | |
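     | // Note (reviewer annotation, not original source): widen the small integer elements first (sign- or zero-extend to DestVecType), then perform the int-to-fp conversion in the wider type.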
5987 | Op = DAG.getNode(CastOpc, dl, DestVecType, Op.getOperand(0)); |
5988 | return DAG.getNode(Opc, dl, VT, Op); |
5989 | } |
5990 | |
5991 | SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { |
5992 | EVT VT = Op.getValueType(); |
5993 | if (VT.isVector()) |
5994 | return LowerVectorINT_TO_FP(Op, DAG); |
5995 | if (isUnsupportedFloatingType(VT)) { |
5996 | RTLIB::Libcall LC; |
5997 | if (Op.getOpcode() == ISD::SINT_TO_FP) |
5998 | LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), |
5999 | Op.getValueType()); |
6000 | else |
6001 | LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), |
6002 | Op.getValueType()); |
6003 | MakeLibCallOptions CallOptions; |
6004 | return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0), |
6005 | CallOptions, SDLoc(Op)).first; |
6006 | } |
6007 | |
6008 | return Op; |
6009 | } |
6010 | |
6011 | SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { |
6012 | // Implement fcopysign with a fabs and a conditional fneg. |
6013 | SDValue Tmp0 = Op.getOperand(0); |
6014 | SDValue Tmp1 = Op.getOperand(1); |
6015 | SDLoc dl(Op); |
6016 | EVT VT = Op.getValueType(); |
6017 | EVT SrcVT = Tmp1.getValueType(); |
6018 | bool InGPR = Tmp0.getOpcode() == ISD::BITCAST || |
6019 | Tmp0.getOpcode() == ARMISD::VMOVDRR; |
6020 | bool UseNEON = !InGPR && Subtarget->hasNEON(); |
6021 | |
6022 | if (UseNEON) { |
6023 | // Use VBSL to copy the sign bit. |
6024 | unsigned EncodedVal = ARM_AM::createVMOVModImm(0x6, 0x80); |
6025 | SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32, |
6026 | DAG.getTargetConstant(EncodedVal, dl, MVT::i32)); |
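     | // Note (reviewer annotation, not original source): EncodedVal (cmode 0x6, imm 0x80) appears to be the modified-immediate encoding of a v2i32 splat of 0x80000000, i.e. the f32 sign bit; for f64 it is shifted up by 32 below so it selects the sign bit of the double's high word.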
6027 | EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64; |
6028 | if (VT == MVT::f64) |
6029 | Mask = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT, |
6030 | DAG.getNode(ISD::BITCAST, dl, OpVT, Mask), |
6031 | DAG.getConstant(32, dl, MVT::i32)); |
6032 | else /*if (VT == MVT::f32)*/ |
6033 | Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0); |
6034 | if (SrcVT == MVT::f32) { |
6035 | Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1); |
6036 | if (VT == MVT::f64) |
6037 | Tmp1 = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT, |
6038 | DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1), |
6039 | DAG.getConstant(32, dl, MVT::i32)); |
6040 | } else if (VT == MVT::f32) |
6041 | Tmp1 = DAG.getNode(ARMISD::VSHRuIMM, dl, MVT::v1i64, |
6042 | DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1), |
6043 | DAG.getConstant(32, dl, MVT::i32)); |
6044 | Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0); |
6045 | Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1); |
6046 | |
6047 | SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0xff), |
6048 | dl, MVT::i32); |
6049 | AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes); |
6050 | SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask, |
6051 | DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes)); |
6052 | |
6053 | SDValue Res = DAG.getNode(ISD::OR, dl, OpVT, |
6054 | DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask), |