File: build/source/llvm/lib/Target/ARM/ARMISelLowering.cpp
Static-analyzer warning: line 2710, column 20 — "Called C++ object pointer is null".
(Viewer note: press '?' to see the keyboard shortcuts.)
1 | //===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===// | ||||
2 | // | ||||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||
4 | // See https://llvm.org/LICENSE.txt for license information. | ||||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||
6 | // | ||||
7 | //===----------------------------------------------------------------------===// | ||||
8 | // | ||||
9 | // This file defines the interfaces that ARM uses to lower LLVM code into a | ||||
10 | // selection DAG. | ||||
11 | // | ||||
12 | //===----------------------------------------------------------------------===// | ||||
13 | |||||
14 | #include "ARMISelLowering.h" | ||||
15 | #include "ARMBaseInstrInfo.h" | ||||
16 | #include "ARMBaseRegisterInfo.h" | ||||
17 | #include "ARMCallingConv.h" | ||||
18 | #include "ARMConstantPoolValue.h" | ||||
19 | #include "ARMMachineFunctionInfo.h" | ||||
20 | #include "ARMPerfectShuffle.h" | ||||
21 | #include "ARMRegisterInfo.h" | ||||
22 | #include "ARMSelectionDAGInfo.h" | ||||
23 | #include "ARMSubtarget.h" | ||||
24 | #include "ARMTargetTransformInfo.h" | ||||
25 | #include "MCTargetDesc/ARMAddressingModes.h" | ||||
26 | #include "MCTargetDesc/ARMBaseInfo.h" | ||||
27 | #include "Utils/ARMBaseInfo.h" | ||||
28 | #include "llvm/ADT/APFloat.h" | ||||
29 | #include "llvm/ADT/APInt.h" | ||||
30 | #include "llvm/ADT/ArrayRef.h" | ||||
31 | #include "llvm/ADT/BitVector.h" | ||||
32 | #include "llvm/ADT/DenseMap.h" | ||||
33 | #include "llvm/ADT/STLExtras.h" | ||||
34 | #include "llvm/ADT/SmallPtrSet.h" | ||||
35 | #include "llvm/ADT/SmallVector.h" | ||||
36 | #include "llvm/ADT/Statistic.h" | ||||
37 | #include "llvm/ADT/StringExtras.h" | ||||
38 | #include "llvm/ADT/StringRef.h" | ||||
39 | #include "llvm/ADT/StringSwitch.h" | ||||
40 | #include "llvm/ADT/Twine.h" | ||||
41 | #include "llvm/Analysis/VectorUtils.h" | ||||
42 | #include "llvm/CodeGen/CallingConvLower.h" | ||||
43 | #include "llvm/CodeGen/ISDOpcodes.h" | ||||
44 | #include "llvm/CodeGen/IntrinsicLowering.h" | ||||
45 | #include "llvm/CodeGen/MachineBasicBlock.h" | ||||
46 | #include "llvm/CodeGen/MachineConstantPool.h" | ||||
47 | #include "llvm/CodeGen/MachineFrameInfo.h" | ||||
48 | #include "llvm/CodeGen/MachineFunction.h" | ||||
49 | #include "llvm/CodeGen/MachineInstr.h" | ||||
50 | #include "llvm/CodeGen/MachineInstrBuilder.h" | ||||
51 | #include "llvm/CodeGen/MachineJumpTableInfo.h" | ||||
52 | #include "llvm/CodeGen/MachineMemOperand.h" | ||||
53 | #include "llvm/CodeGen/MachineOperand.h" | ||||
54 | #include "llvm/CodeGen/MachineRegisterInfo.h" | ||||
55 | #include "llvm/CodeGen/RuntimeLibcalls.h" | ||||
56 | #include "llvm/CodeGen/SelectionDAG.h" | ||||
57 | #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" | ||||
58 | #include "llvm/CodeGen/SelectionDAGNodes.h" | ||||
59 | #include "llvm/CodeGen/TargetInstrInfo.h" | ||||
60 | #include "llvm/CodeGen/TargetLowering.h" | ||||
61 | #include "llvm/CodeGen/TargetOpcodes.h" | ||||
62 | #include "llvm/CodeGen/TargetRegisterInfo.h" | ||||
63 | #include "llvm/CodeGen/TargetSubtargetInfo.h" | ||||
64 | #include "llvm/CodeGen/ValueTypes.h" | ||||
65 | #include "llvm/IR/Attributes.h" | ||||
66 | #include "llvm/IR/CallingConv.h" | ||||
67 | #include "llvm/IR/Constant.h" | ||||
68 | #include "llvm/IR/Constants.h" | ||||
69 | #include "llvm/IR/DataLayout.h" | ||||
70 | #include "llvm/IR/DebugLoc.h" | ||||
71 | #include "llvm/IR/DerivedTypes.h" | ||||
72 | #include "llvm/IR/Function.h" | ||||
73 | #include "llvm/IR/GlobalAlias.h" | ||||
74 | #include "llvm/IR/GlobalValue.h" | ||||
75 | #include "llvm/IR/GlobalVariable.h" | ||||
76 | #include "llvm/IR/IRBuilder.h" | ||||
77 | #include "llvm/IR/InlineAsm.h" | ||||
78 | #include "llvm/IR/Instruction.h" | ||||
79 | #include "llvm/IR/Instructions.h" | ||||
80 | #include "llvm/IR/IntrinsicInst.h" | ||||
81 | #include "llvm/IR/Intrinsics.h" | ||||
82 | #include "llvm/IR/IntrinsicsARM.h" | ||||
83 | #include "llvm/IR/Module.h" | ||||
84 | #include "llvm/IR/PatternMatch.h" | ||||
85 | #include "llvm/IR/Type.h" | ||||
86 | #include "llvm/IR/User.h" | ||||
87 | #include "llvm/IR/Value.h" | ||||
88 | #include "llvm/MC/MCInstrDesc.h" | ||||
89 | #include "llvm/MC/MCInstrItineraries.h" | ||||
90 | #include "llvm/MC/MCRegisterInfo.h" | ||||
91 | #include "llvm/MC/MCSchedule.h" | ||||
92 | #include "llvm/Support/AtomicOrdering.h" | ||||
93 | #include "llvm/Support/BranchProbability.h" | ||||
94 | #include "llvm/Support/Casting.h" | ||||
95 | #include "llvm/Support/CodeGen.h" | ||||
96 | #include "llvm/Support/CommandLine.h" | ||||
97 | #include "llvm/Support/Compiler.h" | ||||
98 | #include "llvm/Support/Debug.h" | ||||
99 | #include "llvm/Support/ErrorHandling.h" | ||||
100 | #include "llvm/Support/KnownBits.h" | ||||
101 | #include "llvm/Support/MachineValueType.h" | ||||
102 | #include "llvm/Support/MathExtras.h" | ||||
103 | #include "llvm/Support/raw_ostream.h" | ||||
104 | #include "llvm/Target/TargetMachine.h" | ||||
105 | #include "llvm/Target/TargetOptions.h" | ||||
106 | #include "llvm/TargetParser/Triple.h" | ||||
107 | #include <algorithm> | ||||
108 | #include <cassert> | ||||
109 | #include <cstdint> | ||||
110 | #include <cstdlib> | ||||
111 | #include <iterator> | ||||
112 | #include <limits> | ||||
113 | #include <optional> | ||||
114 | #include <string> | ||||
115 | #include <tuple> | ||||
116 | #include <utility> | ||||
117 | #include <vector> | ||||
118 | |||||
119 | using namespace llvm; | ||||
120 | using namespace llvm::PatternMatch; | ||||
121 | |||||
122 | #define DEBUG_TYPE"arm-isel" "arm-isel" | ||||
123 | |||||
124 | STATISTIC(NumTailCalls, "Number of tail calls")static llvm::Statistic NumTailCalls = {"arm-isel", "NumTailCalls" , "Number of tail calls"}; | ||||
125 | STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt")static llvm::Statistic NumMovwMovt = {"arm-isel", "NumMovwMovt" , "Number of GAs materialized with movw + movt"}; | ||||
126 | STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments")static llvm::Statistic NumLoopByVals = {"arm-isel", "NumLoopByVals" , "Number of loops generated for byval arguments"}; | ||||
127 | STATISTIC(NumConstpoolPromoted,static llvm::Statistic NumConstpoolPromoted = {"arm-isel", "NumConstpoolPromoted" , "Number of constants with their storage promoted into constant pools" } | ||||
128 | "Number of constants with their storage promoted into constant pools")static llvm::Statistic NumConstpoolPromoted = {"arm-isel", "NumConstpoolPromoted" , "Number of constants with their storage promoted into constant pools" }; | ||||
129 | |||||
130 | static cl::opt<bool> | ||||
131 | ARMInterworking("arm-interworking", cl::Hidden, | ||||
132 | cl::desc("Enable / disable ARM interworking (for debugging only)"), | ||||
133 | cl::init(true)); | ||||
134 | |||||
135 | static cl::opt<bool> EnableConstpoolPromotion( | ||||
136 | "arm-promote-constant", cl::Hidden, | ||||
137 | cl::desc("Enable / disable promotion of unnamed_addr constants into " | ||||
138 | "constant pools"), | ||||
139 | cl::init(false)); // FIXME: set to true by default once PR32780 is fixed | ||||
140 | static cl::opt<unsigned> ConstpoolPromotionMaxSize( | ||||
141 | "arm-promote-constant-max-size", cl::Hidden, | ||||
142 | cl::desc("Maximum size of constant to promote into a constant pool"), | ||||
143 | cl::init(64)); | ||||
144 | static cl::opt<unsigned> ConstpoolPromotionMaxTotal( | ||||
145 | "arm-promote-constant-max-total", cl::Hidden, | ||||
146 | cl::desc("Maximum size of ALL constants to promote into a constant pool"), | ||||
147 | cl::init(128)); | ||||
148 | |||||
149 | cl::opt<unsigned> | ||||
150 | MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden, | ||||
151 | cl::desc("Maximum interleave factor for MVE VLDn to generate."), | ||||
152 | cl::init(2)); | ||||
153 | |||||
154 | // The APCS parameter registers. | ||||
155 | static const MCPhysReg GPRArgRegs[] = { | ||||
156 | ARM::R0, ARM::R1, ARM::R2, ARM::R3 | ||||
157 | }; | ||||
158 | |||||
159 | void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT) { | ||||
160 | if (VT != PromotedLdStVT) { | ||||
161 | setOperationAction(ISD::LOAD, VT, Promote); | ||||
162 | AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT); | ||||
163 | |||||
164 | setOperationAction(ISD::STORE, VT, Promote); | ||||
165 | AddPromotedToType (ISD::STORE, VT, PromotedLdStVT); | ||||
166 | } | ||||
167 | |||||
168 | MVT ElemTy = VT.getVectorElementType(); | ||||
169 | if (ElemTy != MVT::f64) | ||||
170 | setOperationAction(ISD::SETCC, VT, Custom); | ||||
171 | setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); | ||||
172 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); | ||||
173 | if (ElemTy == MVT::i32) { | ||||
174 | setOperationAction(ISD::SINT_TO_FP, VT, Custom); | ||||
175 | setOperationAction(ISD::UINT_TO_FP, VT, Custom); | ||||
176 | setOperationAction(ISD::FP_TO_SINT, VT, Custom); | ||||
177 | setOperationAction(ISD::FP_TO_UINT, VT, Custom); | ||||
178 | } else { | ||||
179 | setOperationAction(ISD::SINT_TO_FP, VT, Expand); | ||||
180 | setOperationAction(ISD::UINT_TO_FP, VT, Expand); | ||||
181 | setOperationAction(ISD::FP_TO_SINT, VT, Expand); | ||||
182 | setOperationAction(ISD::FP_TO_UINT, VT, Expand); | ||||
183 | } | ||||
184 | setOperationAction(ISD::BUILD_VECTOR, VT, Custom); | ||||
185 | setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); | ||||
186 | setOperationAction(ISD::CONCAT_VECTORS, VT, Legal); | ||||
187 | setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal); | ||||
188 | setOperationAction(ISD::SELECT, VT, Expand); | ||||
189 | setOperationAction(ISD::SELECT_CC, VT, Expand); | ||||
190 | setOperationAction(ISD::VSELECT, VT, Expand); | ||||
191 | setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); | ||||
192 | if (VT.isInteger()) { | ||||
193 | setOperationAction(ISD::SHL, VT, Custom); | ||||
194 | setOperationAction(ISD::SRA, VT, Custom); | ||||
195 | setOperationAction(ISD::SRL, VT, Custom); | ||||
196 | } | ||||
197 | |||||
198 | // Neon does not support vector divide/remainder operations. | ||||
199 | setOperationAction(ISD::SDIV, VT, Expand); | ||||
200 | setOperationAction(ISD::UDIV, VT, Expand); | ||||
201 | setOperationAction(ISD::FDIV, VT, Expand); | ||||
202 | setOperationAction(ISD::SREM, VT, Expand); | ||||
203 | setOperationAction(ISD::UREM, VT, Expand); | ||||
204 | setOperationAction(ISD::FREM, VT, Expand); | ||||
205 | setOperationAction(ISD::SDIVREM, VT, Expand); | ||||
206 | setOperationAction(ISD::UDIVREM, VT, Expand); | ||||
207 | |||||
208 | if (!VT.isFloatingPoint() && | ||||
209 | VT != MVT::v2i64 && VT != MVT::v1i64) | ||||
210 | for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}) | ||||
211 | setOperationAction(Opcode, VT, Legal); | ||||
212 | if (!VT.isFloatingPoint()) | ||||
213 | for (auto Opcode : {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}) | ||||
214 | setOperationAction(Opcode, VT, Legal); | ||||
215 | } | ||||
216 | |||||
/// Register \p VT as a 64-bit NEON vector type, placed in the D register
/// class; its loads/stores are promoted to f64.
void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
  addRegisterClass(VT, &ARM::DPRRegClass);
  addTypeForNEON(VT, MVT::f64);
}
221 | |||||
/// Register \p VT as a 128-bit NEON vector type, placed in the D-pair
/// register class; its loads/stores are promoted to v2f64.
void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
  addRegisterClass(VT, &ARM::DPairRegClass);
  addTypeForNEON(VT, MVT::v2f64);
}
226 | |||||
227 | void ARMTargetLowering::setAllExpand(MVT VT) { | ||||
228 | for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) | ||||
229 | setOperationAction(Opc, VT, Expand); | ||||
230 | |||||
231 | // We support these really simple operations even on types where all | ||||
232 | // the actual arithmetic has to be broken down into simpler | ||||
233 | // operations or turned into library calls. | ||||
234 | setOperationAction(ISD::BITCAST, VT, Legal); | ||||
235 | setOperationAction(ISD::LOAD, VT, Legal); | ||||
236 | setOperationAction(ISD::STORE, VT, Legal); | ||||
237 | setOperationAction(ISD::UNDEF, VT, Legal); | ||||
238 | } | ||||
239 | |||||
240 | void ARMTargetLowering::addAllExtLoads(const MVT From, const MVT To, | ||||
241 | LegalizeAction Action) { | ||||
242 | setLoadExtAction(ISD::EXTLOAD, From, To, Action); | ||||
243 | setLoadExtAction(ISD::ZEXTLOAD, From, To, Action); | ||||
244 | setLoadExtAction(ISD::SEXTLOAD, From, To, Action); | ||||
245 | } | ||||
246 | |||||
/// Configure legalization for the MVE (M-profile Vector Extension) vector
/// types: the 128-bit integer and float vectors, the "long" v2i64/v2f64
/// types, and the predicate (vNi1) types. \p HasMVEFP is true when the
/// target also has the MVE floating-point extension; without it, FP vector
/// types are only supported at the bitcast/load/store level.
void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
  // 128-bit integer vector types, always available with MVE.
  const MVT IntTypes[] = { MVT::v16i8, MVT::v8i16, MVT::v4i32 };

  for (auto VT : IntTypes) {
    addRegisterClass(VT, &ARM::MQPRRegClass);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
    setOperationAction(ISD::SHL, VT, Custom);
    setOperationAction(ISD::SRA, VT, Custom);
    setOperationAction(ISD::SRL, VT, Custom);
    setOperationAction(ISD::SMIN, VT, Legal);
    setOperationAction(ISD::SMAX, VT, Legal);
    setOperationAction(ISD::UMIN, VT, Legal);
    setOperationAction(ISD::UMAX, VT, Legal);
    setOperationAction(ISD::ABS, VT, Legal);
    setOperationAction(ISD::SETCC, VT, Custom);
    setOperationAction(ISD::MLOAD, VT, Custom);
    setOperationAction(ISD::MSTORE, VT, Legal);
    setOperationAction(ISD::CTLZ, VT, Legal);
    setOperationAction(ISD::CTTZ, VT, Custom);
    setOperationAction(ISD::BITREVERSE, VT, Legal);
    setOperationAction(ISD::BSWAP, VT, Legal);
    setOperationAction(ISD::SADDSAT, VT, Legal);
    setOperationAction(ISD::UADDSAT, VT, Legal);
    setOperationAction(ISD::SSUBSAT, VT, Legal);
    setOperationAction(ISD::USUBSAT, VT, Legal);
    setOperationAction(ISD::ABDS, VT, Legal);
    setOperationAction(ISD::ABDU, VT, Legal);
    setOperationAction(ISD::AVGFLOORS, VT, Legal);
    setOperationAction(ISD::AVGFLOORU, VT, Legal);
    setOperationAction(ISD::AVGCEILS, VT, Legal);
    setOperationAction(ISD::AVGCEILU, VT, Legal);

    // No native support for these.
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::SDIV, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UDIVREM, VT, Expand);
    setOperationAction(ISD::SDIVREM, VT, Expand);
    setOperationAction(ISD::CTPOP, VT, Expand);
    setOperationAction(ISD::SELECT, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Expand);

    // Vector reductions
    setOperationAction(ISD::VECREDUCE_ADD, VT, Legal);
    setOperationAction(ISD::VECREDUCE_SMAX, VT, Legal);
    setOperationAction(ISD::VECREDUCE_UMAX, VT, Legal);
    setOperationAction(ISD::VECREDUCE_SMIN, VT, Legal);
    setOperationAction(ISD::VECREDUCE_UMIN, VT, Legal);
    setOperationAction(ISD::VECREDUCE_MUL, VT, Custom);
    setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
    setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
    setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);

    // Without MVE.fp there is no hardware int<->fp conversion for vectors;
    // with it, the saturating conversions get custom lowering.
    if (!HasMVEFP) {
      setOperationAction(ISD::SINT_TO_FP, VT, Expand);
      setOperationAction(ISD::UINT_TO_FP, VT, Expand);
      setOperationAction(ISD::FP_TO_SINT, VT, Expand);
      setOperationAction(ISD::FP_TO_UINT, VT, Expand);
    } else {
      setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
      setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
    }

    // Pre and Post inc are supported on loads and stores
    for (unsigned im = (unsigned)ISD::PRE_INC;
         im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
      setIndexedLoadAction(im, VT, Legal);
      setIndexedStoreAction(im, VT, Legal);
      setIndexedMaskedLoadAction(im, VT, Legal);
      setIndexedMaskedStoreAction(im, VT, Legal);
    }
  }

  // 128-bit floating-point vector types; fully expanded unless MVE.fp is
  // present, but shuffles/inserts/extracts and memory ops still work.
  const MVT FloatTypes[] = { MVT::v8f16, MVT::v4f32 };
  for (auto VT : FloatTypes) {
    addRegisterClass(VT, &ARM::MQPRRegClass);
    if (!HasMVEFP)
      setAllExpand(VT);

    // These are legal or custom whether we have MVE.fp or not
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getVectorElementType(), Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
    setOperationAction(ISD::BUILD_VECTOR, VT.getVectorElementType(), Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
    setOperationAction(ISD::SETCC, VT, Custom);
    setOperationAction(ISD::MLOAD, VT, Custom);
    setOperationAction(ISD::MSTORE, VT, Legal);
    setOperationAction(ISD::SELECT, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Expand);

    // Pre and Post inc are supported on loads and stores
    for (unsigned im = (unsigned)ISD::PRE_INC;
         im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
      setIndexedLoadAction(im, VT, Legal);
      setIndexedStoreAction(im, VT, Legal);
      setIndexedMaskedLoadAction(im, VT, Legal);
      setIndexedMaskedStoreAction(im, VT, Legal);
    }

    if (HasMVEFP) {
      setOperationAction(ISD::FMINNUM, VT, Legal);
      setOperationAction(ISD::FMAXNUM, VT, Legal);
      setOperationAction(ISD::FROUND, VT, Legal);
      setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
      setOperationAction(ISD::VECREDUCE_FMUL, VT, Custom);
      setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
      setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);

      // No native support for these.
      setOperationAction(ISD::FDIV, VT, Expand);
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FSQRT, VT, Expand);
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
      setOperationAction(ISD::FLOG, VT, Expand);
      setOperationAction(ISD::FLOG2, VT, Expand);
      setOperationAction(ISD::FLOG10, VT, Expand);
      setOperationAction(ISD::FEXP, VT, Expand);
      setOperationAction(ISD::FEXP2, VT, Expand);
      setOperationAction(ISD::FNEARBYINT, VT, Expand);
    }
  }

  // Custom Expand smaller than legal vector reductions to prevent false zero
  // items being added.
  setOperationAction(ISD::VECREDUCE_FADD, MVT::v4f16, Custom);
  setOperationAction(ISD::VECREDUCE_FMUL, MVT::v4f16, Custom);
  setOperationAction(ISD::VECREDUCE_FMIN, MVT::v4f16, Custom);
  setOperationAction(ISD::VECREDUCE_FMAX, MVT::v4f16, Custom);
  setOperationAction(ISD::VECREDUCE_FADD, MVT::v2f16, Custom);
  setOperationAction(ISD::VECREDUCE_FMUL, MVT::v2f16, Custom);
  setOperationAction(ISD::VECREDUCE_FMIN, MVT::v2f16, Custom);
  setOperationAction(ISD::VECREDUCE_FMAX, MVT::v2f16, Custom);

  // We 'support' these types up to bitcast/load/store level, regardless of
  // MVE integer-only / float support. Only doing FP data processing on the FP
  // vector types is inhibited at integer-only level.
  const MVT LongTypes[] = { MVT::v2i64, MVT::v2f64 };
  for (auto VT : LongTypes) {
    addRegisterClass(VT, &ARM::MQPRRegClass);
    setAllExpand(VT);
    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
    setOperationAction(ISD::VSELECT, VT, Legal);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
  }
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);

  // We can do bitwise operations on v2i64 vectors
  setOperationAction(ISD::AND, MVT::v2i64, Legal);
  setOperationAction(ISD::OR, MVT::v2i64, Legal);
  setOperationAction(ISD::XOR, MVT::v2i64, Legal);

  // It is legal to extload from v4i8 to v4i16 or v4i32.
  addAllExtLoads(MVT::v8i16, MVT::v8i8, Legal);
  addAllExtLoads(MVT::v4i32, MVT::v4i16, Legal);
  addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal);

  // It is legal to sign extend from v4i8/v4i16 to v4i32 or v8i8 to v8i16.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i8, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i16, Legal);

  // Some truncating stores are legal too.
  setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
  setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
  setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);

  // Pre and Post inc on these are legal, given the correct extends
  for (unsigned im = (unsigned)ISD::PRE_INC;
       im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
    for (auto VT : {MVT::v8i8, MVT::v4i8, MVT::v4i16}) {
      setIndexedLoadAction(im, VT, Legal);
      setIndexedStoreAction(im, VT, Legal);
      setIndexedMaskedLoadAction(im, VT, Legal);
      setIndexedMaskedStoreAction(im, VT, Legal);
    }
  }

  // Predicate types
  const MVT pTypes[] = {MVT::v16i1, MVT::v8i1, MVT::v4i1, MVT::v2i1};
  for (auto VT : pTypes) {
    addRegisterClass(VT, &ARM::VCCRRegClass);
    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
    setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::SETCC, VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
    setOperationAction(ISD::LOAD, VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);
    setOperationAction(ISD::TRUNCATE, VT, Custom);
    setOperationAction(ISD::VSELECT, VT, Expand);
    setOperationAction(ISD::SELECT, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Expand);

    if (!HasMVEFP) {
      setOperationAction(ISD::SINT_TO_FP, VT, Expand);
      setOperationAction(ISD::UINT_TO_FP, VT, Expand);
      setOperationAction(ISD::FP_TO_SINT, VT, Expand);
      setOperationAction(ISD::FP_TO_UINT, VT, Expand);
    }
  }
  // v2i1 is treated separately from the other predicate types above: most
  // operations on it are expanded rather than handled natively.
  setOperationAction(ISD::SETCC, MVT::v2i1, Expand);
  setOperationAction(ISD::TRUNCATE, MVT::v2i1, Expand);
  setOperationAction(ISD::AND, MVT::v2i1, Expand);
  setOperationAction(ISD::OR, MVT::v2i1, Expand);
  setOperationAction(ISD::XOR, MVT::v2i1, Expand);
  setOperationAction(ISD::SINT_TO_FP, MVT::v2i1, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::v2i1, Expand);
  setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Expand);

  // Double-width extend/truncate of vectors get custom lowering.
  setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
  setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
  setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
  setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
  setOperationAction(ISD::ZERO_EXTEND, MVT::v16i16, Custom);
  setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
  setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
  setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
}
482 | |||||
483 | ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, | ||||
484 | const ARMSubtarget &STI) | ||||
485 | : TargetLowering(TM), Subtarget(&STI) { | ||||
486 | RegInfo = Subtarget->getRegisterInfo(); | ||||
487 | Itins = Subtarget->getInstrItineraryData(); | ||||
488 | |||||
489 | setBooleanContents(ZeroOrOneBooleanContent); | ||||
490 | setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); | ||||
491 | |||||
492 | if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() && | ||||
493 | !Subtarget->isTargetWatchOS() && !Subtarget->isTargetDriverKit()) { | ||||
494 | bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard; | ||||
495 | for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID) | ||||
496 | setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID), | ||||
497 | IsHFTarget ? CallingConv::ARM_AAPCS_VFP | ||||
498 | : CallingConv::ARM_AAPCS); | ||||
499 | } | ||||
500 | |||||
501 | if (Subtarget->isTargetMachO()) { | ||||
502 | // Uses VFP for Thumb libfuncs if available. | ||||
503 | if (Subtarget->isThumb() && Subtarget->hasVFP2Base() && | ||||
504 | Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) { | ||||
505 | static const struct { | ||||
506 | const RTLIB::Libcall Op; | ||||
507 | const char * const Name; | ||||
508 | const ISD::CondCode Cond; | ||||
509 | } LibraryCalls[] = { | ||||
510 | // Single-precision floating-point arithmetic. | ||||
511 | { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID }, | ||||
512 | { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID }, | ||||
513 | { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID }, | ||||
514 | { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID }, | ||||
515 | |||||
516 | // Double-precision floating-point arithmetic. | ||||
517 | { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID }, | ||||
518 | { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID }, | ||||
519 | { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID }, | ||||
520 | { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID }, | ||||
521 | |||||
522 | // Single-precision comparisons. | ||||
523 | { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE }, | ||||
524 | { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE }, | ||||
525 | { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE }, | ||||
526 | { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE }, | ||||
527 | { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE }, | ||||
528 | { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE }, | ||||
529 | { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE }, | ||||
530 | |||||
531 | // Double-precision comparisons. | ||||
532 | { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE }, | ||||
533 | { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE }, | ||||
534 | { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE }, | ||||
535 | { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE }, | ||||
536 | { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE }, | ||||
537 | { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE }, | ||||
538 | { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE }, | ||||
539 | |||||
540 | // Floating-point to integer conversions. | ||||
541 | // i64 conversions are done via library routines even when generating VFP | ||||
542 | // instructions, so use the same ones. | ||||
543 | { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID }, | ||||
544 | { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID }, | ||||
545 | { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID }, | ||||
546 | { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID }, | ||||
547 | |||||
548 | // Conversions between floating types. | ||||
549 | { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID }, | ||||
550 | { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID }, | ||||
551 | |||||
552 | // Integer to floating-point conversions. | ||||
553 | // i64 conversions are done via library routines even when generating VFP | ||||
554 | // instructions, so use the same ones. | ||||
555 | // FIXME: There appears to be some naming inconsistency in ARM libgcc: | ||||
556 | // e.g., __floatunsidf vs. __floatunssidfvfp. | ||||
557 | { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID }, | ||||
558 | { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID }, | ||||
559 | { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID }, | ||||
560 | { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID }, | ||||
561 | }; | ||||
562 | |||||
563 | for (const auto &LC : LibraryCalls) { | ||||
564 | setLibcallName(LC.Op, LC.Name); | ||||
565 | if (LC.Cond != ISD::SETCC_INVALID) | ||||
566 | setCmpLibcallCC(LC.Op, LC.Cond); | ||||
567 | } | ||||
568 | } | ||||
569 | } | ||||
570 | |||||
571 | // These libcalls are not available in 32-bit. | ||||
572 | setLibcallName(RTLIB::SHL_I128, nullptr); | ||||
573 | setLibcallName(RTLIB::SRL_I128, nullptr); | ||||
574 | setLibcallName(RTLIB::SRA_I128, nullptr); | ||||
575 | setLibcallName(RTLIB::MUL_I128, nullptr); | ||||
576 | setLibcallName(RTLIB::MULO_I64, nullptr); | ||||
577 | setLibcallName(RTLIB::MULO_I128, nullptr); | ||||
578 | |||||
579 | // RTLIB | ||||
580 | if (Subtarget->isAAPCS_ABI() && | ||||
581 | (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() || | ||||
582 | Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) { | ||||
583 | static const struct { | ||||
584 | const RTLIB::Libcall Op; | ||||
585 | const char * const Name; | ||||
586 | const CallingConv::ID CC; | ||||
587 | const ISD::CondCode Cond; | ||||
588 | } LibraryCalls[] = { | ||||
589 | // Double-precision floating-point arithmetic helper functions | ||||
590 | // RTABI chapter 4.1.2, Table 2 | ||||
591 | { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, | ||||
592 | { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, | ||||
593 | { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, | ||||
594 | { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, | ||||
595 | |||||
596 | // Double-precision floating-point comparison helper functions | ||||
597 | // RTABI chapter 4.1.2, Table 3 | ||||
598 | { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE }, | ||||
599 | { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ }, | ||||
600 | { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE }, | ||||
601 | { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE }, | ||||
602 | { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE }, | ||||
603 | { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE }, | ||||
604 | { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE }, | ||||
605 | |||||
606 | // Single-precision floating-point arithmetic helper functions | ||||
607 | // RTABI chapter 4.1.2, Table 4 | ||||
608 | { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, | ||||
609 | { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, | ||||
610 | { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, | ||||
611 | { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, | ||||
612 | |||||
613 | // Single-precision floating-point comparison helper functions | ||||
614 | // RTABI chapter 4.1.2, Table 5 | ||||
615 | { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE }, | ||||
616 | { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ }, | ||||
617 | { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE }, | ||||
618 | { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE }, | ||||
619 | { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE }, | ||||
620 | { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE }, | ||||
621 | { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE }, | ||||
622 | |||||
623 | // Floating-point to integer conversions. | ||||
624 | // RTABI chapter 4.1.2, Table 6 | ||||
625 | { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, | ||||
626 | { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, | ||||
627 | { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, | ||||
628 | { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, | ||||
629 | { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, | ||||
630 | { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, | ||||
631 | { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, | ||||
632 | { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, | ||||
633 | |||||
634 | // Conversions between floating types. | ||||
635 | // RTABI chapter 4.1.2, Table 7 | ||||
636 | { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, | ||||
637 | { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, | ||||
638 | { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, | ||||
639 | |||||
640 | // Integer to floating-point conversions. | ||||
641 | // RTABI chapter 4.1.2, Table 8 | ||||
642 | { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, | ||||
643 | { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, | ||||
644 | { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, | ||||
645 | { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, | ||||
646 | { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, | ||||
647 | { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, | ||||
648 | { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, | ||||
649 | { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, | ||||
650 | |||||
651 | // Long long helper functions | ||||
652 | // RTABI chapter 4.2, Table 9 | ||||
653 | { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, | ||||
654 | { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, | ||||
655 | { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, | ||||
656 | { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, | ||||
657 | |||||
658 | // Integer division functions | ||||
659 | // RTABI chapter 4.3.1 | ||||
660 | { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, | ||||
661 | { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, | ||||
662 | { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, | ||||
663 | { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, | ||||
664 | { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, | ||||
665 | { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, | ||||
666 | { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, | ||||
667 | { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, | ||||
668 | }; | ||||
669 | |||||
670 | for (const auto &LC : LibraryCalls) { | ||||
671 | setLibcallName(LC.Op, LC.Name); | ||||
672 | setLibcallCallingConv(LC.Op, LC.CC); | ||||
673 | if (LC.Cond != ISD::SETCC_INVALID) | ||||
674 | setCmpLibcallCC(LC.Op, LC.Cond); | ||||
675 | } | ||||
676 | |||||
677 | // EABI dependent RTLIB | ||||
678 | if (TM.Options.EABIVersion == EABI::EABI4 || | ||||
679 | TM.Options.EABIVersion == EABI::EABI5) { | ||||
680 | static const struct { | ||||
681 | const RTLIB::Libcall Op; | ||||
682 | const char *const Name; | ||||
683 | const CallingConv::ID CC; | ||||
684 | const ISD::CondCode Cond; | ||||
685 | } MemOpsLibraryCalls[] = { | ||||
686 | // Memory operations | ||||
687 | // RTABI chapter 4.3.4 | ||||
688 | { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, | ||||
689 | { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, | ||||
690 | { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, | ||||
691 | }; | ||||
692 | |||||
693 | for (const auto &LC : MemOpsLibraryCalls) { | ||||
694 | setLibcallName(LC.Op, LC.Name); | ||||
695 | setLibcallCallingConv(LC.Op, LC.CC); | ||||
696 | if (LC.Cond != ISD::SETCC_INVALID) | ||||
697 | setCmpLibcallCC(LC.Op, LC.Cond); | ||||
698 | } | ||||
699 | } | ||||
700 | } | ||||
701 | |||||
702 | if (Subtarget->isTargetWindows()) { | ||||
703 | static const struct { | ||||
704 | const RTLIB::Libcall Op; | ||||
705 | const char * const Name; | ||||
706 | const CallingConv::ID CC; | ||||
707 | } LibraryCalls[] = { | ||||
708 | { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP }, | ||||
709 | { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP }, | ||||
710 | { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP }, | ||||
711 | { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP }, | ||||
712 | { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP }, | ||||
713 | { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP }, | ||||
714 | { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP }, | ||||
715 | { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP }, | ||||
716 | }; | ||||
717 | |||||
718 | for (const auto &LC : LibraryCalls) { | ||||
719 | setLibcallName(LC.Op, LC.Name); | ||||
720 | setLibcallCallingConv(LC.Op, LC.CC); | ||||
721 | } | ||||
722 | } | ||||
723 | |||||
724 | // Use divmod compiler-rt calls for iOS 5.0 and later. | ||||
725 | if (Subtarget->isTargetMachO() && | ||||
726 | !(Subtarget->isTargetIOS() && | ||||
727 | Subtarget->getTargetTriple().isOSVersionLT(5, 0))) { | ||||
728 | setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4"); | ||||
729 | setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4"); | ||||
730 | } | ||||
731 | |||||
732 | // The half <-> float conversion functions are always soft-float on | ||||
733 | // non-watchos platforms, but are needed for some targets which use a | ||||
734 | // hard-float calling convention by default. | ||||
735 | if (!Subtarget->isTargetWatchABI()) { | ||||
736 | if (Subtarget->isAAPCS_ABI()) { | ||||
737 | setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS); | ||||
738 | setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS); | ||||
739 | setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS); | ||||
740 | } else { | ||||
741 | setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS); | ||||
742 | setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS); | ||||
743 | setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS); | ||||
744 | } | ||||
745 | } | ||||
746 | |||||
747 | // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have | ||||
748 | // a __gnu_ prefix (which is the default). | ||||
749 | if (Subtarget->isTargetAEABI()) { | ||||
750 | static const struct { | ||||
751 | const RTLIB::Libcall Op; | ||||
752 | const char * const Name; | ||||
753 | const CallingConv::ID CC; | ||||
754 | } LibraryCalls[] = { | ||||
755 | { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS }, | ||||
756 | { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS }, | ||||
757 | { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS }, | ||||
758 | }; | ||||
759 | |||||
760 | for (const auto &LC : LibraryCalls) { | ||||
761 | setLibcallName(LC.Op, LC.Name); | ||||
762 | setLibcallCallingConv(LC.Op, LC.CC); | ||||
763 | } | ||||
764 | } | ||||
765 | |||||
766 | if (Subtarget->isThumb1Only()) | ||||
767 | addRegisterClass(MVT::i32, &ARM::tGPRRegClass); | ||||
768 | else | ||||
769 | addRegisterClass(MVT::i32, &ARM::GPRRegClass); | ||||
770 | |||||
771 | if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only() && | ||||
772 | Subtarget->hasFPRegs()) { | ||||
773 | addRegisterClass(MVT::f32, &ARM::SPRRegClass); | ||||
774 | addRegisterClass(MVT::f64, &ARM::DPRRegClass); | ||||
775 | |||||
776 | setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i32, Custom); | ||||
777 | setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i32, Custom); | ||||
778 | setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom); | ||||
779 | setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom); | ||||
780 | |||||
781 | if (!Subtarget->hasVFP2Base()) | ||||
782 | setAllExpand(MVT::f32); | ||||
783 | if (!Subtarget->hasFP64()) | ||||
784 | setAllExpand(MVT::f64); | ||||
785 | } | ||||
786 | |||||
787 | if (Subtarget->hasFullFP16()) { | ||||
788 | addRegisterClass(MVT::f16, &ARM::HPRRegClass); | ||||
789 | setOperationAction(ISD::BITCAST, MVT::i16, Custom); | ||||
790 | setOperationAction(ISD::BITCAST, MVT::f16, Custom); | ||||
791 | |||||
792 | setOperationAction(ISD::FMINNUM, MVT::f16, Legal); | ||||
793 | setOperationAction(ISD::FMAXNUM, MVT::f16, Legal); | ||||
794 | } | ||||
795 | |||||
796 | if (Subtarget->hasBF16()) { | ||||
797 | addRegisterClass(MVT::bf16, &ARM::HPRRegClass); | ||||
798 | setAllExpand(MVT::bf16); | ||||
799 | if (!Subtarget->hasFullFP16()) | ||||
800 | setOperationAction(ISD::BITCAST, MVT::bf16, Custom); | ||||
801 | } | ||||
802 | |||||
803 | for (MVT VT : MVT::fixedlen_vector_valuetypes()) { | ||||
804 | for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) { | ||||
805 | setTruncStoreAction(VT, InnerVT, Expand); | ||||
806 | addAllExtLoads(VT, InnerVT, Expand); | ||||
807 | } | ||||
808 | |||||
809 | setOperationAction(ISD::SMUL_LOHI, VT, Expand); | ||||
810 | setOperationAction(ISD::UMUL_LOHI, VT, Expand); | ||||
811 | |||||
812 | setOperationAction(ISD::BSWAP, VT, Expand); | ||||
813 | } | ||||
814 | |||||
815 | setOperationAction(ISD::ConstantFP, MVT::f32, Custom); | ||||
816 | setOperationAction(ISD::ConstantFP, MVT::f64, Custom); | ||||
817 | |||||
818 | setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom); | ||||
819 | setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom); | ||||
820 | |||||
821 | if (Subtarget->hasMVEIntegerOps()) | ||||
822 | addMVEVectorTypes(Subtarget->hasMVEFloatOps()); | ||||
823 | |||||
824 | // Combine low-overhead loop intrinsics so that we can lower i1 types. | ||||
825 | if (Subtarget->hasLOB()) { | ||||
826 | setTargetDAGCombine({ISD::BRCOND, ISD::BR_CC}); | ||||
827 | } | ||||
828 | |||||
829 | if (Subtarget->hasNEON()) { | ||||
830 | addDRTypeForNEON(MVT::v2f32); | ||||
831 | addDRTypeForNEON(MVT::v8i8); | ||||
832 | addDRTypeForNEON(MVT::v4i16); | ||||
833 | addDRTypeForNEON(MVT::v2i32); | ||||
834 | addDRTypeForNEON(MVT::v1i64); | ||||
835 | |||||
836 | addQRTypeForNEON(MVT::v4f32); | ||||
837 | addQRTypeForNEON(MVT::v2f64); | ||||
838 | addQRTypeForNEON(MVT::v16i8); | ||||
839 | addQRTypeForNEON(MVT::v8i16); | ||||
840 | addQRTypeForNEON(MVT::v4i32); | ||||
841 | addQRTypeForNEON(MVT::v2i64); | ||||
842 | |||||
843 | if (Subtarget->hasFullFP16()) { | ||||
844 | addQRTypeForNEON(MVT::v8f16); | ||||
845 | addDRTypeForNEON(MVT::v4f16); | ||||
846 | } | ||||
847 | |||||
848 | if (Subtarget->hasBF16()) { | ||||
849 | addQRTypeForNEON(MVT::v8bf16); | ||||
850 | addDRTypeForNEON(MVT::v4bf16); | ||||
851 | } | ||||
852 | } | ||||
853 | |||||
854 | if (Subtarget->hasMVEIntegerOps() || Subtarget->hasNEON()) { | ||||
855 | // v2f64 is legal so that QR subregs can be extracted as f64 elements, but | ||||
856 | // none of Neon, MVE or VFP supports any arithmetic operations on it. | ||||
857 | setOperationAction(ISD::FADD, MVT::v2f64, Expand); | ||||
858 | setOperationAction(ISD::FSUB, MVT::v2f64, Expand); | ||||
859 | setOperationAction(ISD::FMUL, MVT::v2f64, Expand); | ||||
860 | // FIXME: Code duplication: FDIV and FREM are expanded always, see | ||||
861 | // ARMTargetLowering::addTypeForNEON method for details. | ||||
862 | setOperationAction(ISD::FDIV, MVT::v2f64, Expand); | ||||
863 | setOperationAction(ISD::FREM, MVT::v2f64, Expand); | ||||
864 | // FIXME: Create unittest. | ||||
865 | // In other words, find a way when "copysign" appears in DAG with vector | ||||
866 | // operands. | ||||
867 | setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand); | ||||
868 | // FIXME: Code duplication: SETCC has custom operation action, see | ||||
869 | // ARMTargetLowering::addTypeForNEON method for details. | ||||
870 | setOperationAction(ISD::SETCC, MVT::v2f64, Expand); | ||||
871 | // FIXME: Create unittest for FNEG and for FABS. | ||||
872 | setOperationAction(ISD::FNEG, MVT::v2f64, Expand); | ||||
873 | setOperationAction(ISD::FABS, MVT::v2f64, Expand); | ||||
874 | setOperationAction(ISD::FSQRT, MVT::v2f64, Expand); | ||||
875 | setOperationAction(ISD::FSIN, MVT::v2f64, Expand); | ||||
876 | setOperationAction(ISD::FCOS, MVT::v2f64, Expand); | ||||
877 | setOperationAction(ISD::FPOW, MVT::v2f64, Expand); | ||||
878 | setOperationAction(ISD::FLOG, MVT::v2f64, Expand); | ||||
879 | setOperationAction(ISD::FLOG2, MVT::v2f64, Expand); | ||||
880 | setOperationAction(ISD::FLOG10, MVT::v2f64, Expand); | ||||
881 | setOperationAction(ISD::FEXP, MVT::v2f64, Expand); | ||||
882 | setOperationAction(ISD::FEXP2, MVT::v2f64, Expand); | ||||
883 | // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR. | ||||
884 | setOperationAction(ISD::FCEIL, MVT::v2f64, Expand); | ||||
885 | setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand); | ||||
886 | setOperationAction(ISD::FRINT, MVT::v2f64, Expand); | ||||
887 | setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand); | ||||
888 | setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand); | ||||
889 | setOperationAction(ISD::FMA, MVT::v2f64, Expand); | ||||
890 | } | ||||
891 | |||||
892 | if (Subtarget->hasNEON()) { | ||||
893 | // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively | ||||
894 | // supported for v4f32. | ||||
895 | setOperationAction(ISD::FSQRT, MVT::v4f32, Expand); | ||||
896 | setOperationAction(ISD::FSIN, MVT::v4f32, Expand); | ||||
897 | setOperationAction(ISD::FCOS, MVT::v4f32, Expand); | ||||
898 | setOperationAction(ISD::FPOW, MVT::v4f32, Expand); | ||||
899 | setOperationAction(ISD::FLOG, MVT::v4f32, Expand); | ||||
900 | setOperationAction(ISD::FLOG2, MVT::v4f32, Expand); | ||||
901 | setOperationAction(ISD::FLOG10, MVT::v4f32, Expand); | ||||
902 | setOperationAction(ISD::FEXP, MVT::v4f32, Expand); | ||||
903 | setOperationAction(ISD::FEXP2, MVT::v4f32, Expand); | ||||
904 | setOperationAction(ISD::FCEIL, MVT::v4f32, Expand); | ||||
905 | setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand); | ||||
906 | setOperationAction(ISD::FRINT, MVT::v4f32, Expand); | ||||
907 | setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand); | ||||
908 | setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand); | ||||
909 | |||||
910 | // Mark v2f32 intrinsics. | ||||
911 | setOperationAction(ISD::FSQRT, MVT::v2f32, Expand); | ||||
912 | setOperationAction(ISD::FSIN, MVT::v2f32, Expand); | ||||
913 | setOperationAction(ISD::FCOS, MVT::v2f32, Expand); | ||||
914 | setOperationAction(ISD::FPOW, MVT::v2f32, Expand); | ||||
915 | setOperationAction(ISD::FLOG, MVT::v2f32, Expand); | ||||
916 | setOperationAction(ISD::FLOG2, MVT::v2f32, Expand); | ||||
917 | setOperationAction(ISD::FLOG10, MVT::v2f32, Expand); | ||||
918 | setOperationAction(ISD::FEXP, MVT::v2f32, Expand); | ||||
919 | setOperationAction(ISD::FEXP2, MVT::v2f32, Expand); | ||||
920 | setOperationAction(ISD::FCEIL, MVT::v2f32, Expand); | ||||
921 | setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand); | ||||
922 | setOperationAction(ISD::FRINT, MVT::v2f32, Expand); | ||||
923 | setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand); | ||||
924 | setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand); | ||||
925 | |||||
926 | // Neon does not support some operations on v1i64 and v2i64 types. | ||||
927 | setOperationAction(ISD::MUL, MVT::v1i64, Expand); | ||||
928 | // Custom handling for some quad-vector types to detect VMULL. | ||||
929 | setOperationAction(ISD::MUL, MVT::v8i16, Custom); | ||||
930 | setOperationAction(ISD::MUL, MVT::v4i32, Custom); | ||||
931 | setOperationAction(ISD::MUL, MVT::v2i64, Custom); | ||||
932 | // Custom handling for some vector types to avoid expensive expansions | ||||
933 | setOperationAction(ISD::SDIV, MVT::v4i16, Custom); | ||||
934 | setOperationAction(ISD::SDIV, MVT::v8i8, Custom); | ||||
935 | setOperationAction(ISD::UDIV, MVT::v4i16, Custom); | ||||
936 | setOperationAction(ISD::UDIV, MVT::v8i8, Custom); | ||||
937 | // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with | ||||
938 | // a destination type that is wider than the source, and nor does | ||||
939 | // it have a FP_TO_[SU]INT instruction with a narrower destination than | ||||
940 | // source. | ||||
941 | setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom); | ||||
942 | setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom); | ||||
943 | setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom); | ||||
944 | setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom); | ||||
945 | setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom); | ||||
946 | setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Custom); | ||||
947 | setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom); | ||||
948 | setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom); | ||||
949 | |||||
950 | setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand); | ||||
951 | setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand); | ||||
952 | |||||
953 | // NEON does not have single instruction CTPOP for vectors with element | ||||
954 | // types wider than 8-bits. However, custom lowering can leverage the | ||||
955 | // v8i8/v16i8 vcnt instruction. | ||||
956 | setOperationAction(ISD::CTPOP, MVT::v2i32, Custom); | ||||
957 | setOperationAction(ISD::CTPOP, MVT::v4i32, Custom); | ||||
958 | setOperationAction(ISD::CTPOP, MVT::v4i16, Custom); | ||||
959 | setOperationAction(ISD::CTPOP, MVT::v8i16, Custom); | ||||
960 | setOperationAction(ISD::CTPOP, MVT::v1i64, Custom); | ||||
961 | setOperationAction(ISD::CTPOP, MVT::v2i64, Custom); | ||||
962 | |||||
963 | setOperationAction(ISD::CTLZ, MVT::v1i64, Expand); | ||||
964 | setOperationAction(ISD::CTLZ, MVT::v2i64, Expand); | ||||
965 | |||||
966 | // NEON does not have single instruction CTTZ for vectors. | ||||
967 | setOperationAction(ISD::CTTZ, MVT::v8i8, Custom); | ||||
968 | setOperationAction(ISD::CTTZ, MVT::v4i16, Custom); | ||||
969 | setOperationAction(ISD::CTTZ, MVT::v2i32, Custom); | ||||
970 | setOperationAction(ISD::CTTZ, MVT::v1i64, Custom); | ||||
971 | |||||
972 | setOperationAction(ISD::CTTZ, MVT::v16i8, Custom); | ||||
973 | setOperationAction(ISD::CTTZ, MVT::v8i16, Custom); | ||||
974 | setOperationAction(ISD::CTTZ, MVT::v4i32, Custom); | ||||
975 | setOperationAction(ISD::CTTZ, MVT::v2i64, Custom); | ||||
976 | |||||
977 | setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i8, Custom); | ||||
978 | setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i16, Custom); | ||||
979 | setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i32, Custom); | ||||
980 | setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v1i64, Custom); | ||||
981 | |||||
982 | setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i8, Custom); | ||||
983 | setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i16, Custom); | ||||
984 | setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom); | ||||
985 | setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom); | ||||
986 | |||||
987 | for (MVT VT : MVT::fixedlen_vector_valuetypes()) { | ||||
988 | setOperationAction(ISD::MULHS, VT, Expand); | ||||
989 | setOperationAction(ISD::MULHU, VT, Expand); | ||||
990 | } | ||||
991 | |||||
992 | // NEON only has FMA instructions as of VFP4. | ||||
993 | if (!Subtarget->hasVFP4Base()) { | ||||
994 | setOperationAction(ISD::FMA, MVT::v2f32, Expand); | ||||
995 | setOperationAction(ISD::FMA, MVT::v4f32, Expand); | ||||
996 | } | ||||
997 | |||||
998 | setTargetDAGCombine({ISD::SHL, ISD::SRL, ISD::SRA, ISD::FP_TO_SINT, | ||||
999 | ISD::FP_TO_UINT, ISD::FDIV, ISD::LOAD}); | ||||
1000 | |||||
1001 | // It is legal to extload from v4i8 to v4i16 or v4i32. | ||||
1002 | for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16, | ||||
1003 | MVT::v2i32}) { | ||||
1004 | for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) { | ||||
1005 | setLoadExtAction(ISD::EXTLOAD, VT, Ty, Legal); | ||||
1006 | setLoadExtAction(ISD::ZEXTLOAD, VT, Ty, Legal); | ||||
1007 | setLoadExtAction(ISD::SEXTLOAD, VT, Ty, Legal); | ||||
1008 | } | ||||
1009 | } | ||||
1010 | |||||
1011 | for (auto VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16, | ||||
1012 | MVT::v4i32}) { | ||||
1013 | setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom); | ||||
1014 | setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom); | ||||
1015 | setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom); | ||||
1016 | setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom); | ||||
1017 | } | ||||
1018 | } | ||||
1019 | |||||
1020 | if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) { | ||||
1021 | setTargetDAGCombine( | ||||
1022 | {ISD::BUILD_VECTOR, ISD::VECTOR_SHUFFLE, ISD::INSERT_SUBVECTOR, | ||||
1023 | ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT, | ||||
1024 | ISD::SIGN_EXTEND_INREG, ISD::STORE, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND, | ||||
1025 | ISD::ANY_EXTEND, ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN, | ||||
1026 | ISD::INTRINSIC_VOID, ISD::VECREDUCE_ADD, ISD::ADD, ISD::BITCAST}); | ||||
1027 | } | ||||
1028 | if (Subtarget->hasMVEIntegerOps()) { | ||||
1029 | setTargetDAGCombine({ISD::SMIN, ISD::UMIN, ISD::SMAX, ISD::UMAX, | ||||
1030 | ISD::FP_EXTEND, ISD::SELECT, ISD::SELECT_CC, | ||||
1031 | ISD::SETCC}); | ||||
1032 | } | ||||
1033 | if (Subtarget->hasMVEFloatOps()) { | ||||
1034 | setTargetDAGCombine(ISD::FADD); | ||||
1035 | } | ||||
1036 | |||||
1037 | if (!Subtarget->hasFP64()) { | ||||
1038 | // When targeting a floating-point unit with only single-precision | ||||
1039 | // operations, f64 is legal for the few double-precision instructions which | ||||
1040 | // are present. However, no double-precision operations other than moves, | ||||
1041 | // loads and stores are provided by the hardware. | ||||
1042 | setOperationAction(ISD::FADD, MVT::f64, Expand); | ||||
1043 | setOperationAction(ISD::FSUB, MVT::f64, Expand); | ||||
1044 | setOperationAction(ISD::FMUL, MVT::f64, Expand); | ||||
1045 | setOperationAction(ISD::FMA, MVT::f64, Expand); | ||||
1046 | setOperationAction(ISD::FDIV, MVT::f64, Expand); | ||||
1047 | setOperationAction(ISD::FREM, MVT::f64, Expand); | ||||
1048 | setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); | ||||
1049 | setOperationAction(ISD::FGETSIGN, MVT::f64, Expand); | ||||
1050 | setOperationAction(ISD::FNEG, MVT::f64, Expand); | ||||
1051 | setOperationAction(ISD::FABS, MVT::f64, Expand); | ||||
1052 | setOperationAction(ISD::FSQRT, MVT::f64, Expand); | ||||
1053 | setOperationAction(ISD::FSIN, MVT::f64, Expand); | ||||
1054 | setOperationAction(ISD::FCOS, MVT::f64, Expand); | ||||
1055 | setOperationAction(ISD::FPOW, MVT::f64, Expand); | ||||
1056 | setOperationAction(ISD::FLOG, MVT::f64, Expand); | ||||
1057 | setOperationAction(ISD::FLOG2, MVT::f64, Expand); | ||||
1058 | setOperationAction(ISD::FLOG10, MVT::f64, Expand); | ||||
1059 | setOperationAction(ISD::FEXP, MVT::f64, Expand); | ||||
1060 | setOperationAction(ISD::FEXP2, MVT::f64, Expand); | ||||
1061 | setOperationAction(ISD::FCEIL, MVT::f64, Expand); | ||||
1062 | setOperationAction(ISD::FTRUNC, MVT::f64, Expand); | ||||
1063 | setOperationAction(ISD::FRINT, MVT::f64, Expand); | ||||
1064 | setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand); | ||||
1065 | setOperationAction(ISD::FFLOOR, MVT::f64, Expand); | ||||
1066 | setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); | ||||
1067 | setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); | ||||
1068 | setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); | ||||
1069 | setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); | ||||
1070 | setOperationAction(ISD::FP_TO_SINT, MVT::f64, Custom); | ||||
1071 | setOperationAction(ISD::FP_TO_UINT, MVT::f64, Custom); | ||||
1072 | setOperationAction(ISD::FP_ROUND, MVT::f32, Custom); | ||||
1073 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom); | ||||
1074 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom); | ||||
1075 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::f64, Custom); | ||||
1076 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::f64, Custom); | ||||
1077 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom); | ||||
1078 | } | ||||
1079 | |||||
1080 | if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) { | ||||
1081 | setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom); | ||||
1082 | setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Custom); | ||||
1083 | if (Subtarget->hasFullFP16()) { | ||||
1084 | setOperationAction(ISD::FP_ROUND, MVT::f16, Custom); | ||||
1085 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom); | ||||
1086 | } | ||||
1087 | } | ||||
1088 | |||||
1089 | if (!Subtarget->hasFP16()) { | ||||
1090 | setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom); | ||||
1091 | setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Custom); | ||||
1092 | } | ||||
1093 | |||||
1094 | computeRegisterProperties(Subtarget->getRegisterInfo()); | ||||
1095 | |||||
1096 | // ARM does not have floating-point extending loads. | ||||
1097 | for (MVT VT : MVT::fp_valuetypes()) { | ||||
1098 | setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand); | ||||
1099 | setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand); | ||||
1100 | } | ||||
1101 | |||||
1102 | // ... or truncating stores | ||||
1103 | setTruncStoreAction(MVT::f64, MVT::f32, Expand); | ||||
1104 | setTruncStoreAction(MVT::f32, MVT::f16, Expand); | ||||
1105 | setTruncStoreAction(MVT::f64, MVT::f16, Expand); | ||||
1106 | |||||
1107 | // ARM does not have i1 sign extending load. | ||||
1108 | for (MVT VT : MVT::integer_valuetypes()) | ||||
1109 | setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); | ||||
1110 | |||||
1111 | // ARM supports all 4 flavors of integer indexed load / store. | ||||
1112 | if (!Subtarget->isThumb1Only()) { | ||||
1113 | for (unsigned im = (unsigned)ISD::PRE_INC; | ||||
1114 | im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { | ||||
1115 | setIndexedLoadAction(im, MVT::i1, Legal); | ||||
1116 | setIndexedLoadAction(im, MVT::i8, Legal); | ||||
1117 | setIndexedLoadAction(im, MVT::i16, Legal); | ||||
1118 | setIndexedLoadAction(im, MVT::i32, Legal); | ||||
1119 | setIndexedStoreAction(im, MVT::i1, Legal); | ||||
1120 | setIndexedStoreAction(im, MVT::i8, Legal); | ||||
1121 | setIndexedStoreAction(im, MVT::i16, Legal); | ||||
1122 | setIndexedStoreAction(im, MVT::i32, Legal); | ||||
1123 | } | ||||
1124 | } else { | ||||
1125 | // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}. | ||||
1126 | setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal); | ||||
1127 | setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal); | ||||
1128 | } | ||||
1129 | |||||
1130 | setOperationAction(ISD::SADDO, MVT::i32, Custom); | ||||
1131 | setOperationAction(ISD::UADDO, MVT::i32, Custom); | ||||
1132 | setOperationAction(ISD::SSUBO, MVT::i32, Custom); | ||||
1133 | setOperationAction(ISD::USUBO, MVT::i32, Custom); | ||||
1134 | |||||
1135 | setOperationAction(ISD::ADDCARRY, MVT::i32, Custom); | ||||
1136 | setOperationAction(ISD::SUBCARRY, MVT::i32, Custom); | ||||
1137 | if (Subtarget->hasDSP()) { | ||||
1138 | setOperationAction(ISD::SADDSAT, MVT::i8, Custom); | ||||
1139 | setOperationAction(ISD::SSUBSAT, MVT::i8, Custom); | ||||
1140 | setOperationAction(ISD::SADDSAT, MVT::i16, Custom); | ||||
1141 | setOperationAction(ISD::SSUBSAT, MVT::i16, Custom); | ||||
1142 | setOperationAction(ISD::UADDSAT, MVT::i8, Custom); | ||||
1143 | setOperationAction(ISD::USUBSAT, MVT::i8, Custom); | ||||
1144 | setOperationAction(ISD::UADDSAT, MVT::i16, Custom); | ||||
1145 | setOperationAction(ISD::USUBSAT, MVT::i16, Custom); | ||||
1146 | } | ||||
1147 | if (Subtarget->hasBaseDSP()) { | ||||
1148 | setOperationAction(ISD::SADDSAT, MVT::i32, Legal); | ||||
1149 | setOperationAction(ISD::SSUBSAT, MVT::i32, Legal); | ||||
1150 | } | ||||
1151 | |||||
1152 | // i64 operation support. | ||||
1153 | setOperationAction(ISD::MUL, MVT::i64, Expand); | ||||
1154 | setOperationAction(ISD::MULHU, MVT::i32, Expand); | ||||
1155 | if (Subtarget->isThumb1Only()) { | ||||
1156 | setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); | ||||
1157 | setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); | ||||
1158 | } | ||||
1159 | if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops() | ||||
1160 | || (Subtarget->isThumb2() && !Subtarget->hasDSP())) | ||||
1161 | setOperationAction(ISD::MULHS, MVT::i32, Expand); | ||||
1162 | |||||
1163 | setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); | ||||
1164 | setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); | ||||
1165 | setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); | ||||
1166 | setOperationAction(ISD::SRL, MVT::i64, Custom); | ||||
1167 | setOperationAction(ISD::SRA, MVT::i64, Custom); | ||||
1168 | setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); | ||||
1169 | setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); | ||||
1170 | setOperationAction(ISD::LOAD, MVT::i64, Custom); | ||||
1171 | setOperationAction(ISD::STORE, MVT::i64, Custom); | ||||
1172 | |||||
1173 | // MVE lowers 64 bit shifts to lsll and lsrl | ||||
1174 | // assuming that ISD::SRL and SRA of i64 are already marked custom | ||||
1175 | if (Subtarget->hasMVEIntegerOps()) | ||||
1176 | setOperationAction(ISD::SHL, MVT::i64, Custom); | ||||
1177 | |||||
1178 | // Expand to __aeabi_l{lsl,lsr,asr} calls for Thumb1. | ||||
1179 | if (Subtarget->isThumb1Only()) { | ||||
1180 | setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand); | ||||
1181 | setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand); | ||||
1182 | setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand); | ||||
1183 | } | ||||
1184 | |||||
1185 | if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) | ||||
1186 | setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); | ||||
1187 | |||||
1188 | // ARM does not have ROTL. | ||||
1189 | setOperationAction(ISD::ROTL, MVT::i32, Expand); | ||||
1190 | for (MVT VT : MVT::fixedlen_vector_valuetypes()) { | ||||
1191 | setOperationAction(ISD::ROTL, VT, Expand); | ||||
1192 | setOperationAction(ISD::ROTR, VT, Expand); | ||||
1193 | } | ||||
1194 | setOperationAction(ISD::CTTZ, MVT::i32, Custom); | ||||
1195 | setOperationAction(ISD::CTPOP, MVT::i32, Expand); | ||||
1196 | if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) { | ||||
1197 | setOperationAction(ISD::CTLZ, MVT::i32, Expand); | ||||
1198 | setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, LibCall); | ||||
1199 | } | ||||
1200 | |||||
1201 | // @llvm.readcyclecounter requires the Performance Monitors extension. | ||||
1202 | // Default to the 0 expansion on unsupported platforms. | ||||
1203 | // FIXME: Technically there are older ARM CPUs that have | ||||
1204 | // implementation-specific ways of obtaining this information. | ||||
1205 | if (Subtarget->hasPerfMon()) | ||||
1206 | setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom); | ||||
1207 | |||||
1208 | // Only ARMv6 has BSWAP. | ||||
1209 | if (!Subtarget->hasV6Ops()) | ||||
1210 | setOperationAction(ISD::BSWAP, MVT::i32, Expand); | ||||
1211 | |||||
1212 | bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode() | ||||
1213 | : Subtarget->hasDivideInARMMode(); | ||||
1214 | if (!hasDivide) { | ||||
1215 | // These are expanded into libcalls if the cpu doesn't have HW divider. | ||||
1216 | setOperationAction(ISD::SDIV, MVT::i32, LibCall); | ||||
1217 | setOperationAction(ISD::UDIV, MVT::i32, LibCall); | ||||
1218 | } | ||||
1219 | |||||
1220 | if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) { | ||||
1221 | setOperationAction(ISD::SDIV, MVT::i32, Custom); | ||||
1222 | setOperationAction(ISD::UDIV, MVT::i32, Custom); | ||||
1223 | |||||
1224 | setOperationAction(ISD::SDIV, MVT::i64, Custom); | ||||
1225 | setOperationAction(ISD::UDIV, MVT::i64, Custom); | ||||
1226 | } | ||||
1227 | |||||
1228 | setOperationAction(ISD::SREM, MVT::i32, Expand); | ||||
1229 | setOperationAction(ISD::UREM, MVT::i32, Expand); | ||||
1230 | |||||
1231 | // Register based DivRem for AEABI (RTABI 4.2) | ||||
1232 | if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() || | ||||
1233 | Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() || | ||||
1234 | Subtarget->isTargetWindows()) { | ||||
1235 | setOperationAction(ISD::SREM, MVT::i64, Custom); | ||||
1236 | setOperationAction(ISD::UREM, MVT::i64, Custom); | ||||
1237 | HasStandaloneRem = false; | ||||
1238 | |||||
1239 | if (Subtarget->isTargetWindows()) { | ||||
1240 | const struct { | ||||
1241 | const RTLIB::Libcall Op; | ||||
1242 | const char * const Name; | ||||
1243 | const CallingConv::ID CC; | ||||
1244 | } LibraryCalls[] = { | ||||
1245 | { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS }, | ||||
1246 | { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS }, | ||||
1247 | { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS }, | ||||
1248 | { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS }, | ||||
1249 | |||||
1250 | { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS }, | ||||
1251 | { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS }, | ||||
1252 | { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS }, | ||||
1253 | { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS }, | ||||
1254 | }; | ||||
1255 | |||||
1256 | for (const auto &LC : LibraryCalls) { | ||||
1257 | setLibcallName(LC.Op, LC.Name); | ||||
1258 | setLibcallCallingConv(LC.Op, LC.CC); | ||||
1259 | } | ||||
1260 | } else { | ||||
1261 | const struct { | ||||
1262 | const RTLIB::Libcall Op; | ||||
1263 | const char * const Name; | ||||
1264 | const CallingConv::ID CC; | ||||
1265 | } LibraryCalls[] = { | ||||
1266 | { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS }, | ||||
1267 | { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS }, | ||||
1268 | { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS }, | ||||
1269 | { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS }, | ||||
1270 | |||||
1271 | { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS }, | ||||
1272 | { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS }, | ||||
1273 | { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS }, | ||||
1274 | { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS }, | ||||
1275 | }; | ||||
1276 | |||||
1277 | for (const auto &LC : LibraryCalls) { | ||||
1278 | setLibcallName(LC.Op, LC.Name); | ||||
1279 | setLibcallCallingConv(LC.Op, LC.CC); | ||||
1280 | } | ||||
1281 | } | ||||
1282 | |||||
1283 | setOperationAction(ISD::SDIVREM, MVT::i32, Custom); | ||||
1284 | setOperationAction(ISD::UDIVREM, MVT::i32, Custom); | ||||
1285 | setOperationAction(ISD::SDIVREM, MVT::i64, Custom); | ||||
1286 | setOperationAction(ISD::UDIVREM, MVT::i64, Custom); | ||||
1287 | } else { | ||||
1288 | setOperationAction(ISD::SDIVREM, MVT::i32, Expand); | ||||
1289 | setOperationAction(ISD::UDIVREM, MVT::i32, Expand); | ||||
1290 | } | ||||
1291 | |||||
1292 | if (Subtarget->getTargetTriple().isOSMSVCRT()) { | ||||
1293 | // MSVCRT doesn't have powi; fall back to pow | ||||
1294 | setLibcallName(RTLIB::POWI_F32, nullptr); | ||||
1295 | setLibcallName(RTLIB::POWI_F64, nullptr); | ||||
1296 | } | ||||
1297 | |||||
1298 | setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); | ||||
1299 | setOperationAction(ISD::ConstantPool, MVT::i32, Custom); | ||||
1300 | setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); | ||||
1301 | setOperationAction(ISD::BlockAddress, MVT::i32, Custom); | ||||
1302 | |||||
1303 | setOperationAction(ISD::TRAP, MVT::Other, Legal); | ||||
1304 | setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); | ||||
1305 | |||||
1306 | // Use the default implementation. | ||||
1307 | setOperationAction(ISD::VASTART, MVT::Other, Custom); | ||||
1308 | setOperationAction(ISD::VAARG, MVT::Other, Expand); | ||||
1309 | setOperationAction(ISD::VACOPY, MVT::Other, Expand); | ||||
1310 | setOperationAction(ISD::VAEND, MVT::Other, Expand); | ||||
1311 | setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); | ||||
1312 | setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); | ||||
1313 | |||||
1314 | if (Subtarget->isTargetWindows()) | ||||
1315 | setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); | ||||
1316 | else | ||||
1317 | setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); | ||||
1318 | |||||
1319 | // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use | ||||
1320 | // the default expansion. | ||||
1321 | InsertFencesForAtomic = false; | ||||
1322 | if (Subtarget->hasAnyDataBarrier() && | ||||
1323 | (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) { | ||||
1324 | // ATOMIC_FENCE needs custom lowering; the others should have been expanded | ||||
1325 | // to ldrex/strex loops already. | ||||
1326 | setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); | ||||
1327 | if (!Subtarget->isThumb() || !Subtarget->isMClass()) | ||||
1328 | setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom); | ||||
1329 | |||||
1330 | // On v8, we have particularly efficient implementations of atomic fences | ||||
1331 | // if they can be combined with nearby atomic loads and stores. | ||||
1332 | if (!Subtarget->hasAcquireRelease() || | ||||
1333 | getTargetMachine().getOptLevel() == 0) { | ||||
1334 | // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc. | ||||
1335 | InsertFencesForAtomic = true; | ||||
1336 | } | ||||
1337 | } else { | ||||
1338 | // If there's anything we can use as a barrier, go through custom lowering | ||||
1339 | // for ATOMIC_FENCE. | ||||
1340 | // If target has DMB in thumb, Fences can be inserted. | ||||
1341 | if (Subtarget->hasDataBarrier()) | ||||
1342 | InsertFencesForAtomic = true; | ||||
1343 | |||||
1344 | setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, | ||||
1345 | Subtarget->hasAnyDataBarrier() ? Custom : Expand); | ||||
1346 | |||||
1347 | // Set them all for expansion, which will force libcalls. | ||||
1348 | setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand); | ||||
1349 | setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand); | ||||
1350 | setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand); | ||||
1351 | setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand); | ||||
1352 | setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand); | ||||
1353 | setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand); | ||||
1354 | setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand); | ||||
1355 | setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand); | ||||
1356 | setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand); | ||||
1357 | setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand); | ||||
1358 | setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand); | ||||
1359 | setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand); | ||||
1360 | // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the | ||||
1361 | // Unordered/Monotonic case. | ||||
1362 | if (!InsertFencesForAtomic) { | ||||
1363 | setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom); | ||||
1364 | setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom); | ||||
1365 | } | ||||
1366 | } | ||||
1367 | |||||
1368 | // Compute supported atomic widths. | ||||
1369 | if (Subtarget->isTargetLinux() || | ||||
1370 | (!Subtarget->isMClass() && Subtarget->hasV6Ops())) { | ||||
1371 | // For targets where __sync_* routines are reliably available, we use them | ||||
1372 | // if necessary. | ||||
1373 | // | ||||
1374 | // ARM Linux always supports 64-bit atomics through kernel-assisted atomic | ||||
1375 | // routines (kernel 3.1 or later). FIXME: Not with compiler-rt? | ||||
1376 | // | ||||
1377 | // ARMv6 targets have native instructions in ARM mode. For Thumb mode, | ||||
1378 | // such targets should provide __sync_* routines, which use the ARM mode | ||||
1379 | // instructions. (ARMv6 doesn't have dmb, but it has an equivalent | ||||
1380 | // encoding; see ARMISD::MEMBARRIER_MCR.) | ||||
1381 | setMaxAtomicSizeInBitsSupported(64); | ||||
1382 | } else if ((Subtarget->isMClass() && Subtarget->hasV8MBaselineOps()) || | ||||
1383 | Subtarget->hasForced32BitAtomics()) { | ||||
1384 | // Cortex-M (besides Cortex-M0) have 32-bit atomics. | ||||
1385 | setMaxAtomicSizeInBitsSupported(32); | ||||
1386 | } else { | ||||
1387 | // We can't assume anything about other targets; just use libatomic | ||||
1388 | // routines. | ||||
1389 | setMaxAtomicSizeInBitsSupported(0); | ||||
1390 | } | ||||
1391 | |||||
1392 | setMaxDivRemBitWidthSupported(64); | ||||
1393 | |||||
1394 | setOperationAction(ISD::PREFETCH, MVT::Other, Custom); | ||||
1395 | |||||
1396 | // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes. | ||||
1397 | if (!Subtarget->hasV6Ops()) { | ||||
1398 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); | ||||
1399 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); | ||||
1400 | } | ||||
1401 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); | ||||
1402 | |||||
1403 | if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() && | ||||
1404 | !Subtarget->isThumb1Only()) { | ||||
1405 | // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR | ||||
1406 | // iff target supports vfp2. | ||||
1407 | setOperationAction(ISD::BITCAST, MVT::i64, Custom); | ||||
1408 | setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom); | ||||
1409 | setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom); | ||||
1410 | } | ||||
1411 | |||||
1412 | // We want to custom lower some of our intrinsics. | ||||
1413 | setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); | ||||
1414 | setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); | ||||
1415 | setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); | ||||
1416 | setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom); | ||||
1417 | if (Subtarget->useSjLjEH()) | ||||
1418 | setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume"); | ||||
1419 | |||||
1420 | setOperationAction(ISD::SETCC, MVT::i32, Expand); | ||||
1421 | setOperationAction(ISD::SETCC, MVT::f32, Expand); | ||||
1422 | setOperationAction(ISD::SETCC, MVT::f64, Expand); | ||||
1423 | setOperationAction(ISD::SELECT, MVT::i32, Custom); | ||||
1424 | setOperationAction(ISD::SELECT, MVT::f32, Custom); | ||||
1425 | setOperationAction(ISD::SELECT, MVT::f64, Custom); | ||||
1426 | setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); | ||||
1427 | setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); | ||||
1428 | setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); | ||||
1429 | if (Subtarget->hasFullFP16()) { | ||||
1430 | setOperationAction(ISD::SETCC, MVT::f16, Expand); | ||||
1431 | setOperationAction(ISD::SELECT, MVT::f16, Custom); | ||||
1432 | setOperationAction(ISD::SELECT_CC, MVT::f16, Custom); | ||||
1433 | } | ||||
1434 | |||||
1435 | setOperationAction(ISD::SETCCCARRY, MVT::i32, Custom); | ||||
1436 | |||||
1437 | setOperationAction(ISD::BRCOND, MVT::Other, Custom); | ||||
1438 | setOperationAction(ISD::BR_CC, MVT::i32, Custom); | ||||
1439 | if (Subtarget->hasFullFP16()) | ||||
1440 | setOperationAction(ISD::BR_CC, MVT::f16, Custom); | ||||
1441 | setOperationAction(ISD::BR_CC, MVT::f32, Custom); | ||||
1442 | setOperationAction(ISD::BR_CC, MVT::f64, Custom); | ||||
1443 | setOperationAction(ISD::BR_JT, MVT::Other, Custom); | ||||
1444 | |||||
1445 | // We don't support sin/cos/fmod/copysign/pow | ||||
1446 | setOperationAction(ISD::FSIN, MVT::f64, Expand); | ||||
1447 | setOperationAction(ISD::FSIN, MVT::f32, Expand); | ||||
1448 | setOperationAction(ISD::FCOS, MVT::f32, Expand); | ||||
1449 | setOperationAction(ISD::FCOS, MVT::f64, Expand); | ||||
1450 | setOperationAction(ISD::FSINCOS, MVT::f64, Expand); | ||||
1451 | setOperationAction(ISD::FSINCOS, MVT::f32, Expand); | ||||
1452 | setOperationAction(ISD::FREM, MVT::f64, Expand); | ||||
1453 | setOperationAction(ISD::FREM, MVT::f32, Expand); | ||||
1454 | if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() && | ||||
1455 | !Subtarget->isThumb1Only()) { | ||||
1456 | setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); | ||||
1457 | setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); | ||||
1458 | } | ||||
1459 | setOperationAction(ISD::FPOW, MVT::f64, Expand); | ||||
1460 | setOperationAction(ISD::FPOW, MVT::f32, Expand); | ||||
1461 | |||||
1462 | if (!Subtarget->hasVFP4Base()) { | ||||
1463 | setOperationAction(ISD::FMA, MVT::f64, Expand); | ||||
1464 | setOperationAction(ISD::FMA, MVT::f32, Expand); | ||||
1465 | } | ||||
1466 | |||||
1467 | // Various VFP goodness | ||||
1468 | if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) { | ||||
1469 | // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded. | ||||
1470 | if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) { | ||||
1471 | setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); | ||||
1472 | setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand); | ||||
1473 | } | ||||
1474 | |||||
1475 | // fp16 is a special v7 extension that adds f16 <-> f32 conversions. | ||||
1476 | if (!Subtarget->hasFP16()) { | ||||
1477 | setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand); | ||||
1478 | setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand); | ||||
1479 | } | ||||
1480 | |||||
1481 | // Strict floating-point comparisons need custom lowering. | ||||
1482 | setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Custom); | ||||
1483 | setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom); | ||||
1484 | setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Custom); | ||||
1485 | setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Custom); | ||||
1486 | setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Custom); | ||||
1487 | setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Custom); | ||||
1488 | } | ||||
1489 | |||||
1490 | // Use __sincos_stret if available. | ||||
1491 | if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr && | ||||
1492 | getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) { | ||||
1493 | setOperationAction(ISD::FSINCOS, MVT::f64, Custom); | ||||
1494 | setOperationAction(ISD::FSINCOS, MVT::f32, Custom); | ||||
1495 | } | ||||
1496 | |||||
1497 | // FP-ARMv8 implements a lot of rounding-like FP operations. | ||||
1498 | if (Subtarget->hasFPARMv8Base()) { | ||||
1499 | setOperationAction(ISD::FFLOOR, MVT::f32, Legal); | ||||
1500 | setOperationAction(ISD::FCEIL, MVT::f32, Legal); | ||||
1501 | setOperationAction(ISD::FROUND, MVT::f32, Legal); | ||||
1502 | setOperationAction(ISD::FTRUNC, MVT::f32, Legal); | ||||
1503 | setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); | ||||
1504 | setOperationAction(ISD::FRINT, MVT::f32, Legal); | ||||
1505 | setOperationAction(ISD::FMINNUM, MVT::f32, Legal); | ||||
1506 | setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); | ||||
1507 | if (Subtarget->hasNEON()) { | ||||
1508 | setOperationAction(ISD::FMINNUM, MVT::v2f32, Legal); | ||||
1509 | setOperationAction(ISD::FMAXNUM, MVT::v2f32, Legal); | ||||
1510 | setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal); | ||||
1511 | setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal); | ||||
1512 | } | ||||
1513 | |||||
1514 | if (Subtarget->hasFP64()) { | ||||
1515 | setOperationAction(ISD::FFLOOR, MVT::f64, Legal); | ||||
1516 | setOperationAction(ISD::FCEIL, MVT::f64, Legal); | ||||
1517 | setOperationAction(ISD::FROUND, MVT::f64, Legal); | ||||
1518 | setOperationAction(ISD::FTRUNC, MVT::f64, Legal); | ||||
1519 | setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); | ||||
1520 | setOperationAction(ISD::FRINT, MVT::f64, Legal); | ||||
1521 | setOperationAction(ISD::FMINNUM, MVT::f64, Legal); | ||||
1522 | setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); | ||||
1523 | } | ||||
1524 | } | ||||
1525 | |||||
1526 | // FP16 often need to be promoted to call lib functions | ||||
1527 | if (Subtarget->hasFullFP16()) { | ||||
1528 | setOperationAction(ISD::FREM, MVT::f16, Promote); | ||||
1529 | setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand); | ||||
1530 | setOperationAction(ISD::FSIN, MVT::f16, Promote); | ||||
1531 | setOperationAction(ISD::FCOS, MVT::f16, Promote); | ||||
1532 | setOperationAction(ISD::FSINCOS, MVT::f16, Promote); | ||||
1533 | setOperationAction(ISD::FPOWI, MVT::f16, Promote); | ||||
1534 | setOperationAction(ISD::FPOW, MVT::f16, Promote); | ||||
1535 | setOperationAction(ISD::FEXP, MVT::f16, Promote); | ||||
1536 | setOperationAction(ISD::FEXP2, MVT::f16, Promote); | ||||
1537 | setOperationAction(ISD::FLOG, MVT::f16, Promote); | ||||
1538 | setOperationAction(ISD::FLOG10, MVT::f16, Promote); | ||||
1539 | setOperationAction(ISD::FLOG2, MVT::f16, Promote); | ||||
1540 | |||||
1541 | setOperationAction(ISD::FROUND, MVT::f16, Legal); | ||||
1542 | } | ||||
1543 | |||||
1544 | if (Subtarget->hasNEON()) { | ||||
1545 | // vmin and vmax aren't available in a scalar form, so we can use | ||||
1546 | // a NEON instruction with an undef lane instead. This has a performance | ||||
1547 | // penalty on some cores, so we don't do this unless we have been | ||||
1548 | // asked to by the core tuning model. | ||||
1549 | if (Subtarget->useNEONForSinglePrecisionFP()) { | ||||
1550 | setOperationAction(ISD::FMINIMUM, MVT::f32, Legal); | ||||
1551 | setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal); | ||||
1552 | setOperationAction(ISD::FMINIMUM, MVT::f16, Legal); | ||||
1553 | setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal); | ||||
1554 | } | ||||
1555 | setOperationAction(ISD::FMINIMUM, MVT::v2f32, Legal); | ||||
1556 | setOperationAction(ISD::FMAXIMUM, MVT::v2f32, Legal); | ||||
1557 | setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal); | ||||
1558 | setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal); | ||||
1559 | |||||
1560 | if (Subtarget->hasFullFP16()) { | ||||
1561 | setOperationAction(ISD::FMINNUM, MVT::v4f16, Legal); | ||||
1562 | setOperationAction(ISD::FMAXNUM, MVT::v4f16, Legal); | ||||
1563 | setOperationAction(ISD::FMINNUM, MVT::v8f16, Legal); | ||||
1564 | setOperationAction(ISD::FMAXNUM, MVT::v8f16, Legal); | ||||
1565 | |||||
1566 | setOperationAction(ISD::FMINIMUM, MVT::v4f16, Legal); | ||||
1567 | setOperationAction(ISD::FMAXIMUM, MVT::v4f16, Legal); | ||||
1568 | setOperationAction(ISD::FMINIMUM, MVT::v8f16, Legal); | ||||
1569 | setOperationAction(ISD::FMAXIMUM, MVT::v8f16, Legal); | ||||
1570 | } | ||||
1571 | } | ||||
1572 | |||||
1573 | // We have target-specific dag combine patterns for the following nodes: | ||||
1574 | // ARMISD::VMOVRRD - No need to call setTargetDAGCombine | ||||
1575 | setTargetDAGCombine( | ||||
1576 | {ISD::ADD, ISD::SUB, ISD::MUL, ISD::AND, ISD::OR, ISD::XOR}); | ||||
1577 | |||||
1578 | if (Subtarget->hasMVEIntegerOps()) | ||||
1579 | setTargetDAGCombine(ISD::VSELECT); | ||||
1580 | |||||
1581 | if (Subtarget->hasV6Ops()) | ||||
1582 | setTargetDAGCombine(ISD::SRL); | ||||
1583 | if (Subtarget->isThumb1Only()) | ||||
1584 | setTargetDAGCombine(ISD::SHL); | ||||
1585 | // Attempt to lower smin/smax to ssat/usat | ||||
1586 | if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || | ||||
1587 | Subtarget->isThumb2()) { | ||||
1588 | setTargetDAGCombine({ISD::SMIN, ISD::SMAX}); | ||||
1589 | } | ||||
1590 | |||||
1591 | setStackPointerRegisterToSaveRestore(ARM::SP); | ||||
1592 | |||||
1593 | if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() || | ||||
1594 | !Subtarget->hasVFP2Base() || Subtarget->hasMinSize()) | ||||
1595 | setSchedulingPreference(Sched::RegPressure); | ||||
1596 | else | ||||
1597 | setSchedulingPreference(Sched::Hybrid); | ||||
1598 | |||||
1599 | //// temporary - rewrite interface to use type | ||||
1600 | MaxStoresPerMemset = 8; | ||||
1601 | MaxStoresPerMemsetOptSize = 4; | ||||
1602 | MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores | ||||
1603 | MaxStoresPerMemcpyOptSize = 2; | ||||
1604 | MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores | ||||
1605 | MaxStoresPerMemmoveOptSize = 2; | ||||
1606 | |||||
1607 | // On ARM arguments smaller than 4 bytes are extended, so all arguments | ||||
1608 | // are at least 4 bytes aligned. | ||||
1609 | setMinStackArgumentAlignment(Align(4)); | ||||
1610 | |||||
1611 | // Prefer likely predicted branches to selects on out-of-order cores. | ||||
1612 | PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder(); | ||||
1613 | |||||
1614 | setPrefLoopAlignment(Align(1ULL << Subtarget->getPrefLoopLogAlignment())); | ||||
1615 | |||||
1616 | setMinFunctionAlignment(Subtarget->isThumb() ? Align(2) : Align(4)); | ||||
1617 | |||||
1618 | if (Subtarget->isThumb() || Subtarget->isThumb2()) | ||||
1619 | setTargetDAGCombine(ISD::ABS); | ||||
1620 | } | ||||
1621 | |||||
// Returns true if this target was configured to lower floating-point
// operations as software library calls; simply forwards the subtarget's
// soft-float setting.
bool ARMTargetLowering::useSoftFloat() const {
  return Subtarget->useSoftFloat();
}
1625 | |||||
// FIXME: It might make sense to define the representative register class as the
// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
// a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
// SPR's representative would be DPR_VFP2. This should work well if register
// pressure tracking were modified such that a register use would increment the
// pressure of the register class's representative and all of its super
// classes' representatives transitively. We have not implemented this because
// of the difficulty prior to coalescing of modeling operand register classes
// due to the common occurrence of cross class copies and subregister insertions
// and extractions.
1636 | std::pair<const TargetRegisterClass *, uint8_t> | ||||
1637 | ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI, | ||||
1638 | MVT VT) const { | ||||
1639 | const TargetRegisterClass *RRC = nullptr; | ||||
1640 | uint8_t Cost = 1; | ||||
1641 | switch (VT.SimpleTy) { | ||||
1642 | default: | ||||
1643 | return TargetLowering::findRepresentativeClass(TRI, VT); | ||||
1644 | // Use DPR as representative register class for all floating point | ||||
1645 | // and vector types. Since there are 32 SPR registers and 32 DPR registers so | ||||
1646 | // the cost is 1 for both f32 and f64. | ||||
1647 | case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16: | ||||
1648 | case MVT::v2i32: case MVT::v1i64: case MVT::v2f32: | ||||
1649 | RRC = &ARM::DPRRegClass; | ||||
1650 | // When NEON is used for SP, only half of the register file is available | ||||
1651 | // because operations that define both SP and DP results will be constrained | ||||
1652 | // to the VFP2 class (D0-D15). We currently model this constraint prior to | ||||
1653 | // coalescing by double-counting the SP regs. See the FIXME above. | ||||
1654 | if (Subtarget->useNEONForSinglePrecisionFP()) | ||||
1655 | Cost = 2; | ||||
1656 | break; | ||||
1657 | case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64: | ||||
1658 | case MVT::v4f32: case MVT::v2f64: | ||||
1659 | RRC = &ARM::DPRRegClass; | ||||
1660 | Cost = 2; | ||||
1661 | break; | ||||
1662 | case MVT::v4i64: | ||||
1663 | RRC = &ARM::DPRRegClass; | ||||
1664 | Cost = 4; | ||||
1665 | break; | ||||
1666 | case MVT::v8i64: | ||||
1667 | RRC = &ARM::DPRRegClass; | ||||
1668 | Cost = 8; | ||||
1669 | break; | ||||
1670 | } | ||||
1671 | return std::make_pair(RRC, Cost); | ||||
1672 | } | ||||
1673 | |||||
1674 | const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { | ||||
1675 | #define MAKE_CASE(V) \ | ||||
1676 | case V: \ | ||||
1677 | return #V; | ||||
1678 | switch ((ARMISD::NodeType)Opcode) { | ||||
1679 | case ARMISD::FIRST_NUMBER: | ||||
1680 | break; | ||||
1681 | MAKE_CASE(ARMISD::Wrapper) | ||||
1682 | MAKE_CASE(ARMISD::WrapperPIC) | ||||
1683 | MAKE_CASE(ARMISD::WrapperJT) | ||||
1684 | MAKE_CASE(ARMISD::COPY_STRUCT_BYVAL) | ||||
1685 | MAKE_CASE(ARMISD::CALL) | ||||
1686 | MAKE_CASE(ARMISD::CALL_PRED) | ||||
1687 | MAKE_CASE(ARMISD::CALL_NOLINK) | ||||
1688 | MAKE_CASE(ARMISD::tSECALL) | ||||
1689 | MAKE_CASE(ARMISD::t2CALL_BTI) | ||||
1690 | MAKE_CASE(ARMISD::BRCOND) | ||||
1691 | MAKE_CASE(ARMISD::BR_JT) | ||||
1692 | MAKE_CASE(ARMISD::BR2_JT) | ||||
1693 | MAKE_CASE(ARMISD::RET_FLAG) | ||||
1694 | MAKE_CASE(ARMISD::SERET_FLAG) | ||||
1695 | MAKE_CASE(ARMISD::INTRET_FLAG) | ||||
1696 | MAKE_CASE(ARMISD::PIC_ADD) | ||||
1697 | MAKE_CASE(ARMISD::CMP) | ||||
1698 | MAKE_CASE(ARMISD::CMN) | ||||
1699 | MAKE_CASE(ARMISD::CMPZ) | ||||
1700 | MAKE_CASE(ARMISD::CMPFP) | ||||
1701 | MAKE_CASE(ARMISD::CMPFPE) | ||||
1702 | MAKE_CASE(ARMISD::CMPFPw0) | ||||
1703 | MAKE_CASE(ARMISD::CMPFPEw0) | ||||
1704 | MAKE_CASE(ARMISD::BCC_i64) | ||||
1705 | MAKE_CASE(ARMISD::FMSTAT) | ||||
1706 | MAKE_CASE(ARMISD::CMOV) | ||||
1707 | MAKE_CASE(ARMISD::SUBS) | ||||
1708 | MAKE_CASE(ARMISD::SSAT) | ||||
1709 | MAKE_CASE(ARMISD::USAT) | ||||
1710 | MAKE_CASE(ARMISD::ASRL) | ||||
1711 | MAKE_CASE(ARMISD::LSRL) | ||||
1712 | MAKE_CASE(ARMISD::LSLL) | ||||
1713 | MAKE_CASE(ARMISD::SRL_FLAG) | ||||
1714 | MAKE_CASE(ARMISD::SRA_FLAG) | ||||
1715 | MAKE_CASE(ARMISD::RRX) | ||||
1716 | MAKE_CASE(ARMISD::ADDC) | ||||
1717 | MAKE_CASE(ARMISD::ADDE) | ||||
1718 | MAKE_CASE(ARMISD::SUBC) | ||||
1719 | MAKE_CASE(ARMISD::SUBE) | ||||
1720 | MAKE_CASE(ARMISD::LSLS) | ||||
1721 | MAKE_CASE(ARMISD::VMOVRRD) | ||||
1722 | MAKE_CASE(ARMISD::VMOVDRR) | ||||
1723 | MAKE_CASE(ARMISD::VMOVhr) | ||||
1724 | MAKE_CASE(ARMISD::VMOVrh) | ||||
1725 | MAKE_CASE(ARMISD::VMOVSR) | ||||
1726 | MAKE_CASE(ARMISD::EH_SJLJ_SETJMP) | ||||
1727 | MAKE_CASE(ARMISD::EH_SJLJ_LONGJMP) | ||||
1728 | MAKE_CASE(ARMISD::EH_SJLJ_SETUP_DISPATCH) | ||||
1729 | MAKE_CASE(ARMISD::TC_RETURN) | ||||
1730 | MAKE_CASE(ARMISD::THREAD_POINTER) | ||||
1731 | MAKE_CASE(ARMISD::DYN_ALLOC) | ||||
1732 | MAKE_CASE(ARMISD::MEMBARRIER_MCR) | ||||
1733 | MAKE_CASE(ARMISD::PRELOAD) | ||||
1734 | MAKE_CASE(ARMISD::LDRD) | ||||
1735 | MAKE_CASE(ARMISD::STRD) | ||||
1736 | MAKE_CASE(ARMISD::WIN__CHKSTK) | ||||
1737 | MAKE_CASE(ARMISD::WIN__DBZCHK) | ||||
1738 | MAKE_CASE(ARMISD::PREDICATE_CAST) | ||||
1739 | MAKE_CASE(ARMISD::VECTOR_REG_CAST) | ||||
1740 | MAKE_CASE(ARMISD::MVESEXT) | ||||
1741 | MAKE_CASE(ARMISD::MVEZEXT) | ||||
1742 | MAKE_CASE(ARMISD::MVETRUNC) | ||||
1743 | MAKE_CASE(ARMISD::VCMP) | ||||
1744 | MAKE_CASE(ARMISD::VCMPZ) | ||||
1745 | MAKE_CASE(ARMISD::VTST) | ||||
1746 | MAKE_CASE(ARMISD::VSHLs) | ||||
1747 | MAKE_CASE(ARMISD::VSHLu) | ||||
1748 | MAKE_CASE(ARMISD::VSHLIMM) | ||||
1749 | MAKE_CASE(ARMISD::VSHRsIMM) | ||||
1750 | MAKE_CASE(ARMISD::VSHRuIMM) | ||||
1751 | MAKE_CASE(ARMISD::VRSHRsIMM) | ||||
1752 | MAKE_CASE(ARMISD::VRSHRuIMM) | ||||
1753 | MAKE_CASE(ARMISD::VRSHRNIMM) | ||||
1754 | MAKE_CASE(ARMISD::VQSHLsIMM) | ||||
1755 | MAKE_CASE(ARMISD::VQSHLuIMM) | ||||
1756 | MAKE_CASE(ARMISD::VQSHLsuIMM) | ||||
1757 | MAKE_CASE(ARMISD::VQSHRNsIMM) | ||||
1758 | MAKE_CASE(ARMISD::VQSHRNuIMM) | ||||
1759 | MAKE_CASE(ARMISD::VQSHRNsuIMM) | ||||
1760 | MAKE_CASE(ARMISD::VQRSHRNsIMM) | ||||
1761 | MAKE_CASE(ARMISD::VQRSHRNuIMM) | ||||
1762 | MAKE_CASE(ARMISD::VQRSHRNsuIMM) | ||||
1763 | MAKE_CASE(ARMISD::VSLIIMM) | ||||
1764 | MAKE_CASE(ARMISD::VSRIIMM) | ||||
1765 | MAKE_CASE(ARMISD::VGETLANEu) | ||||
1766 | MAKE_CASE(ARMISD::VGETLANEs) | ||||
1767 | MAKE_CASE(ARMISD::VMOVIMM) | ||||
1768 | MAKE_CASE(ARMISD::VMVNIMM) | ||||
1769 | MAKE_CASE(ARMISD::VMOVFPIMM) | ||||
1770 | MAKE_CASE(ARMISD::VDUP) | ||||
1771 | MAKE_CASE(ARMISD::VDUPLANE) | ||||
1772 | MAKE_CASE(ARMISD::VEXT) | ||||
1773 | MAKE_CASE(ARMISD::VREV64) | ||||
1774 | MAKE_CASE(ARMISD::VREV32) | ||||
1775 | MAKE_CASE(ARMISD::VREV16) | ||||
1776 | MAKE_CASE(ARMISD::VZIP) | ||||
1777 | MAKE_CASE(ARMISD::VUZP) | ||||
1778 | MAKE_CASE(ARMISD::VTRN) | ||||
1779 | MAKE_CASE(ARMISD::VTBL1) | ||||
1780 | MAKE_CASE(ARMISD::VTBL2) | ||||
1781 | MAKE_CASE(ARMISD::VMOVN) | ||||
1782 | MAKE_CASE(ARMISD::VQMOVNs) | ||||
1783 | MAKE_CASE(ARMISD::VQMOVNu) | ||||
1784 | MAKE_CASE(ARMISD::VCVTN) | ||||
1785 | MAKE_CASE(ARMISD::VCVTL) | ||||
1786 | MAKE_CASE(ARMISD::VIDUP) | ||||
1787 | MAKE_CASE(ARMISD::VMULLs) | ||||
1788 | MAKE_CASE(ARMISD::VMULLu) | ||||
1789 | MAKE_CASE(ARMISD::VQDMULH) | ||||
1790 | MAKE_CASE(ARMISD::VADDVs) | ||||
1791 | MAKE_CASE(ARMISD::VADDVu) | ||||
1792 | MAKE_CASE(ARMISD::VADDVps) | ||||
1793 | MAKE_CASE(ARMISD::VADDVpu) | ||||
1794 | MAKE_CASE(ARMISD::VADDLVs) | ||||
1795 | MAKE_CASE(ARMISD::VADDLVu) | ||||
1796 | MAKE_CASE(ARMISD::VADDLVAs) | ||||
1797 | MAKE_CASE(ARMISD::VADDLVAu) | ||||
1798 | MAKE_CASE(ARMISD::VADDLVps) | ||||
1799 | MAKE_CASE(ARMISD::VADDLVpu) | ||||
1800 | MAKE_CASE(ARMISD::VADDLVAps) | ||||
1801 | MAKE_CASE(ARMISD::VADDLVApu) | ||||
1802 | MAKE_CASE(ARMISD::VMLAVs) | ||||
1803 | MAKE_CASE(ARMISD::VMLAVu) | ||||
1804 | MAKE_CASE(ARMISD::VMLAVps) | ||||
1805 | MAKE_CASE(ARMISD::VMLAVpu) | ||||
1806 | MAKE_CASE(ARMISD::VMLALVs) | ||||
1807 | MAKE_CASE(ARMISD::VMLALVu) | ||||
1808 | MAKE_CASE(ARMISD::VMLALVps) | ||||
1809 | MAKE_CASE(ARMISD::VMLALVpu) | ||||
1810 | MAKE_CASE(ARMISD::VMLALVAs) | ||||
1811 | MAKE_CASE(ARMISD::VMLALVAu) | ||||
1812 | MAKE_CASE(ARMISD::VMLALVAps) | ||||
1813 | MAKE_CASE(ARMISD::VMLALVApu) | ||||
1814 | MAKE_CASE(ARMISD::VMINVu) | ||||
1815 | MAKE_CASE(ARMISD::VMINVs) | ||||
1816 | MAKE_CASE(ARMISD::VMAXVu) | ||||
1817 | MAKE_CASE(ARMISD::VMAXVs) | ||||
1818 | MAKE_CASE(ARMISD::UMAAL) | ||||
1819 | MAKE_CASE(ARMISD::UMLAL) | ||||
1820 | MAKE_CASE(ARMISD::SMLAL) | ||||
1821 | MAKE_CASE(ARMISD::SMLALBB) | ||||
1822 | MAKE_CASE(ARMISD::SMLALBT) | ||||
1823 | MAKE_CASE(ARMISD::SMLALTB) | ||||
1824 | MAKE_CASE(ARMISD::SMLALTT) | ||||
1825 | MAKE_CASE(ARMISD::SMULWB) | ||||
1826 | MAKE_CASE(ARMISD::SMULWT) | ||||
1827 | MAKE_CASE(ARMISD::SMLALD) | ||||
1828 | MAKE_CASE(ARMISD::SMLALDX) | ||||
1829 | MAKE_CASE(ARMISD::SMLSLD) | ||||
1830 | MAKE_CASE(ARMISD::SMLSLDX) | ||||
1831 | MAKE_CASE(ARMISD::SMMLAR) | ||||
1832 | MAKE_CASE(ARMISD::SMMLSR) | ||||
1833 | MAKE_CASE(ARMISD::QADD16b) | ||||
1834 | MAKE_CASE(ARMISD::QSUB16b) | ||||
1835 | MAKE_CASE(ARMISD::QADD8b) | ||||
1836 | MAKE_CASE(ARMISD::QSUB8b) | ||||
1837 | MAKE_CASE(ARMISD::UQADD16b) | ||||
1838 | MAKE_CASE(ARMISD::UQSUB16b) | ||||
1839 | MAKE_CASE(ARMISD::UQADD8b) | ||||
1840 | MAKE_CASE(ARMISD::UQSUB8b) | ||||
1841 | MAKE_CASE(ARMISD::BUILD_VECTOR) | ||||
1842 | MAKE_CASE(ARMISD::BFI) | ||||
1843 | MAKE_CASE(ARMISD::VORRIMM) | ||||
1844 | MAKE_CASE(ARMISD::VBICIMM) | ||||
1845 | MAKE_CASE(ARMISD::VBSP) | ||||
1846 | MAKE_CASE(ARMISD::MEMCPY) | ||||
1847 | MAKE_CASE(ARMISD::VLD1DUP) | ||||
1848 | MAKE_CASE(ARMISD::VLD2DUP) | ||||
1849 | MAKE_CASE(ARMISD::VLD3DUP) | ||||
1850 | MAKE_CASE(ARMISD::VLD4DUP) | ||||
1851 | MAKE_CASE(ARMISD::VLD1_UPD) | ||||
1852 | MAKE_CASE(ARMISD::VLD2_UPD) | ||||
1853 | MAKE_CASE(ARMISD::VLD3_UPD) | ||||
1854 | MAKE_CASE(ARMISD::VLD4_UPD) | ||||
1855 | MAKE_CASE(ARMISD::VLD1x2_UPD) | ||||
1856 | MAKE_CASE(ARMISD::VLD1x3_UPD) | ||||
1857 | MAKE_CASE(ARMISD::VLD1x4_UPD) | ||||
1858 | MAKE_CASE(ARMISD::VLD2LN_UPD) | ||||
1859 | MAKE_CASE(ARMISD::VLD3LN_UPD) | ||||
1860 | MAKE_CASE(ARMISD::VLD4LN_UPD) | ||||
1861 | MAKE_CASE(ARMISD::VLD1DUP_UPD) | ||||
1862 | MAKE_CASE(ARMISD::VLD2DUP_UPD) | ||||
1863 | MAKE_CASE(ARMISD::VLD3DUP_UPD) | ||||
1864 | MAKE_CASE(ARMISD::VLD4DUP_UPD) | ||||
1865 | MAKE_CASE(ARMISD::VST1_UPD) | ||||
1866 | MAKE_CASE(ARMISD::VST2_UPD) | ||||
1867 | MAKE_CASE(ARMISD::VST3_UPD) | ||||
1868 | MAKE_CASE(ARMISD::VST4_UPD) | ||||
1869 | MAKE_CASE(ARMISD::VST1x2_UPD) | ||||
1870 | MAKE_CASE(ARMISD::VST1x3_UPD) | ||||
1871 | MAKE_CASE(ARMISD::VST1x4_UPD) | ||||
1872 | MAKE_CASE(ARMISD::VST2LN_UPD) | ||||
1873 | MAKE_CASE(ARMISD::VST3LN_UPD) | ||||
1874 | MAKE_CASE(ARMISD::VST4LN_UPD) | ||||
1875 | MAKE_CASE(ARMISD::WLS) | ||||
1876 | MAKE_CASE(ARMISD::WLSSETUP) | ||||
1877 | MAKE_CASE(ARMISD::LE) | ||||
1878 | MAKE_CASE(ARMISD::LOOP_DEC) | ||||
1879 | MAKE_CASE(ARMISD::CSINV) | ||||
1880 | MAKE_CASE(ARMISD::CSNEG) | ||||
1881 | MAKE_CASE(ARMISD::CSINC) | ||||
1882 | MAKE_CASE(ARMISD::MEMCPYLOOP) | ||||
1883 | MAKE_CASE(ARMISD::MEMSETLOOP) | ||||
1884 | #undef MAKE_CASE | ||||
1885 | } | ||||
1886 | return nullptr; | ||||
1887 | } | ||||
1888 | |||||
1889 | EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, | ||||
1890 | EVT VT) const { | ||||
1891 | if (!VT.isVector()) | ||||
1892 | return getPointerTy(DL); | ||||
1893 | |||||
1894 | // MVE has a predicate register. | ||||
1895 | if ((Subtarget->hasMVEIntegerOps() && | ||||
1896 | (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 || | ||||
1897 | VT == MVT::v16i8)) || | ||||
1898 | (Subtarget->hasMVEFloatOps() && | ||||
1899 | (VT == MVT::v2f64 || VT == MVT::v4f32 || VT == MVT::v8f16))) | ||||
1900 | return MVT::getVectorVT(MVT::i1, VT.getVectorElementCount()); | ||||
1901 | return VT.changeVectorElementTypeToInteger(); | ||||
1902 | } | ||||
1903 | |||||
1904 | /// getRegClassFor - Return the register class that should be used for the | ||||
1905 | /// specified value type. | ||||
1906 | const TargetRegisterClass * | ||||
1907 | ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const { | ||||
1908 | (void)isDivergent; | ||||
1909 | // Map v4i64 to QQ registers but do not make the type legal. Similarly map | ||||
1910 | // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to | ||||
1911 | // load / store 4 to 8 consecutive NEON D registers, or 2 to 4 consecutive | ||||
1912 | // MVE Q registers. | ||||
1913 | if (Subtarget->hasNEON()) { | ||||
1914 | if (VT == MVT::v4i64) | ||||
1915 | return &ARM::QQPRRegClass; | ||||
1916 | if (VT == MVT::v8i64) | ||||
1917 | return &ARM::QQQQPRRegClass; | ||||
1918 | } | ||||
1919 | if (Subtarget->hasMVEIntegerOps()) { | ||||
1920 | if (VT == MVT::v4i64) | ||||
1921 | return &ARM::MQQPRRegClass; | ||||
1922 | if (VT == MVT::v8i64) | ||||
1923 | return &ARM::MQQQQPRRegClass; | ||||
1924 | } | ||||
1925 | return TargetLowering::getRegClassFor(VT); | ||||
1926 | } | ||||
1927 | |||||
1928 | // memcpy, and other memory intrinsics, typically tries to use LDM/STM if the | ||||
1929 | // source/dest is aligned and the copy size is large enough. We therefore want | ||||
1930 | // to align such objects passed to memory intrinsics. | ||||
1931 | bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, | ||||
1932 | Align &PrefAlign) const { | ||||
1933 | if (!isa<MemIntrinsic>(CI)) | ||||
1934 | return false; | ||||
1935 | MinSize = 8; | ||||
1936 | // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1 | ||||
1937 | // cycle faster than 4-byte aligned LDM. | ||||
1938 | PrefAlign = | ||||
1939 | (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? Align(8) : Align(4)); | ||||
1940 | return true; | ||||
1941 | } | ||||
1942 | |||||
// Create a fast isel object.
FastISel *
ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
                                  const TargetLibraryInfo *libInfo) const {
  // Thin factory wrapper: the actual FastISel implementation is provided by
  // ARM::createFastISel (defined elsewhere in the ARM backend).
  return ARM::createFastISel(funcInfo, libInfo);
}
1949 | |||||
/// Return the preferred scheduling strategy for the given node: ILP for
/// long-latency producers (FP/vector values, slow machine-instr defs),
/// register pressure otherwise.
Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
  unsigned NumVals = N->getNumValues();
  if (!NumVals)
    return Sched::RegPressure;

  // Any FP or vector result (ignoring glue/chain results) suggests a
  // long-latency producer, so prefer ILP scheduling.
  for (unsigned i = 0; i != NumVals; ++i) {
    EVT VT = N->getValueType(i);
    if (VT == MVT::Glue || VT == MVT::Other)
      continue;
    if (VT.isFloatingPoint() || VT.isVector())
      return Sched::ILP;
  }

  if (!N->isMachineOpcode())
    return Sched::RegPressure;

  // Loads are scheduled for latency even if the instruction itinerary
  // is not available.
  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());

  if (MCID.getNumDefs() == 0)
    return Sched::RegPressure;
  // A first def that takes more than 2 cycles to produce is treated as
  // high-latency and scheduled for ILP.
  if (!Itins->isEmpty() &&
      Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
    return Sched::ILP;

  return Sched::RegPressure;
}
1979 | |||||
1980 | //===----------------------------------------------------------------------===// | ||||
1981 | // Lowering Code | ||||
1982 | //===----------------------------------------------------------------------===// | ||||
1983 | |||||
1984 | static bool isSRL16(const SDValue &Op) { | ||||
1985 | if (Op.getOpcode() != ISD::SRL) | ||||
1986 | return false; | ||||
1987 | if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1))) | ||||
1988 | return Const->getZExtValue() == 16; | ||||
1989 | return false; | ||||
1990 | } | ||||
1991 | |||||
1992 | static bool isSRA16(const SDValue &Op) { | ||||
1993 | if (Op.getOpcode() != ISD::SRA) | ||||
1994 | return false; | ||||
1995 | if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1))) | ||||
1996 | return Const->getZExtValue() == 16; | ||||
1997 | return false; | ||||
1998 | } | ||||
1999 | |||||
2000 | static bool isSHL16(const SDValue &Op) { | ||||
2001 | if (Op.getOpcode() != ISD::SHL) | ||||
2002 | return false; | ||||
2003 | if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1))) | ||||
2004 | return Const->getZExtValue() == 16; | ||||
2005 | return false; | ||||
2006 | } | ||||
2007 | |||||
2008 | // Check for a signed 16-bit value. We special case SRA because it makes it | ||||
2009 | // more simple when also looking for SRAs that aren't sign extending a | ||||
2010 | // smaller value. Without the check, we'd need to take extra care with | ||||
2011 | // checking order for some operations. | ||||
2012 | static bool isS16(const SDValue &Op, SelectionDAG &DAG) { | ||||
2013 | if (isSRA16(Op)) | ||||
2014 | return isSHL16(Op.getOperand(0)); | ||||
2015 | return DAG.ComputeNumSignBits(Op) == 17; | ||||
2016 | } | ||||
2017 | |||||
/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC.
/// Integer comparisons always map to exactly one ARM condition code.
static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
  switch (CC) {
  default: llvm_unreachable("Unknown condition code!");
  case ISD::SETNE:  return ARMCC::NE;
  case ISD::SETEQ:  return ARMCC::EQ;
  case ISD::SETGT:  return ARMCC::GT;
  case ISD::SETGE:  return ARMCC::GE;
  case ISD::SETLT:  return ARMCC::LT;
  case ISD::SETLE:  return ARMCC::LE;
  // Unsigned comparisons use the carry-based conditions.
  case ISD::SETUGT: return ARMCC::HI;
  case ISD::SETUGE: return ARMCC::HS;
  case ISD::SETULT: return ARMCC::LO;
  case ISD::SETULE: return ARMCC::LS;
  }
}
2034 | |||||
/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
/// Some FP conditions need two ARM condition codes (the comparison is true
/// if either holds); CondCode2 is ARMCC::AL when a single code suffices.
static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
                        ARMCC::CondCodes &CondCode2) {
  CondCode2 = ARMCC::AL;
  switch (CC) {
  default: llvm_unreachable("Unknown FP condition!");
  case ISD::SETEQ:
  case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
  case ISD::SETGT:
  case ISD::SETOGT: CondCode = ARMCC::GT; break;
  case ISD::SETGE:
  case ISD::SETOGE: CondCode = ARMCC::GE; break;
  // Ordered less-than/less-equal use MI/LS so that unordered (NaN) inputs,
  // which set V, fail the test.
  case ISD::SETOLT: CondCode = ARMCC::MI; break;
  case ISD::SETOLE: CondCode = ARMCC::LS; break;
  case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
  // Ordered/unordered tests look only at the V (overflow) flag.
  case ISD::SETO:   CondCode = ARMCC::VC; break;
  case ISD::SETUO:  CondCode = ARMCC::VS; break;
  case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
  case ISD::SETUGT: CondCode = ARMCC::HI; break;
  case ISD::SETUGE: CondCode = ARMCC::PL; break;
  case ISD::SETLT:
  case ISD::SETULT: CondCode = ARMCC::LT; break;
  case ISD::SETLE:
  case ISD::SETULE: CondCode = ARMCC::LE; break;
  case ISD::SETNE:
  case ISD::SETUNE: CondCode = ARMCC::NE; break;
  }
}
2063 | |||||
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//

/// getEffectiveCallingConv - Get the effective calling convention, taking into
/// account presence of floating point hardware and calling convention
/// limitations, such as support for variadic functions.
CallingConv::ID
ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
                                           bool isVarArg) const {
  switch (CC) {
  default:
    report_fatal_error("Unsupported calling convention");
  // These conventions are already concrete ARM conventions; use as-is.
  case CallingConv::ARM_AAPCS:
  case CallingConv::ARM_APCS:
  case CallingConv::GHC:
  case CallingConv::CFGuard_Check:
    return CC;
  case CallingConv::PreserveMost:
    return CallingConv::PreserveMost;
  // Hard-float conventions fall back to the base AAPCS for variadic
  // functions, since variadic arguments are always passed in GPRs/stack.
  case CallingConv::ARM_AAPCS_VFP:
  case CallingConv::Swift:
  case CallingConv::SwiftTail:
    return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP;
  case CallingConv::C:
  case CallingConv::Tail:
    if (!Subtarget->isAAPCS_ABI())
      return CallingConv::ARM_APCS;
    // Use the VFP variant only with FP registers, a non-Thumb1 core, a
    // hard-float ABI, and a non-variadic signature.
    else if (Subtarget->hasFPRegs() && !Subtarget->isThumb1Only() &&
             getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
             !isVarArg)
      return CallingConv::ARM_AAPCS_VFP;
    else
      return CallingConv::ARM_AAPCS;
  case CallingConv::Fast:
  case CallingConv::CXX_FAST_TLS:
    if (!Subtarget->isAAPCS_ABI()) {
      // fastcc is only honoured with VFP2 on non-Thumb1, non-variadic calls.
      if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && !isVarArg)
        return CallingConv::Fast;
      return CallingConv::ARM_APCS;
    } else if (Subtarget->hasVFP2Base() &&
               !Subtarget->isThumb1Only() && !isVarArg)
      return CallingConv::ARM_AAPCS_VFP;
    else
      return CallingConv::ARM_AAPCS;
  }
}
2111 | |||||
/// Select the CCAssignFn used to assign locations for call arguments
/// (Return = false) under the given calling convention.
CCAssignFn *ARMTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
                                                 bool isVarArg) const {
  return CCAssignFnForNode(CC, false, isVarArg);
}
2116 | |||||
/// Select the CCAssignFn used to assign locations for return values
/// (Return = true) under the given calling convention.
CCAssignFn *ARMTargetLowering::CCAssignFnForReturn(CallingConv::ID CC,
                                                   bool isVarArg) const {
  return CCAssignFnForNode(CC, true, isVarArg);
}
2121 | |||||
/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
/// CallingConvention. The convention is first normalized via
/// getEffectiveCallingConv, then mapped to the tablegen'd argument- or
/// return-assignment function.
CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
                                                 bool Return,
                                                 bool isVarArg) const {
  switch (getEffectiveCallingConv(CC, isVarArg)) {
  default:
    report_fatal_error("Unsupported calling convention");
  case CallingConv::ARM_APCS:
    return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
  case CallingConv::ARM_AAPCS:
    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
  case CallingConv::ARM_AAPCS_VFP:
    return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
  case CallingConv::Fast:
    return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
  // GHC uses a custom argument convention but the normal APCS return.
  case CallingConv::GHC:
    return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
  case CallingConv::PreserveMost:
    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
  // CFGuard checks use a Windows-specific argument convention.
  case CallingConv::CFGuard_Check:
    return (Return ? RetCC_ARM_AAPCS : CC_ARM_Win32_CFGuard_Check);
  }
}
2146 | |||||
/// Move a value of location type LocVT (e.g. f32/i32 used to pass an f16)
/// into a half-precision value of type ValVT, as required for f16/bf16
/// arguments that are passed widened to 32 bits.
SDValue ARMTargetLowering::MoveToHPR(const SDLoc &dl, SelectionDAG &DAG,
                                     MVT LocVT, MVT ValVT, SDValue Val) const {
  // Reinterpret the incoming value as an integer of LocVT's width.
  Val = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocVT.getSizeInBits()),
                    Val);
  if (Subtarget->hasFullFP16()) {
    // With full FP16 support, move the GPR value directly into an HPR.
    Val = DAG.getNode(ARMISD::VMOVhr, dl, ValVT, Val);
  } else {
    // Otherwise truncate to ValVT's width and reinterpret the bits.
    Val = DAG.getNode(ISD::TRUNCATE, dl,
                      MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
    Val = DAG.getNode(ISD::BITCAST, dl, ValVT, Val);
  }
  return Val;
}
2160 | |||||
/// Inverse of MoveToHPR: widen a half-precision ValVT value into the LocVT
/// (32-bit) form used to pass f16/bf16 arguments and returns.
SDValue ARMTargetLowering::MoveFromHPR(const SDLoc &dl, SelectionDAG &DAG,
                                       MVT LocVT, MVT ValVT,
                                       SDValue Val) const {
  if (Subtarget->hasFullFP16()) {
    // With full FP16 support, move the HPR value directly into a GPR.
    Val = DAG.getNode(ARMISD::VMOVrh, dl,
                      MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
  } else {
    // Otherwise reinterpret as a ValVT-wide integer and zero-extend the
    // bits into the LocVT-wide integer.
    Val = DAG.getNode(ISD::BITCAST, dl,
                      MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
    Val = DAG.getNode(ISD::ZERO_EXTEND, dl,
                      MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
  }
  // Finally reinterpret the widened integer as LocVT (e.g. f32).
  return DAG.getNode(ISD::BITCAST, dl, LocVT, Val);
}
2175 | |||||
/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
/// The chain and glue (InFlag) are threaded through every CopyFromReg so
/// the copies stay ordered immediately after the call.
SDValue ARMTargetLowering::LowerCallResult(
    SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
    SDValue ThisVal) const {
  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());
  CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));

  // Copy all of the result registers out of their specified physreg.
  // NOTE: the loop body advances i itself (VA = RVLocs[++i]) when a value
  // is split across several locations (f64 pairs, v2f64 quads).
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign VA = RVLocs[i];

    // Pass 'this' value directly from the argument to return value, to avoid
    // reg unit interference
    if (i == 0 && isThisReturn) {
      assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
             "unexpected return calling convention register assignment");
      InVals.push_back(ThisVal);
      continue;
    }

    SDValue Val;
    if (VA.needsCustom() &&
        (VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2f64)) {
      // Handle f64 or half of a v2f64: reassemble from two i32 register
      // copies (lo/hi swapped on big-endian).
      SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Lo.getValue(1);
      InFlag = Lo.getValue(2);
      VA = RVLocs[++i]; // skip ahead to next loc
      SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Hi.getValue(1);
      InFlag = Hi.getValue(2);
      if (!Subtarget->isLittle())
        std::swap (Lo, Hi);
      Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);

      if (VA.getLocVT() == MVT::v2f64) {
        // The second f64 half of a v2f64 consumes two more locations.
        SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
        Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
                          DAG.getConstant(0, dl, MVT::i32));

        VA = RVLocs[++i]; // skip ahead to next loc
        Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
        Chain = Lo.getValue(1);
        InFlag = Lo.getValue(2);
        VA = RVLocs[++i]; // skip ahead to next loc
        Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
        Chain = Hi.getValue(1);
        InFlag = Hi.getValue(2);
        if (!Subtarget->isLittle())
          std::swap (Lo, Hi);
        Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
        Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
                          DAG.getConstant(1, dl, MVT::i32));
      }
    } else {
      // Simple case: one value in one register.
      Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
                               InFlag);
      Chain = Val.getValue(1);
      InFlag = Val.getValue(2);
    }

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::BCvt:
      Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
      break;
    }

    // f16 arguments have their size extended to 4 bytes and passed as if they
    // had been copied to the LSBs of a 32-bit register.
    // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
    if (VA.needsCustom() &&
        (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
      Val = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Val);

    InVals.push_back(Val);
  }

  return Chain;
}
2265 | |||||
2266 | std::pair<SDValue, MachinePointerInfo> ARMTargetLowering::computeAddrForCallArg( | ||||
2267 | const SDLoc &dl, SelectionDAG &DAG, const CCValAssign &VA, SDValue StackPtr, | ||||
2268 | bool IsTailCall, int SPDiff) const { | ||||
2269 | SDValue DstAddr; | ||||
2270 | MachinePointerInfo DstInfo; | ||||
2271 | int32_t Offset = VA.getLocMemOffset(); | ||||
2272 | MachineFunction &MF = DAG.getMachineFunction(); | ||||
2273 | |||||
2274 | if (IsTailCall) { | ||||
2275 | Offset += SPDiff; | ||||
2276 | auto PtrVT = getPointerTy(DAG.getDataLayout()); | ||||
2277 | int Size = VA.getLocVT().getFixedSizeInBits() / 8; | ||||
2278 | int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true); | ||||
2279 | DstAddr = DAG.getFrameIndex(FI, PtrVT); | ||||
2280 | DstInfo = | ||||
2281 | MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI); | ||||
2282 | } else { | ||||
2283 | SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl); | ||||
2284 | DstAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), | ||||
2285 | StackPtr, PtrOff); | ||||
2286 | DstInfo = | ||||
2287 | MachinePointerInfo::getStack(DAG.getMachineFunction(), Offset); | ||||
2288 | } | ||||
2289 | |||||
2290 | return std::make_pair(DstAddr, DstInfo); | ||||
2291 | } | ||||
2292 | |||||
/// Split an f64 argument into two i32 halves (via VMOVRRD) and pass them:
/// the first half always goes in VA's register; the second goes in NextVA's
/// register if one was assigned, otherwise it is stored to NextVA's stack
/// slot. Register order is swapped on big-endian targets.
void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
                                         SDValue Chain, SDValue &Arg,
                                         RegsToPassVector &RegsToPass,
                                         CCValAssign &VA, CCValAssign &NextVA,
                                         SDValue &StackPtr,
                                         SmallVectorImpl<SDValue> &MemOpChains,
                                         bool IsTailCall,
                                         int SPDiff) const {
  // Break the f64 into two i32 results.
  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
                              DAG.getVTList(MVT::i32, MVT::i32), Arg);
  // Result index of the half that goes first (endian-dependent).
  unsigned id = Subtarget->isLittle() ? 0 : 1;
  RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));

  if (NextVA.isRegLoc())
    RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
  else {
    assert(NextVA.isMemLoc());
    // Lazily materialize the stack pointer copy only when first needed.
    if (!StackPtr.getNode())
      StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
                                    getPointerTy(DAG.getDataLayout()));

    SDValue DstAddr;
    MachinePointerInfo DstInfo;
    std::tie(DstAddr, DstInfo) =
        computeAddrForCallArg(dl, DAG, NextVA, StackPtr, IsTailCall, SPDiff);
    MemOpChains.push_back(
        DAG.getStore(Chain, dl, fmrrd.getValue(1 - id), DstAddr, DstInfo));
  }
}
2322 | |||||
2323 | static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) { | ||||
2324 | return (CC == CallingConv::Fast && GuaranteeTailCalls) || | ||||
2325 | CC == CallingConv::Tail || CC == CallingConv::SwiftTail; | ||||
2326 | } | ||||
2327 | |||||
2328 | /// LowerCall - Lowering a call into a callseq_start <- | ||||
2329 | /// ARMISD:CALL <- callseq_end chain. Also add input and output parameter | ||||
2330 | /// nodes. | ||||
2331 | SDValue | ||||
2332 | ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, | ||||
2333 | SmallVectorImpl<SDValue> &InVals) const { | ||||
2334 | SelectionDAG &DAG = CLI.DAG; | ||||
2335 | SDLoc &dl = CLI.DL; | ||||
2336 | SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; | ||||
2337 | SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; | ||||
2338 | SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; | ||||
2339 | SDValue Chain = CLI.Chain; | ||||
2340 | SDValue Callee = CLI.Callee; | ||||
2341 | bool &isTailCall = CLI.IsTailCall; | ||||
2342 | CallingConv::ID CallConv = CLI.CallConv; | ||||
2343 | bool doesNotRet = CLI.DoesNotReturn; | ||||
2344 | bool isVarArg = CLI.IsVarArg; | ||||
2345 | |||||
2346 | MachineFunction &MF = DAG.getMachineFunction(); | ||||
2347 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | ||||
2348 | MachineFunction::CallSiteInfo CSInfo; | ||||
2349 | bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); | ||||
| |||||
2350 | bool isThisReturn = false; | ||||
2351 | bool isCmseNSCall = false; | ||||
2352 | bool isSibCall = false; | ||||
2353 | bool PreferIndirect = false; | ||||
2354 | bool GuardWithBTI = false; | ||||
2355 | |||||
2356 | // Lower 'returns_twice' calls to a pseudo-instruction. | ||||
2357 | if (CLI.CB && CLI.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) && | ||||
2358 | !Subtarget->noBTIAtReturnTwice()) | ||||
2359 | GuardWithBTI = AFI->branchTargetEnforcement(); | ||||
2360 | |||||
2361 | // Determine whether this is a non-secure function call. | ||||
2362 | if (CLI.CB
| ||||
2363 | isCmseNSCall = true; | ||||
2364 | |||||
2365 | // Disable tail calls if they're not supported. | ||||
2366 | if (!Subtarget->supportsTailCall()) | ||||
2367 | isTailCall = false; | ||||
2368 | |||||
2369 | // For both the non-secure calls and the returns from a CMSE entry function, | ||||
2370 | // the function needs to do some extra work after the call, or before the | ||||
2371 | // return, respectively, thus it cannot end with a tail call | ||||
2372 | if (isCmseNSCall
| ||||
2373 | isTailCall = false; | ||||
2374 | |||||
2375 | if (isa<GlobalAddressSDNode>(Callee)) { | ||||
2376 | // If we're optimizing for minimum size and the function is called three or | ||||
2377 | // more times in this block, we can improve codesize by calling indirectly | ||||
2378 | // as BLXr has a 16-bit encoding. | ||||
2379 | auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal(); | ||||
2380 | if (CLI.CB) { | ||||
2381 | auto *BB = CLI.CB->getParent(); | ||||
2382 | PreferIndirect = Subtarget->isThumb() && Subtarget->hasMinSize() && | ||||
2383 | count_if(GV->users(), [&BB](const User *U) { | ||||
2384 | return isa<Instruction>(U) && | ||||
2385 | cast<Instruction>(U)->getParent() == BB; | ||||
2386 | }) > 2; | ||||
2387 | } | ||||
2388 | } | ||||
2389 | if (isTailCall) { | ||||
2390 | // Check if it's really possible to do a tail call. | ||||
2391 | isTailCall = IsEligibleForTailCallOptimization( | ||||
2392 | Callee, CallConv, isVarArg, isStructRet, | ||||
2393 | MF.getFunction().hasStructRetAttr(), Outs, OutVals, Ins, DAG, | ||||
2394 | PreferIndirect); | ||||
2395 | |||||
2396 | if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt && | ||||
2397 | CallConv != CallingConv::Tail && CallConv != CallingConv::SwiftTail) | ||||
2398 | isSibCall = true; | ||||
2399 | |||||
2400 | // We don't support GuaranteedTailCallOpt for ARM, only automatically | ||||
2401 | // detected sibcalls. | ||||
2402 | if (isTailCall) | ||||
2403 | ++NumTailCalls; | ||||
2404 | } | ||||
2405 | |||||
2406 | if (!isTailCall
| ||||
2407 | report_fatal_error("failed to perform tail call elimination on a call " | ||||
2408 | "site marked musttail"); | ||||
2409 | // Analyze operands of the call, assigning locations to each operand. | ||||
2410 | SmallVector<CCValAssign, 16> ArgLocs; | ||||
2411 | CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, | ||||
2412 | *DAG.getContext()); | ||||
2413 | CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg)); | ||||
2414 | |||||
2415 | // Get a count of how many bytes are to be pushed on the stack. | ||||
2416 | unsigned NumBytes = CCInfo.getNextStackOffset(); | ||||
2417 | |||||
2418 | // SPDiff is the byte offset of the call's argument area from the callee's. | ||||
2419 | // Stores to callee stack arguments will be placed in FixedStackSlots offset | ||||
2420 | // by this amount for a tail call. In a sibling call it must be 0 because the | ||||
2421 | // caller will deallocate the entire stack and the callee still expects its | ||||
2422 | // arguments to begin at SP+0. Completely unused for non-tail calls. | ||||
2423 | int SPDiff = 0; | ||||
2424 | |||||
2425 | if (isTailCall && !isSibCall) { | ||||
2426 | auto FuncInfo = MF.getInfo<ARMFunctionInfo>(); | ||||
2427 | unsigned NumReusableBytes = FuncInfo->getArgumentStackSize(); | ||||
2428 | |||||
2429 | // Since callee will pop argument stack as a tail call, we must keep the | ||||
2430 | // popped size 16-byte aligned. | ||||
2431 | Align StackAlign = DAG.getDataLayout().getStackAlignment(); | ||||
2432 | NumBytes = alignTo(NumBytes, StackAlign); | ||||
2433 | |||||
2434 | // SPDiff will be negative if this tail call requires more space than we | ||||
2435 | // would automatically have in our incoming argument space. Positive if we | ||||
2436 | // can actually shrink the stack. | ||||
2437 | SPDiff = NumReusableBytes - NumBytes; | ||||
2438 | |||||
2439 | // If this call requires more stack than we have available from | ||||
2440 | // LowerFormalArguments, tell FrameLowering to reserve space for it. | ||||
2441 | if (SPDiff < 0 && AFI->getArgRegsSaveSize() < (unsigned)-SPDiff) | ||||
2442 | AFI->setArgRegsSaveSize(-SPDiff); | ||||
2443 | } | ||||
2444 | |||||
2445 | if (isSibCall
| ||||
2446 | // For sibling tail calls, memory operands are available in our caller's stack. | ||||
2447 | NumBytes = 0; | ||||
2448 | } else { | ||||
2449 | // Adjust the stack pointer for the new arguments... | ||||
2450 | // These operations are automatically eliminated by the prolog/epilog pass | ||||
2451 | Chain = DAG.getCALLSEQ_START(Chain, isTailCall
| ||||
2452 | } | ||||
2453 | |||||
2454 | SDValue StackPtr = | ||||
2455 | DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout())); | ||||
2456 | |||||
2457 | RegsToPassVector RegsToPass; | ||||
2458 | SmallVector<SDValue, 8> MemOpChains; | ||||
2459 | |||||
2460 | // During a tail call, stores to the argument area must happen after all of | ||||
2461 | // the function's incoming arguments have been loaded because they may alias. | ||||
2462 | // This is done by folding in a TokenFactor from LowerFormalArguments, but | ||||
2463 | // there's no point in doing so repeatedly so this tracks whether that's | ||||
2464 | // happened yet. | ||||
2465 | bool AfterFormalArgLoads = false; | ||||
2466 | |||||
2467 | // Walk the register/memloc assignments, inserting copies/loads. In the case | ||||
2468 | // of tail call optimization, arguments are handled later. | ||||
2469 | for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); | ||||
2470 | i != e; | ||||
2471 | ++i, ++realArgIdx) { | ||||
2472 | CCValAssign &VA = ArgLocs[i]; | ||||
2473 | SDValue Arg = OutVals[realArgIdx]; | ||||
2474 | ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; | ||||
2475 | bool isByVal = Flags.isByVal(); | ||||
2476 | |||||
2477 | // Promote the value if needed. | ||||
2478 | switch (VA.getLocInfo()) { | ||||
2479 | default: llvm_unreachable("Unknown loc info!")::llvm::llvm_unreachable_internal("Unknown loc info!", "llvm/lib/Target/ARM/ARMISelLowering.cpp" , 2479); | ||||
2480 | case CCValAssign::Full: break; | ||||
2481 | case CCValAssign::SExt: | ||||
2482 | Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); | ||||
2483 | break; | ||||
2484 | case CCValAssign::ZExt: | ||||
2485 | Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); | ||||
2486 | break; | ||||
2487 | case CCValAssign::AExt: | ||||
2488 | Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); | ||||
2489 | break; | ||||
2490 | case CCValAssign::BCvt: | ||||
2491 | Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); | ||||
2492 | break; | ||||
2493 | } | ||||
2494 | |||||
2495 | if (isTailCall && VA.isMemLoc() && !AfterFormalArgLoads) { | ||||
2496 | Chain = DAG.getStackArgumentTokenFactor(Chain); | ||||
2497 | AfterFormalArgLoads = true; | ||||
2498 | } | ||||
2499 | |||||
2500 | // f16 arguments have their size extended to 4 bytes and passed as if they | ||||
2501 | // had been copied to the LSBs of a 32-bit register. | ||||
2502 | // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI) | ||||
2503 | if (VA.needsCustom() && | ||||
2504 | (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) { | ||||
2505 | Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg); | ||||
2506 | } else { | ||||
2507 | // f16 arguments could have been extended prior to argument lowering. | ||||
2508 | // Mask them arguments if this is a CMSE nonsecure call. | ||||
2509 | auto ArgVT = Outs[realArgIdx].ArgVT; | ||||
2510 | if (isCmseNSCall && (ArgVT == MVT::f16)) { | ||||
2511 | auto LocBits = VA.getLocVT().getSizeInBits(); | ||||
2512 | auto MaskValue = APInt::getLowBitsSet(LocBits, ArgVT.getSizeInBits()); | ||||
2513 | SDValue Mask = | ||||
2514 | DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits)); | ||||
2515 | Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg); | ||||
2516 | Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask); | ||||
2517 | Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); | ||||
2518 | } | ||||
2519 | } | ||||
2520 | |||||
2521 | // f64 and v2f64 might be passed in i32 pairs and must be split into pieces | ||||
2522 | if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) { | ||||
2523 | SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, | ||||
2524 | DAG.getConstant(0, dl, MVT::i32)); | ||||
2525 | SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, | ||||
2526 | DAG.getConstant(1, dl, MVT::i32)); | ||||
2527 | |||||
2528 | PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass, VA, ArgLocs[++i], | ||||
2529 | StackPtr, MemOpChains, isTailCall, SPDiff); | ||||
2530 | |||||
2531 | VA = ArgLocs[++i]; // skip ahead to next loc | ||||
2532 | if (VA.isRegLoc()) { | ||||
2533 | PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass, VA, ArgLocs[++i], | ||||
2534 | StackPtr, MemOpChains, isTailCall, SPDiff); | ||||
2535 | } else { | ||||
2536 | assert(VA.isMemLoc())(static_cast <bool> (VA.isMemLoc()) ? void (0) : __assert_fail ("VA.isMemLoc()", "llvm/lib/Target/ARM/ARMISelLowering.cpp", 2536, __extension__ __PRETTY_FUNCTION__)); | ||||
2537 | SDValue DstAddr; | ||||
2538 | MachinePointerInfo DstInfo; | ||||
2539 | std::tie(DstAddr, DstInfo) = | ||||
2540 | computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff); | ||||
2541 | MemOpChains.push_back(DAG.getStore(Chain, dl, Op1, DstAddr, DstInfo)); | ||||
2542 | } | ||||
2543 | } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) { | ||||
2544 | PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i], | ||||
2545 | StackPtr, MemOpChains, isTailCall, SPDiff); | ||||
2546 | } else if (VA.isRegLoc()) { | ||||
2547 | if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() && | ||||
2548 | Outs[0].VT == MVT::i32) { | ||||
2549 | assert(VA.getLocVT() == MVT::i32 &&(static_cast <bool> (VA.getLocVT() == MVT::i32 && "unexpected calling convention register assignment") ? void ( 0) : __assert_fail ("VA.getLocVT() == MVT::i32 && \"unexpected calling convention register assignment\"" , "llvm/lib/Target/ARM/ARMISelLowering.cpp", 2550, __extension__ __PRETTY_FUNCTION__)) | ||||
2550 | "unexpected calling convention register assignment")(static_cast <bool> (VA.getLocVT() == MVT::i32 && "unexpected calling convention register assignment") ? void ( 0) : __assert_fail ("VA.getLocVT() == MVT::i32 && \"unexpected calling convention register assignment\"" , "llvm/lib/Target/ARM/ARMISelLowering.cpp", 2550, __extension__ __PRETTY_FUNCTION__)); | ||||
2551 | assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&(static_cast <bool> (!Ins.empty() && Ins[0].VT == MVT::i32 && "unexpected use of 'returned'") ? void ( 0) : __assert_fail ("!Ins.empty() && Ins[0].VT == MVT::i32 && \"unexpected use of 'returned'\"" , "llvm/lib/Target/ARM/ARMISelLowering.cpp", 2552, __extension__ __PRETTY_FUNCTION__)) | ||||
2552 | "unexpected use of 'returned'")(static_cast <bool> (!Ins.empty() && Ins[0].VT == MVT::i32 && "unexpected use of 'returned'") ? void ( 0) : __assert_fail ("!Ins.empty() && Ins[0].VT == MVT::i32 && \"unexpected use of 'returned'\"" , "llvm/lib/Target/ARM/ARMISelLowering.cpp", 2552, __extension__ __PRETTY_FUNCTION__)); | ||||
2553 | isThisReturn = true; | ||||
2554 | } | ||||
2555 | const TargetOptions &Options = DAG.getTarget().Options; | ||||
2556 | if (Options.EmitCallSiteInfo) | ||||
2557 | CSInfo.emplace_back(VA.getLocReg(), i); | ||||
2558 | RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); | ||||
2559 | } else if (isByVal) { | ||||
2560 | assert(VA.isMemLoc())(static_cast <bool> (VA.isMemLoc()) ? void (0) : __assert_fail ("VA.isMemLoc()", "llvm/lib/Target/ARM/ARMISelLowering.cpp", 2560, __extension__ __PRETTY_FUNCTION__)); | ||||
2561 | unsigned offset = 0; | ||||
2562 | |||||
2563 | // True if this byval aggregate will be split between registers | ||||
2564 | // and memory. | ||||
2565 | unsigned ByValArgsCount = CCInfo.getInRegsParamsCount(); | ||||
2566 | unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed(); | ||||
2567 | |||||
2568 | if (CurByValIdx < ByValArgsCount) { | ||||
2569 | |||||
2570 | unsigned RegBegin, RegEnd; | ||||
2571 | CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd); | ||||
2572 | |||||
2573 | EVT PtrVT = | ||||
2574 | DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); | ||||
2575 | unsigned int i, j; | ||||
2576 | for (i = 0, j = RegBegin; j < RegEnd; i++, j++) { | ||||
2577 | SDValue Const = DAG.getConstant(4*i, dl, MVT::i32); | ||||
2578 | SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); | ||||
2579 | SDValue Load = | ||||
2580 | DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo(), | ||||
2581 | DAG.InferPtrAlign(AddArg)); | ||||
2582 | MemOpChains.push_back(Load.getValue(1)); | ||||
2583 | RegsToPass.push_back(std::make_pair(j, Load)); | ||||
2584 | } | ||||
2585 | |||||
2586 | // If parameter size outsides register area, "offset" value | ||||
2587 | // helps us to calculate stack slot for remained part properly. | ||||
2588 | offset = RegEnd - RegBegin; | ||||
2589 | |||||
2590 | CCInfo.nextInRegsParam(); | ||||
2591 | } | ||||
2592 | |||||
2593 | if (Flags.getByValSize() > 4*offset) { | ||||
2594 | auto PtrVT = getPointerTy(DAG.getDataLayout()); | ||||
2595 | SDValue Dst; | ||||
2596 | MachinePointerInfo DstInfo; | ||||
2597 | std::tie(Dst, DstInfo) = | ||||
2598 | computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff); | ||||
2599 | SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl); | ||||
2600 | SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset); | ||||
2601 | SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl, | ||||
2602 | MVT::i32); | ||||
2603 | SDValue AlignNode = | ||||
2604 | DAG.getConstant(Flags.getNonZeroByValAlign().value(), dl, MVT::i32); | ||||
2605 | |||||
2606 | SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); | ||||
2607 | SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode}; | ||||
2608 | MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs, | ||||
2609 | Ops)); | ||||
2610 | } | ||||
2611 | } else { | ||||
2612 | assert(VA.isMemLoc())(static_cast <bool> (VA.isMemLoc()) ? void (0) : __assert_fail ("VA.isMemLoc()", "llvm/lib/Target/ARM/ARMISelLowering.cpp", 2612, __extension__ __PRETTY_FUNCTION__)); | ||||
2613 | SDValue DstAddr; | ||||
2614 | MachinePointerInfo DstInfo; | ||||
2615 | std::tie(DstAddr, DstInfo) = | ||||
2616 | computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff); | ||||
2617 | |||||
2618 | SDValue Store = DAG.getStore(Chain, dl, Arg, DstAddr, DstInfo); | ||||
2619 | MemOpChains.push_back(Store); | ||||
2620 | } | ||||
2621 | } | ||||
2622 | |||||
2623 | if (!MemOpChains.empty()) | ||||
2624 | Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); | ||||
2625 | |||||
2626 | // Build a sequence of copy-to-reg nodes chained together with token chain | ||||
2627 | // and flag operands which copy the outgoing args into the appropriate regs. | ||||
2628 | SDValue InFlag; | ||||
2629 | for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { | ||||
2630 | Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, | ||||
2631 | RegsToPass[i].second, InFlag); | ||||
2632 | InFlag = Chain.getValue(1); | ||||
2633 | } | ||||
2634 | |||||
2635 | // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every | ||||
2636 | // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol | ||||
2637 | // node so that legalize doesn't hack it. | ||||
2638 | bool isDirect = false; | ||||
2639 | |||||
2640 | const TargetMachine &TM = getTargetMachine(); | ||||
2641 | const Module *Mod = MF.getFunction().getParent(); | ||||
2642 | const GlobalValue *GVal = nullptr; | ||||
2643 | if (GlobalAddressSDNode *G
| ||||
2644 | GVal = G->getGlobal(); | ||||
2645 | bool isStub = | ||||
2646 | !TM.shouldAssumeDSOLocal(*Mod, GVal) && Subtarget->isTargetMachO(); | ||||
2647 | |||||
2648 | bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass()); | ||||
2649 | bool isLocalARMFunc = false; | ||||
2650 | auto PtrVt = getPointerTy(DAG.getDataLayout()); | ||||
2651 | |||||
2652 | if (Subtarget->genLongCalls()) { | ||||
2653 | assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&(static_cast <bool> ((!isPositionIndependent() || Subtarget ->isTargetWindows()) && "long-calls codegen is not position independent!" ) ? void (0) : __assert_fail ("(!isPositionIndependent() || Subtarget->isTargetWindows()) && \"long-calls codegen is not position independent!\"" , "llvm/lib/Target/ARM/ARMISelLowering.cpp", 2654, __extension__ __PRETTY_FUNCTION__)) | ||||
2654 | "long-calls codegen is not position independent!")(static_cast <bool> ((!isPositionIndependent() || Subtarget ->isTargetWindows()) && "long-calls codegen is not position independent!" ) ? void (0) : __assert_fail ("(!isPositionIndependent() || Subtarget->isTargetWindows()) && \"long-calls codegen is not position independent!\"" , "llvm/lib/Target/ARM/ARMISelLowering.cpp", 2654, __extension__ __PRETTY_FUNCTION__)); | ||||
2655 | // Handle a global address or an external symbol. If it's not one of | ||||
2656 | // those, the target's already in a register, so we don't need to do | ||||
2657 | // anything extra. | ||||
2658 | if (isa<GlobalAddressSDNode>(Callee)) { | ||||
2659 | // When generating execute-only code we use movw movt pair. | ||||
2660 | // Currently execute-only is only available for architectures that | ||||
2661 | // support movw movt, so we are safe to assume that. | ||||
2662 | if (Subtarget->genExecuteOnly()) { | ||||
2663 | assert(Subtarget->useMovt() &&(static_cast <bool> (Subtarget->useMovt() && "long-calls with execute-only requires movt and movw!") ? void (0) : __assert_fail ("Subtarget->useMovt() && \"long-calls with execute-only requires movt and movw!\"" , "llvm/lib/Target/ARM/ARMISelLowering.cpp", 2664, __extension__ __PRETTY_FUNCTION__)) | ||||
2664 | "long-calls with execute-only requires movt and movw!")(static_cast <bool> (Subtarget->useMovt() && "long-calls with execute-only requires movt and movw!") ? void (0) : __assert_fail ("Subtarget->useMovt() && \"long-calls with execute-only requires movt and movw!\"" , "llvm/lib/Target/ARM/ARMISelLowering.cpp", 2664, __extension__ __PRETTY_FUNCTION__)); | ||||
2665 | ++NumMovwMovt; | ||||
2666 | Callee = DAG.getNode(ARMISD::Wrapper, dl, PtrVt, | ||||
2667 | DAG.getTargetGlobalAddress(GVal, dl, PtrVt)); | ||||
2668 | } else { | ||||
2669 | // Create a constant pool entry for the callee address | ||||
2670 | unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); | ||||
2671 | ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create( | ||||
2672 | GVal, ARMPCLabelIndex, ARMCP::CPValue, 0); | ||||
2673 | |||||
2674 | // Get the address of the callee into a register | ||||
2675 | SDValue Addr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4)); | ||||
2676 | Addr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Addr); | ||||
2677 | Callee = DAG.getLoad( | ||||
2678 | PtrVt, dl, DAG.getEntryNode(), Addr, | ||||
2679 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); | ||||
2680 | } | ||||
2681 | } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) { | ||||
2682 | const char *Sym = S->getSymbol(); | ||||
2683 | |||||
2684 | // When generating execute-only code we use movw movt pair. | ||||
2685 | // Currently execute-only is only available for architectures that | ||||
2686 | // support movw movt, so we are safe to assume that. | ||||
2687 | if (Subtarget->genExecuteOnly()) { | ||||
2688 | assert(Subtarget->useMovt() &&(static_cast <bool> (Subtarget->useMovt() && "long-calls with execute-only requires movt and movw!") ? void (0) : __assert_fail ("Subtarget->useMovt() && \"long-calls with execute-only requires movt and movw!\"" , "llvm/lib/Target/ARM/ARMISelLowering.cpp", 2689, __extension__ __PRETTY_FUNCTION__)) | ||||
2689 | "long-calls with execute-only requires movt and movw!")(static_cast <bool> (Subtarget->useMovt() && "long-calls with execute-only requires movt and movw!") ? void (0) : __assert_fail ("Subtarget->useMovt() && \"long-calls with execute-only requires movt and movw!\"" , "llvm/lib/Target/ARM/ARMISelLowering.cpp", 2689, __extension__ __PRETTY_FUNCTION__)); | ||||
2690 | ++NumMovwMovt; | ||||
2691 | Callee = DAG.getNode(ARMISD::Wrapper, dl, PtrVt, | ||||
2692 | DAG.getTargetGlobalAddress(GVal, dl, PtrVt)); | ||||
2693 | } else { | ||||
2694 | // Create a constant pool entry for the callee address | ||||
2695 | unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); | ||||
2696 | ARMConstantPoolValue *CPV = ARMConstantPoolSymbol::Create( | ||||
2697 | *DAG.getContext(), Sym, ARMPCLabelIndex, 0); | ||||
2698 | |||||
2699 | // Get the address of the callee into a register | ||||
2700 | SDValue Addr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4)); | ||||
2701 | Addr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Addr); | ||||
2702 | Callee = DAG.getLoad( | ||||
2703 | PtrVt, dl, DAG.getEntryNode(), Addr, | ||||
2704 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); | ||||
2705 | } | ||||
2706 | } | ||||
2707 | } else if (isa<GlobalAddressSDNode>(Callee)) { | ||||
2708 | if (!PreferIndirect
| ||||
2709 | isDirect = true; | ||||
2710 | bool isDef = GVal->isStrongDefinitionForLinker(); | ||||
| |||||
2711 | |||||
2712 | // ARM call to a local ARM function is predicable. | ||||
2713 | isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking); | ||||
2714 | // tBX takes a register source operand. | ||||
2715 | if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { | ||||
2716 | assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?")(static_cast <bool> (Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?") ? void (0) : __assert_fail ( "Subtarget->isTargetMachO() && \"WrapperPIC use on non-MachO?\"" , "llvm/lib/Target/ARM/ARMISelLowering.cpp", 2716, __extension__ __PRETTY_FUNCTION__)); | ||||
2717 | Callee = DAG.getNode( | ||||
2718 | ARMISD::WrapperPIC, dl, PtrVt, | ||||
2719 | DAG.getTargetGlobalAddress(GVal, dl, PtrVt, 0, ARMII::MO_NONLAZY)); | ||||
2720 | Callee = DAG.getLoad( | ||||
2721 | PtrVt, dl, DAG.getEntryNode(), Callee, | ||||
2722 | MachinePointerInfo::getGOT(DAG.getMachineFunction()), MaybeAlign(), | ||||
2723 | MachineMemOperand::MODereferenceable | | ||||
2724 | MachineMemOperand::MOInvariant); | ||||
2725 | } else if (Subtarget->isTargetCOFF()) { | ||||
2726 | assert(Subtarget->isTargetWindows() &&(static_cast <bool> (Subtarget->isTargetWindows() && "Windows is the only supported COFF target") ? void (0) : __assert_fail ("Subtarget->isTargetWindows() && \"Windows is the only supported COFF target\"" , "llvm/lib/Target/ARM/ARMISelLowering.cpp", 2727, __extension__ __PRETTY_FUNCTION__)) | ||||
2727 | "Windows is the only supported COFF target")(static_cast <bool> (Subtarget->isTargetWindows() && "Windows is the only supported COFF target") ? void (0) : __assert_fail ("Subtarget->isTargetWindows() && \"Windows is the only supported COFF target\"" , "llvm/lib/Target/ARM/ARMISelLowering.cpp", 2727, __extension__ __PRETTY_FUNCTION__)); | ||||
2728 | unsigned TargetFlags = ARMII::MO_NO_FLAG; | ||||
2729 | if (GVal->hasDLLImportStorageClass()) | ||||
2730 | TargetFlags = ARMII::MO_DLLIMPORT; | ||||
2731 | else if (!TM.shouldAssumeDSOLocal(*GVal->getParent(), GVal)) | ||||
2732 | TargetFlags = ARMII::MO_COFFSTUB; | ||||
2733 | Callee = DAG.getTargetGlobalAddress(GVal, dl, PtrVt, /*offset=*/0, | ||||
2734 | TargetFlags); | ||||
2735 | if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB)) | ||||
2736 | Callee = | ||||
2737 | DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), | ||||
2738 | DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee), | ||||
2739 | MachinePointerInfo::getGOT(DAG.getMachineFunction())); | ||||
2740 | } else { | ||||
2741 | Callee = DAG.getTargetGlobalAddress(GVal, dl, PtrVt, 0, 0); | ||||
2742 | } | ||||
2743 | } | ||||
2744 | } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { | ||||
2745 | isDirect = true; | ||||
2746 | // tBX takes a register source operand. | ||||
2747 | const char *Sym = S->getSymbol(); | ||||
2748 | if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { | ||||
2749 | unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); | ||||
2750 | ARMConstantPoolValue *CPV = | ||||
2751 | ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym, | ||||
2752 | ARMPCLabelIndex, 4); | ||||
2753 | SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4)); | ||||
2754 | CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); | ||||
2755 | Callee = DAG.getLoad( | ||||
2756 | PtrVt, dl, DAG.getEntryNode(), CPAddr, | ||||
2757 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); | ||||
2758 | SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); | ||||
2759 | Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel); | ||||
2760 | } else { | ||||
2761 | Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0); | ||||
2762 | } | ||||
2763 | } | ||||
2764 | |||||
2765 | if (isCmseNSCall) { | ||||
2766 | assert(!isARMFunc && !isDirect &&(static_cast <bool> (!isARMFunc && !isDirect && "Cannot handle call to ARM function or direct call") ? void ( 0) : __assert_fail ("!isARMFunc && !isDirect && \"Cannot handle call to ARM function or direct call\"" , "llvm/lib/Target/ARM/ARMISelLowering.cpp", 2767, __extension__ __PRETTY_FUNCTION__)) | ||||
2767 | "Cannot handle call to ARM function or direct call")(static_cast <bool> (!isARMFunc && !isDirect && "Cannot handle call to ARM function or direct call") ? void ( 0) : __assert_fail ("!isARMFunc && !isDirect && \"Cannot handle call to ARM function or direct call\"" , "llvm/lib/Target/ARM/ARMISelLowering.cpp", 2767, __extension__ __PRETTY_FUNCTION__)); | ||||
2768 | if (NumBytes > 0) { | ||||
2769 | DiagnosticInfoUnsupported Diag(DAG.getMachineFunction().getFunction(), | ||||
2770 | "call to non-secure function would " | ||||
2771 | "require passing arguments on stack", | ||||
2772 | dl.getDebugLoc()); | ||||
2773 | DAG.getContext()->diagnose(Diag); | ||||
2774 | } | ||||
2775 | if (isStructRet) { | ||||
2776 | DiagnosticInfoUnsupported Diag( | ||||
2777 | DAG.getMachineFunction().getFunction(), | ||||
2778 | "call to non-secure function would return value through pointer", | ||||
2779 | dl.getDebugLoc()); | ||||
2780 | DAG.getContext()->diagnose(Diag); | ||||
2781 | } | ||||
2782 | } | ||||
2783 | |||||
2784 | // FIXME: handle tail calls differently. | ||||
2785 | unsigned CallOpc; | ||||
2786 | if (Subtarget->isThumb()) { | ||||
2787 | if (GuardWithBTI) | ||||
2788 | CallOpc = ARMISD::t2CALL_BTI; | ||||
2789 | else if (isCmseNSCall) | ||||
2790 | CallOpc = ARMISD::tSECALL; | ||||
2791 | else if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps()) | ||||
2792 | CallOpc = ARMISD::CALL_NOLINK; | ||||
2793 | else | ||||
2794 | CallOpc = ARMISD::CALL; | ||||
2795 | } else { | ||||
2796 | if (!isDirect && !Subtarget->hasV5TOps()) | ||||
2797 | CallOpc = ARMISD::CALL_NOLINK; | ||||
2798 | else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() && | ||||
2799 | // Emit regular call when code size is the priority | ||||
2800 | !Subtarget->hasMinSize()) | ||||
2801 | // "mov lr, pc; b _foo" to avoid confusing the RSP | ||||
2802 | CallOpc = ARMISD::CALL_NOLINK; | ||||
2803 | else | ||||
2804 | CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL; | ||||
2805 | } | ||||
2806 | |||||
2807 | // We don't usually want to end the call-sequence here because we would tidy | ||||
2808 | // the frame up *after* the call, however in the ABI-changing tail-call case | ||||
2809 | // we've carefully laid out the parameters so that when sp is reset they'll be | ||||
2810 | // in the correct location. | ||||
2811 | if (isTailCall && !isSibCall) { | ||||
2812 | Chain = DAG.getCALLSEQ_END(Chain, 0, 0, InFlag, dl); | ||||
2813 | InFlag = Chain.getValue(1); | ||||
2814 | } | ||||
2815 | |||||
2816 | std::vector<SDValue> Ops; | ||||
2817 | Ops.push_back(Chain); | ||||
2818 | Ops.push_back(Callee); | ||||
2819 | |||||
2820 | if (isTailCall) { | ||||
2821 | Ops.push_back(DAG.getTargetConstant(SPDiff, dl, MVT::i32)); | ||||
2822 | } | ||||
2823 | |||||
2824 | // Add argument registers to the end of the list so that they are known live | ||||
2825 | // into the call. | ||||
2826 | for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) | ||||
2827 | Ops.push_back(DAG.getRegister(RegsToPass[i].first, | ||||
2828 | RegsToPass[i].second.getValueType())); | ||||
2829 | |||||
2830 | // Add a register mask operand representing the call-preserved registers. | ||||
2831 | const uint32_t *Mask; | ||||
2832 | const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo(); | ||||
2833 | if (isThisReturn) { | ||||
2834 | // For 'this' returns, use the R0-preserving mask if applicable | ||||
2835 | Mask = ARI->getThisReturnPreservedMask(MF, CallConv); | ||||
2836 | if (!Mask) { | ||||
2837 | // Set isThisReturn to false if the calling convention is not one that | ||||
2838 | // allows 'returned' to be modeled in this way, so LowerCallResult does | ||||
2839 | // not try to pass 'this' straight through | ||||
2840 | isThisReturn = false; | ||||
2841 | Mask = ARI->getCallPreservedMask(MF, CallConv); | ||||
2842 | } | ||||
2843 | } else | ||||
2844 | Mask = ARI->getCallPreservedMask(MF, CallConv); | ||||
2845 | |||||
2846 | assert(Mask && "Missing call preserved mask for calling convention")(static_cast <bool> (Mask && "Missing call preserved mask for calling convention" ) ? void (0) : __assert_fail ("Mask && \"Missing call preserved mask for calling convention\"" , "llvm/lib/Target/ARM/ARMISelLowering.cpp", 2846, __extension__ __PRETTY_FUNCTION__)); | ||||
2847 | Ops.push_back(DAG.getRegisterMask(Mask)); | ||||
2848 | |||||
2849 | if (InFlag.getNode()) | ||||
2850 | Ops.push_back(InFlag); | ||||
2851 | |||||
2852 | SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); | ||||
2853 | if (isTailCall) { | ||||
2854 | MF.getFrameInfo().setHasTailCall(); | ||||
2855 | SDValue Ret = DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops); | ||||
2856 | DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo)); | ||||
2857 | return Ret; | ||||
2858 | } | ||||
2859 | |||||
2860 | // Returns a chain and a flag for retval copy to use. | ||||
2861 | Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops); | ||||
2862 | DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); | ||||
2863 | InFlag = Chain.getValue(1); | ||||
2864 | DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo)); | ||||
2865 | |||||
2866 | // If we're guaranteeing tail-calls will be honoured, the callee must | ||||
2867 | // pop its own argument stack on return. But this call is *not* a tail call so | ||||
2868 | // we need to undo that after it returns to restore the status-quo. | ||||
2869 | bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt; | ||||
2870 | uint64_t CalleePopBytes = | ||||
2871 | canGuaranteeTCO(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : -1ULL; | ||||
2872 | |||||
2873 | Chain = DAG.getCALLSEQ_END(Chain, NumBytes, CalleePopBytes, InFlag, dl); | ||||
2874 | if (!Ins.empty()) | ||||
2875 | InFlag = Chain.getValue(1); | ||||
2876 | |||||
2877 | // Handle result values, copying them out of physregs into vregs that we | ||||
2878 | // return. | ||||
2879 | return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG, | ||||
2880 | InVals, isThisReturn, | ||||
2881 | isThisReturn ? OutVals[0] : SDValue()); | ||||
2882 | } | ||||
2883 | |||||
2884 | /// HandleByVal - Every parameter *after* a byval parameter is passed | ||||
2885 | /// on the stack. Remember the next parameter register to allocate, | ||||
2886 | /// and then confiscate the rest of the parameter registers to insure | ||||
2887 | /// this. | ||||
2888 | void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size, | ||||
2889 | Align Alignment) const { | ||||
2890 | // Byval (as with any stack) slots are always at least 4 byte aligned. | ||||
2891 | Alignment = std::max(Alignment, Align(4)); | ||||
2892 | |||||
2893 | unsigned Reg = State->AllocateReg(GPRArgRegs); | ||||
2894 | if (!Reg) | ||||
2895 | return; | ||||
2896 | |||||
2897 | unsigned AlignInRegs = Alignment.value() / 4; | ||||
2898 | unsigned Waste = (ARM::R4 - Reg) % AlignInRegs; | ||||
2899 | for (unsigned i = 0; i < Waste; ++i) | ||||
2900 | Reg = State->AllocateReg(GPRArgRegs); | ||||
2901 | |||||
2902 | if (!Reg) | ||||
2903 | return; | ||||
2904 | |||||
2905 | unsigned Excess = 4 * (ARM::R4 - Reg); | ||||
2906 | |||||
2907 | // Special case when NSAA != SP and parameter size greater than size of | ||||
2908 | // all remained GPR regs. In that case we can't split parameter, we must | ||||
2909 | // send it to stack. We also must set NCRN to R4, so waste all | ||||
2910 | // remained registers. | ||||
2911 | const unsigned NSAAOffset = State->getNextStackOffset(); | ||||
2912 | if (NSAAOffset != 0 && Size > Excess) { | ||||
2913 | while (State->AllocateReg(GPRArgRegs)) | ||||
2914 | ; | ||||
2915 | return; | ||||
2916 | } | ||||
2917 | |||||
2918 | // First register for byval parameter is the first register that wasn't | ||||
2919 | // allocated before this method call, so it would be "reg". | ||||
2920 | // If parameter is small enough to be saved in range [reg, r4), then | ||||
2921 | // the end (first after last) register would be reg + param-size-in-regs, | ||||
2922 | // else parameter would be splitted between registers and stack, | ||||
2923 | // end register would be r4 in this case. | ||||
2924 | unsigned ByValRegBegin = Reg; | ||||
2925 | unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4); | ||||
2926 | State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd); | ||||
2927 | // Note, first register is allocated in the beginning of function already, | ||||
2928 | // allocate remained amount of registers we need. | ||||
2929 | for (unsigned i = Reg + 1; i != ByValRegEnd; ++i) | ||||
2930 | State->AllocateReg(GPRArgRegs); | ||||
2931 | // A byval parameter that is split between registers and memory needs its | ||||
2932 | // size truncated here. | ||||
2933 | // In the case where the entire structure fits in registers, we set the | ||||
2934 | // size in memory to zero. | ||||
2935 | Size = std::max<int>(Size - Excess, 0); | ||||
2936 | } | ||||
2937 | |||||
2938 | /// MatchingStackOffset - Return true if the given stack call argument is | ||||
2939 | /// already available in the same position (relatively) of the caller's | ||||
2940 | /// incoming argument stack. | ||||
2941 | static | ||||
2942 | bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, | ||||
2943 | MachineFrameInfo &MFI, const MachineRegisterInfo *MRI, | ||||
2944 | const TargetInstrInfo *TII) { | ||||
2945 | unsigned Bytes = Arg.getValueSizeInBits() / 8; | ||||
2946 | int FI = std::numeric_limits<int>::max(); | ||||
2947 | if (Arg.getOpcode() == ISD::CopyFromReg) { | ||||
2948 | Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg(); | ||||
2949 | if (!VR.isVirtual()) | ||||
2950 | return false; | ||||
2951 | MachineInstr *Def = MRI->getVRegDef(VR); | ||||
2952 | if (!Def) | ||||
2953 | return false; | ||||
2954 | if (!Flags.isByVal()) { | ||||
2955 | if (!TII->isLoadFromStackSlot(*Def, FI)) | ||||
2956 | return false; | ||||
2957 | } else { | ||||
2958 | return false; | ||||
2959 | } | ||||
2960 | } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) { | ||||
2961 | if (Flags.isByVal()) | ||||
2962 | // ByVal argument is passed in as a pointer but it's now being | ||||
2963 | // dereferenced. e.g. | ||||
2964 | // define @foo(%struct.X* %A) { | ||||
2965 | // tail call @bar(%struct.X* byval %A) | ||||
2966 | // } | ||||
2967 | return false; | ||||
2968 | SDValue Ptr = Ld->getBasePtr(); | ||||
2969 | FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr); | ||||
2970 | if (!FINode) | ||||
2971 | return false; | ||||
2972 | FI = FINode->getIndex(); | ||||
2973 | } else | ||||
2974 | return false; | ||||
2975 | |||||
2976 | assert(FI != std::numeric_limits<int>::max())(static_cast <bool> (FI != std::numeric_limits<int> ::max()) ? void (0) : __assert_fail ("FI != std::numeric_limits<int>::max()" , "llvm/lib/Target/ARM/ARMISelLowering.cpp", 2976, __extension__ __PRETTY_FUNCTION__)); | ||||
2977 | if (!MFI.isFixedObjectIndex(FI)) | ||||
2978 | return false; | ||||
2979 | return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI); | ||||
2980 | } | ||||
2981 | |||||
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function.
///
/// \param Callee           the callee node (may be a GlobalAddressSDNode or an
///                         arbitrary indirect target).
/// \param CalleeCC         calling convention of the callee.
/// \param isCalleeStructRet / isCallerStructRet  whether either side uses
///                         struct-return (sret) semantics.
/// \param Outs/OutVals     outgoing argument descriptions and values.
/// \param Ins              incoming result descriptions of the call.
/// \param isIndirect       true if the call is known to be indirect.
/// \return true if the call may be emitted as a tail call.
bool ARMTargetLowering::IsEligibleForTailCallOptimization(
    SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
    bool isCalleeStructRet, bool isCallerStructRet,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG,
    const bool isIndirect) const {
  MachineFunction &MF = DAG.getMachineFunction();
  const Function &CallerF = MF.getFunction();
  CallingConv::ID CallerCC = CallerF.getCallingConv();

  // Callers are expected to have filtered out subtargets without tail-call
  // support before asking this question.
  assert(Subtarget->supportsTailCall());

  // Indirect tail calls cannot be optimized for Thumb1 if the args
  // to the call take up r0-r3. The reason is that there are no legal registers
  // left to hold the pointer to the function to be called.
  // Similarly, if the function uses return address sign and authentication,
  // r12 is needed to hold the PAC and is not available to hold the callee
  // address.
  if (Outs.size() >= 4 &&
      (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect)) {
    if (Subtarget->isThumb1Only())
      return false;
    // Conservatively assume the function spills LR.
    if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(true))
      return false;
  }

  // Look for obvious safe cases to perform tail call optimization that do not
  // require ABI changes. This is what gcc calls sibcall.

  // Exception-handling functions need a special set of instructions to indicate
  // a return to the hardware. Tail-calling another function would probably
  // break this.
  if (CallerF.hasFnAttribute("interrupt"))
    return false;

  // With -tailcallopt, a matching fastcc-style convention guarantees TCO; the
  // conventions must match exactly in that case.
  if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt))
    return CalleeCC == CallerCC;

  // Also avoid sibcall optimization if either caller or callee uses struct
  // return semantics.
  if (isCalleeStructRet || isCallerStructRet)
    return false;

  // Externally-defined functions with weak linkage should not be
  // tail-called on ARM when the OS does not support dynamic
  // pre-emption of symbols, as the AAELF spec requires normal calls
  // to undefined weak functions to be replaced with a NOP or jump to the
  // next instruction. The behaviour of branch instructions in this
  // situation (as used for tail calls) is implementation-defined, so we
  // cannot rely on the linker replacing the tail call with a return.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = G->getGlobal();
    const Triple &TT = getTargetMachine().getTargetTriple();
    if (GV->hasExternalWeakLinkage() &&
        (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
      return false;
  }

  // Check that the call results are passed in the same way.
  LLVMContext &C = *DAG.getContext();
  if (!CCState::resultsCompatible(
          getEffectiveCallingConv(CalleeCC, isVarArg),
          getEffectiveCallingConv(CallerCC, CallerF.isVarArg()), MF, C, Ins,
          CCAssignFnForReturn(CalleeCC, isVarArg),
          CCAssignFnForReturn(CallerCC, CallerF.isVarArg())))
    return false;
  // The callee has to preserve all registers the caller needs to preserve.
  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
  if (CalleeCC != CallerCC) {
    const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
    if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
      return false;
  }

  // If Caller's vararg or byval argument has been split between registers and
  // stack, do not perform tail call, since part of the argument is in caller's
  // local frame.
  const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
  if (AFI_Caller->getArgRegsSaveSize())
    return false;

  // If the callee takes no arguments then go on to check the results of the
  // call.
  if (!Outs.empty()) {
    // Check if stack adjustment is needed. For now, do not do this if any
    // argument is passed on the stack.
    SmallVector<CCValAssign, 16> ArgLocs;
    CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
    CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
    if (CCInfo.getNextStackOffset()) {
      // Check if the arguments are already laid out in the right way as
      // the caller's fixed stack objects.
      MachineFrameInfo &MFI = MF.getFrameInfo();
      const MachineRegisterInfo *MRI = &MF.getRegInfo();
      const TargetInstrInfo *TII = Subtarget->getInstrInfo();
      // Note: `i` walks ArgLocs and may be advanced inside the loop for
      // custom-split values, while realArgIdx walks Outs/OutVals.
      for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
           i != e;
           ++i, ++realArgIdx) {
        CCValAssign &VA = ArgLocs[i];
        EVT RegVT = VA.getLocVT();
        SDValue Arg = OutVals[realArgIdx];
        ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
        if (VA.getLocInfo() == CCValAssign::Indirect)
          return false;
        if (VA.needsCustom() && (RegVT == MVT::f64 || RegVT == MVT::v2f64)) {
          // f64 and vector types are split into multiple registers or
          // register/stack-slot combinations. The types will not match
          // the registers; give up on memory f64 refs until we figure
          // out what to do about this.
          if (!VA.isRegLoc())
            return false;
          if (!ArgLocs[++i].isRegLoc())
            return false;
          if (RegVT == MVT::v2f64) {
            if (!ArgLocs[++i].isRegLoc())
              return false;
            if (!ArgLocs[++i].isRegLoc())
              return false;
          }
        } else if (!VA.isRegLoc()) {
          // Stack-passed argument: it must already sit at the matching fixed
          // stack slot of the caller, otherwise a stack adjustment would be
          // required.
          if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
                                   MFI, MRI, TII))
            return false;
        }
      }
    }

    const MachineRegisterInfo &MRI = MF.getRegInfo();
    if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
      return false;
  }

  return true;
}
3122 | |||||
3123 | bool | ||||
3124 | ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv, | ||||
3125 | MachineFunction &MF, bool isVarArg, | ||||
3126 | const SmallVectorImpl<ISD::OutputArg> &Outs, | ||||
3127 | LLVMContext &Context) const { | ||||
3128 | SmallVector<CCValAssign, 16> RVLocs; | ||||
3129 | CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); | ||||
3130 | return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg)); | ||||
3131 | } | ||||
3132 | |||||
3133 | static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps, | ||||
3134 | const SDLoc &DL, SelectionDAG &DAG) { | ||||
3135 | const MachineFunction &MF = DAG.getMachineFunction(); | ||||
3136 | const Function &F = MF.getFunction(); | ||||
3137 | |||||
3138 | StringRef IntKind = F.getFnAttribute("interrupt").getValueAsString(); | ||||
3139 | |||||
3140 | // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset | ||||
3141 | // version of the "preferred return address". These offsets affect the return | ||||
3142 | // instruction if this is a return from PL1 without hypervisor extensions. | ||||
3143 | // IRQ/FIQ: +4 "subs pc, lr, #4" | ||||
3144 | // SWI: 0 "subs pc, lr, #0" | ||||
3145 | // ABORT: +4 "subs pc, lr, #4" | ||||
3146 | // UNDEF: +4/+2 "subs pc, lr, #0" | ||||
3147 | // UNDEF varies depending on where the exception came from ARM or Thumb | ||||
3148 | // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0. | ||||
3149 | |||||
3150 | int64_t LROffset; | ||||
3151 | if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" || | ||||
3152 | IntKind == "ABORT") | ||||
3153 | LROffset = 4; | ||||
3154 | else if (IntKind == "SWI" || IntKind == "UNDEF") | ||||
3155 | LROffset = 0; | ||||
3156 | else | ||||
3157 | report_fatal_error("Unsupported interrupt attribute. If present, value " | ||||
3158 | "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF"); | ||||
3159 | |||||
3160 | RetOps.insert(RetOps.begin() + 1, | ||||
3161 | DAG.getConstant(LROffset, DL, MVT::i32, false)); | ||||
3162 | |||||
3163 | return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps); | ||||
3164 | } | ||||
3165 | |||||
/// Lower an IR-level return into the ARM-specific return node, copying each
/// return value into its assigned physical register and threading glue so the
/// copies stay adjacent to the return.
SDValue
ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               const SDLoc &dl, SelectionDAG &DAG) const {
  // CCValAssign - represent the assignment of the return value to a location.
  SmallVector<CCValAssign, 16> RVLocs;

  // CCState - Info about the registers and stack slots.
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  // Analyze outgoing return values.
  CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));

  SDValue Flag;
  SmallVector<SDValue, 4> RetOps;
  RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
  bool isLittleEndian = Subtarget->isLittle();

  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  AFI->setReturnRegsCount(RVLocs.size());

  // Report error if cmse entry function returns structure through first ptr
  // arg.
  if (AFI->isCmseNSEntryFunction() && MF.getFunction().hasStructRetAttr()) {
    // Note: using an empty SDLoc(), as the first line of the function is a
    // better place to report than the last line.
    DiagnosticInfoUnsupported Diag(
        DAG.getMachineFunction().getFunction(),
        "secure entry function would return value through pointer",
        SDLoc().getDebugLoc());
    DAG.getContext()->diagnose(Diag);
  }

  // Copy the result values into the output registers.
  // `i` walks RVLocs (and is bumped extra times for custom-split f64/v2f64
  // values below); realRVLocIdx walks Outs/OutVals in lock-step.
  for (unsigned i = 0, realRVLocIdx = 0;
       i != RVLocs.size();
       ++i, ++realRVLocIdx) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Arg = OutVals[realRVLocIdx];
    bool ReturnF16 = false;

    if (Subtarget->hasFullFP16() && Subtarget->isTargetHardFloat()) {
      // Half-precision return values can be returned like this:
      //
      // t11 f16 = fadd ...
      //        t12: i16 = bitcast t11
      //   t13: i32 = zero_extend t12
      // t14: f32 = bitcast t13  <~~~~~~~ Arg
      //
      // to avoid code generation for bitcasts, we simply set Arg to the node
      // that produces the f16 value, t11 in this case.
      if (Arg.getValueType() == MVT::f32 && Arg.getOpcode() == ISD::BITCAST) {
        SDValue ZE = Arg.getOperand(0);
        if (ZE.getOpcode() == ISD::ZERO_EXTEND && ZE.getValueType() == MVT::i32) {
          SDValue BC = ZE.getOperand(0);
          if (BC.getOpcode() == ISD::BITCAST && BC.getValueType() == MVT::i16) {
            Arg = BC.getOperand(0);
            ReturnF16 = true;
          }
        }
      }
    }

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::BCvt:
      // When returning the raw f16 (ReturnF16), the bitcast to the location
      // type must be skipped - the register copy below uses Arg's own type.
      if (!ReturnF16)
        Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
      break;
    }

    // Mask f16 arguments if this is a CMSE nonsecure entry: the upper bits of
    // the wider location register must not leak secure-state data.
    auto RetVT = Outs[realRVLocIdx].ArgVT;
    if (AFI->isCmseNSEntryFunction() && (RetVT == MVT::f16)) {
      if (VA.needsCustom() && VA.getValVT() == MVT::f16) {
        Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
      } else {
        auto LocBits = VA.getLocVT().getSizeInBits();
        auto MaskValue = APInt::getLowBitsSet(LocBits, RetVT.getSizeInBits());
        SDValue Mask =
            DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
        Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
        Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
        Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
      }
    }

    if (VA.needsCustom() &&
        (VA.getLocVT() == MVT::v2f64 || VA.getLocVT() == MVT::f64)) {
      if (VA.getLocVT() == MVT::v2f64) {
        // Extract the first half and return it in two registers.
        SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                   DAG.getConstant(0, dl, MVT::i32));
        SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
                                       DAG.getVTList(MVT::i32, MVT::i32), Half);

        Chain =
            DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
                             HalfGPRs.getValue(isLittleEndian ? 0 : 1), Flag);
        Flag = Chain.getValue(1);
        RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
        VA = RVLocs[++i]; // skip ahead to next loc
        Chain =
            DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
                             HalfGPRs.getValue(isLittleEndian ? 1 : 0), Flag);
        Flag = Chain.getValue(1);
        RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
        VA = RVLocs[++i]; // skip ahead to next loc

        // Extract the 2nd half and fall through to handle it as an f64 value.
        Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                          DAG.getConstant(1, dl, MVT::i32));
      }
      // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
      // available.
      SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
                                  DAG.getVTList(MVT::i32, MVT::i32), Arg);
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
                               fmrrd.getValue(isLittleEndian ? 0 : 1), Flag);
      Flag = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
      VA = RVLocs[++i]; // skip ahead to next loc
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
                               fmrrd.getValue(isLittleEndian ? 1 : 0), Flag);
    } else
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);

    // Guarantee that all emitted copies are
    // stuck together, avoiding something bad.
    Flag = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(
        VA.getLocReg(), ReturnF16 ? Arg.getValueType() : VA.getLocVT()));
  }
  // Functions returning callee-saved registers via copies (e.g. for
  // swifterror-like conventions) list those registers on the return node too.
  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
  const MCPhysReg *I =
      TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
  if (I) {
    for (; *I; ++I) {
      if (ARM::GPRRegClass.contains(*I))
        RetOps.push_back(DAG.getRegister(*I, MVT::i32));
      else if (ARM::DPRRegClass.contains(*I))
        RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
      else
        llvm_unreachable("Unexpected register class in CSRsViaCopy!");
    }
  }

  // Update chain and glue.
  RetOps[0] = Chain;
  if (Flag.getNode())
    RetOps.push_back(Flag);

  // CPUs which aren't M-class use a special sequence to return from
  // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
  // though we use "subs pc, lr, #N").
  //
  // M-class CPUs actually use a normal return sequence with a special
  // (hardware-provided) value in LR, so the normal code path works.
  if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt") &&
      !Subtarget->isMClass()) {
    if (Subtarget->isThumb1Only())
      report_fatal_error("interrupt attribute is not supported in Thumb1");
    return LowerInterruptReturn(RetOps, dl, DAG);
  }

  // CMSE nonsecure entry functions return through a special node that emits
  // the bxns-style sequence.
  ARMISD::NodeType RetNode = AFI->isCmseNSEntryFunction() ? ARMISD::SERET_FLAG :
                                                            ARMISD::RET_FLAG;
  return DAG.getNode(RetNode, dl, MVT::Other, RetOps);
}
3342 | |||||
/// Return true if the only use of node \p N is a return (possibly through a
/// register copy or bitcast), so that the preceding call may be turned into a
/// tail call. On success \p Chain is updated to the chain value feeding the
/// copies, which the caller can splice the tail call onto.
bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
  // Only single-result nodes with exactly one user are candidates.
  if (N->getNumValues() != 1)
    return false;
  if (!N->hasNUsesOfValue(1, 0))
    return false;

  SDValue TCChain = Chain;
  SDNode *Copy = *N->use_begin();
  if (Copy->getOpcode() == ISD::CopyToReg) {
    // If the copy has a glue operand, we conservatively assume it isn't safe to
    // perform a tail call.
    if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
      return false;
    TCChain = Copy->getOperand(0);
  } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
    SDNode *VMov = Copy;
    // f64 returned in a pair of GPRs: expect exactly two CopyToReg users of
    // the VMOVRRD, chained one after the other.
    SmallPtrSet<SDNode*, 2> Copies;
    for (SDNode *U : VMov->uses()) {
      if (U->getOpcode() != ISD::CopyToReg)
        return false;
      Copies.insert(U);
    }
    if (Copies.size() > 2)
      return false;

    // Identify which copy is first in the chain (its input chain does not
    // come from the other copy); that copy's input chain is the tail-call
    // chain.
    for (SDNode *U : VMov->uses()) {
      SDValue UseChain = U->getOperand(0);
      if (Copies.count(UseChain.getNode()))
        // Second CopyToReg
        Copy = U;
      else {
        // We are at the top of this chain.
        // If the copy has a glue operand, we conservatively assume it
        // isn't safe to perform a tail call.
        if (U->getOperand(U->getNumOperands() - 1).getValueType() == MVT::Glue)
          return false;
        // First CopyToReg
        TCChain = UseChain;
      }
    }
  } else if (Copy->getOpcode() == ISD::BITCAST) {
    // f32 returned in a single GPR: look through the bitcast to the CopyToReg.
    if (!Copy->hasOneUse())
      return false;
    Copy = *Copy->use_begin();
    if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
      return false;
    // If the copy has a glue operand, we conservatively assume it isn't safe to
    // perform a tail call.
    if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
      return false;
    TCChain = Copy->getOperand(0);
  } else {
    return false;
  }

  // Every user of the (final) copy must be one of the ARM return nodes.
  bool HasRet = false;
  for (const SDNode *U : Copy->uses()) {
    if (U->getOpcode() != ARMISD::RET_FLAG &&
        U->getOpcode() != ARMISD::INTRET_FLAG)
      return false;
    HasRet = true;
  }

  if (!HasRet)
    return false;

  Chain = TCChain;
  return true;
}
3414 | |||||
3415 | bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { | ||||
3416 | if (!Subtarget->supportsTailCall()) | ||||
3417 | return false; | ||||
3418 | |||||
3419 | if (!CI->isTailCall()) | ||||
3420 | return false; | ||||
3421 | |||||
3422 | return true; | ||||
3423 | } | ||||
3424 | |||||
3425 | // Trying to write a 64 bit value so need to split into two 32 bit values first, | ||||
3426 | // and pass the lower and high parts through. | ||||
3427 | static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) { | ||||
3428 | SDLoc DL(Op); | ||||
3429 | SDValue WriteValue = Op->getOperand(2); | ||||
3430 | |||||
3431 | // This function is only supposed to be called for i64 type argument. | ||||
3432 | assert(WriteValue.getValueType() == MVT::i64(static_cast <bool> (WriteValue.getValueType() == MVT:: i64 && "LowerWRITE_REGISTER called for non-i64 type argument." ) ? void (0) : __assert_fail ("WriteValue.getValueType() == MVT::i64 && \"LowerWRITE_REGISTER called for non-i64 type argument.\"" , "llvm/lib/Target/ARM/ARMISelLowering.cpp", 3433, __extension__ __PRETTY_FUNCTION__)) | ||||
3433 | && "LowerWRITE_REGISTER called for non-i64 type argument.")(static_cast <bool> (WriteValue.getValueType() == MVT:: i64 && "LowerWRITE_REGISTER called for non-i64 type argument." ) ? void (0) : __assert_fail ("WriteValue.getValueType() == MVT::i64 && \"LowerWRITE_REGISTER called for non-i64 type argument.\"" , "llvm/lib/Target/ARM/ARMISelLowering.cpp", 3433, __extension__ __PRETTY_FUNCTION__)); | ||||
3434 | |||||
3435 | SDValue Lo, Hi; | ||||
3436 | std::tie(Lo, Hi) = DAG.SplitScalar(WriteValue, DL, MVT::i32, MVT::i32); | ||||
3437 | SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi }; | ||||
3438 | return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops); | ||||
3439 | } | ||||
3440 | |||||
3441 | // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as | ||||
3442 | // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is | ||||
3443 | // one of the above mentioned nodes. It has to be wrapped because otherwise | ||||
3444 | // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only | ||||
3445 | // be used to form addressing mode. These wrapped nodes will be selected | ||||
3446 | // into MOVi. | ||||
3447 | SDValue ARMTargetLowering::LowerConstantPool(SDValue Op, | ||||
3448 | SelectionDAG &DAG) const { | ||||
3449 | EVT PtrVT = Op.getValueType(); | ||||
3450 | // FIXME there is no actual debug info here | ||||
3451 | SDLoc dl(Op); | ||||
3452 | ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); | ||||
3453 | SDValue Res; | ||||
3454 | |||||
3455 | // When generating execute-only code Constant Pools must be promoted to the | ||||
3456 | // global data section. It's a bit ugly that we can't share them across basic | ||||
3457 | // blocks, but this way we guarantee that execute-only behaves correct with | ||||
3458 | // position-independent addressing modes. | ||||
3459 | if (Subtarget->genExecuteOnly()) { | ||||
3460 | auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>(); | ||||
3461 | auto T = const_cast<Type*>(CP->getType()); | ||||
3462 | auto C = const_cast<Constant*>(CP->getConstVal()); | ||||
3463 | auto M = const_cast<Module*>(DAG.getMachineFunction(). | ||||
3464 | getFunction().getParent()); | ||||
3465 | auto GV = new GlobalVariable( | ||||
3466 | *M, T, /*isConstant=*/true, GlobalVariable::InternalLinkage, C, | ||||
3467 | Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" + | ||||
3468 | Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" + | ||||
3469 | Twine(AFI->createPICLabelUId()) | ||||
3470 | ); | ||||
3471 | SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV), | ||||
3472 | dl, PtrVT); | ||||
3473 | return LowerGlobalAddress(GA, DAG); | ||||
3474 | } | ||||
3475 | |||||
3476 | // The 16-bit ADR instruction can only encode offsets that are multiples of 4, | ||||
3477 | // so we need to align to at least 4 bytes when we don't have 32-bit ADR. | ||||
3478 | Align CPAlign = CP->getAlign(); | ||||
3479 | if (Subtarget->isThumb1Only()) | ||||
3480 | CPAlign = std::max(CPAlign, Align(4)); | ||||
3481 | if (CP->isMachineConstantPoolEntry()) | ||||
3482 | Res = | ||||
3483 | DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CPAlign); | ||||
3484 | else | ||||
3485 | Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CPAlign); | ||||
3486 | return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res); | ||||
3487 | } | ||||
3488 | |||||
/// ARM emits jump tables inline in the text section rather than through a
/// separate jump-table data section.
unsigned ARMTargetLowering::getJumpTableEncoding() const {
  return MachineJumpTableInfo::EK_Inline;
}
3492 | |||||
/// Lower a BlockAddress node by materializing the address through a
/// constant-pool load, adding a PIC adjustment when position-independent (or
/// ROPI) code is being generated.
SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
                                             SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  unsigned ARMPCLabelIndex = 0;
  SDLoc DL(Op);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
  SDValue CPAddr;
  bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
  if (!IsPositionIndependent) {
    // Absolute addressing: the block address goes straight into the pool.
    CPAddr = DAG.getTargetConstantPool(BA, PtrVT, Align(4));
  } else {
    // PIC: store a pc-relative value in the pool. PCAdj compensates for the
    // pipeline offset of the PC read (Thumb vs ARM encoding).
    unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
    ARMPCLabelIndex = AFI->createPICLabelUId();
    ARMConstantPoolValue *CPV =
      ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
                                      ARMCP::CPBlockAddress, PCAdj);
    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
  }
  CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
  SDValue Result = DAG.getLoad(
      PtrVT, DL, DAG.getEntryNode(), CPAddr,
      MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
  if (!IsPositionIndependent)
    return Result;
  // Add the current PC (via the matching PIC label) to form the final address.
  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
  return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
}
3522 | |||||
/// Convert a TLS address reference into the correct sequence of loads
/// and calls to compute the variable's address for Darwin, and return an
/// SDValue containing the final node.

/// Darwin only has one TLS scheme which must be capable of dealing with the
/// fully general situation, in the worst case. This means:
///     + "extern __thread" declaration.
///     + Defined in a possibly unknown dynamic library.
///
/// The general system is that each __thread variable has a [3 x i32] descriptor
/// which contains information used by the runtime to calculate the address. The
/// only part of this the compiler needs to know about is the first word, which
/// contains a function pointer that must be called with the address of the
/// entire descriptor in "r0".
///
/// Since this descriptor may be in a different unit, in general access must
/// proceed along the usual ARM rules. A common sequence to produce is:
///
///     movw rT1, :lower16:_var$non_lazy_ptr
///     movt rT1, :upper16:_var$non_lazy_ptr
///     ldr r0, [rT1]
///     ldr rT2, [r0]
///     blx rT2
///     [...address now in r0...]
SDValue
ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
                                               SelectionDAG &DAG) const {
  assert(Subtarget->isTargetDarwin() &&
         "This function expects a Darwin target");
  SDLoc DL(Op);

  // First step is to get the address of the actual global symbol. This is
  // where the TLS descriptor lives.
  SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);

  // The first entry in the descriptor is a function pointer that we must call
  // to obtain the address of the variable.
  SDValue Chain = DAG.getEntryNode();
  SDValue FuncTLVGet = DAG.getLoad(
      MVT::i32, DL, Chain, DescAddr,
      MachinePointerInfo::getGOT(DAG.getMachineFunction()), Align(4),
      MachineMemOperand::MONonTemporal | MachineMemOperand::MODereferenceable |
          MachineMemOperand::MOInvariant);
  Chain = FuncTLVGet.getValue(1);

  // The call below may adjust the stack; record that for frame lowering.
  MachineFunction &F = DAG.getMachineFunction();
  MachineFrameInfo &MFI = F.getFrameInfo();
  MFI.setAdjustsStack(true);

  // TLS calls preserve all registers except those that absolutely must be
  // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
  // silly).
  auto TRI =
      getTargetMachine().getSubtargetImpl(F.getFunction())->getRegisterInfo();
  auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
  const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());

  // Finally, we can make the call. This is just a degenerate version of a
  // normal ARM call node: r0 takes the address of the descriptor, and
  // returns the address of the variable in this thread.
  Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
  Chain =
      DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
                  Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
                  DAG.getRegisterMask(Mask), Chain.getValue(1));
  return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
}
3590 | |||||
SDValue
ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
                                                SelectionDAG &DAG) const {
  assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");

  SDValue Chain = DAG.getEntryNode();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDLoc DL(Op);

  // Load the current TEB (thread environment block) with an MRC read of
  // coprocessor 15, CRn=c13, CRm=c0, opc1=0, opc2=2.
  SDValue Ops[] = {Chain,
                   DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),
                   DAG.getTargetConstant(15, DL, MVT::i32),
                   DAG.getTargetConstant(0, DL, MVT::i32),
                   DAG.getTargetConstant(13, DL, MVT::i32),
                   DAG.getTargetConstant(0, DL, MVT::i32),
                   DAG.getTargetConstant(2, DL, MVT::i32)};
  SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
                                   DAG.getVTList(MVT::i32, MVT::Other), Ops);

  SDValue TEB = CurrentTEB.getValue(0);
  Chain = CurrentTEB.getValue(1);

  // Load the ThreadLocalStoragePointer from the TEB
  // A pointer to the TLS array is located at offset 0x2c from the TEB.
  SDValue TLSArray =
      DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));
  TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());

  // The pointer to the thread's TLS data area is at the TLS Index scaled by 4
  // offset into the TLSArray.

  // Load the TLS index from the C runtime
  SDValue TLSIndex =
      DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);
  TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
  TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());

  // Scale the index by 4 (shift left 2) to address the pointer-sized slots.
  SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
                             DAG.getConstant(2, DL, MVT::i32));
  SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
                            DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
                            MachinePointerInfo());

  // Get the offset of the start of the .tls section (section base). The
  // SECREL constant-pool entry resolves to the variable's offset within the
  // TLS section.
  const auto *GA = cast<GlobalAddressSDNode>(Op);
  auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
  SDValue Offset = DAG.getLoad(
      PtrVT, DL, Chain,
      DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,
                  DAG.getTargetConstantPool(CPV, PtrVT, Align(4))),
      MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));

  // Final address = this thread's TLS data area + section-relative offset.
  return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
}
3646 | |||||
3647 | // Lower ISD::GlobalTLSAddress using the "general dynamic" model | ||||
3648 | SDValue | ||||
3649 | ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, | ||||
3650 | SelectionDAG &DAG) const { | ||||
3651 | SDLoc dl(GA); | ||||
3652 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); | ||||
3653 | unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; | ||||
3654 | MachineFunction &MF = DAG.getMachineFunction(); | ||||
3655 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | ||||
3656 | unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); | ||||
3657 | ARMConstantPoolValue *CPV = | ||||
3658 | ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex, | ||||
3659 | ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true); | ||||
3660 | SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, Align(4)); | ||||
3661 | Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); | ||||
3662 | Argument = DAG.getLoad( | ||||
3663 | PtrVT, dl, DAG.getEntryNode(), Argument, | ||||
3664 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); | ||||
3665 | SDValue Chain = Argument.getValue(1); | ||||
3666 | |||||
3667 | SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); | ||||
3668 | Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel); | ||||
3669 | |||||
3670 | // call __tls_get_addr. | ||||
3671 | ArgListTy Args; | ||||
3672 | ArgListEntry Entry; | ||||
3673 | Entry.Node = Argument; | ||||
3674 | Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext()); | ||||
3675 | Args.push_back(Entry); | ||||
3676 | |||||
3677 | // FIXME: is there useful debug info available here? | ||||
3678 | TargetLowering::CallLoweringInfo CLI(DAG); | ||||
3679 | CLI.setDebugLoc(dl).setChain(Chain).setLibCallee( | ||||
3680 | CallingConv::C, Type::getInt32Ty(*DAG.getContext()), | ||||
3681 | DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args)); | ||||
3682 | |||||
3683 | std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); | ||||
3684 | return CallResult.first; | ||||
3685 | } | ||||
3686 | |||||
// Lower ISD::GlobalTLSAddress using the "initial exec" or
// "local exec" model.
SDValue
ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
                                        SelectionDAG &DAG,
                                        TLSModel::Model model) const {
  const GlobalValue *GV = GA->getGlobal();
  SDLoc dl(GA);
  SDValue Offset;
  SDValue Chain = DAG.getEntryNode();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  // Get the Thread Pointer
  SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);

  if (model == TLSModel::InitialExec) {
    MachineFunction &MF = DAG.getMachineFunction();
    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
    unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
    // Initial exec model.
    // PC-relative adjustment: 4 bytes in Thumb mode, 8 in ARM mode.
    unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
    // GOTTPOFF constant-pool entry; made PC-relative with PIC_ADD below.
    ARMConstantPoolValue *CPV =
      ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
                                      ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
                                      true);
    // First load: fetch the constant-pool word (the GOTTPOFF address).
    Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
    Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
    Offset = DAG.getLoad(
        PtrVT, dl, Chain, Offset,
        MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
    Chain = Offset.getValue(1);

    SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
    Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);

    // Second load: read the thread-pointer-relative offset through the
    // PC-relative address computed above.
    Offset = DAG.getLoad(
        PtrVT, dl, Chain, Offset,
        MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
  } else {
    // local exec model
    assert(model == TLSModel::LocalExec);
    // TPOFF: the offset from the thread pointer is emitted directly into
    // the constant pool; a single load retrieves it.
    ARMConstantPoolValue *CPV =
      ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
    Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
    Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
    Offset = DAG.getLoad(
        PtrVT, dl, Chain, Offset,
        MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
  }

  // The address of the thread local variable is the add of the thread
  // pointer with the offset of the variable.
  return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
}
3740 | |||||
// Dispatch TLS address lowering by target OS and, for ELF, by TLS model.
SDValue
ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
  // Emulated TLS (runtime-library based) overrides all target-specific paths.
  if (DAG.getTarget().useEmulatedTLS())
    return LowerToTLSEmulatedModel(GA, DAG);

  if (Subtarget->isTargetDarwin())
    return LowerGlobalTLSAddressDarwin(Op, DAG);

  if (Subtarget->isTargetWindows())
    return LowerGlobalTLSAddressWindows(Op, DAG);

  // TODO: implement the "local dynamic" model
  assert(Subtarget->isTargetELF() && "Only ELF implemented here");
  TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());

  switch (model) {
    case TLSModel::GeneralDynamic:
    case TLSModel::LocalDynamic:
      // Local dynamic falls back to general dynamic (see TODO above).
      return LowerToTLSGeneralDynamicModel(GA, DAG);
    case TLSModel::InitialExec:
    case TLSModel::LocalExec:
      return LowerToTLSExecModels(GA, DAG, model);
  }
  llvm_unreachable("bogus TLS model");
}
3767 | |||||
3768 | /// Return true if all users of V are within function F, looking through | ||||
3769 | /// ConstantExprs. | ||||
3770 | static bool allUsersAreInFunction(const Value *V, const Function *F) { | ||||
3771 | SmallVector<const User*,4> Worklist(V->users()); | ||||
3772 | while (!Worklist.empty()) { | ||||
3773 | auto *U = Worklist.pop_back_val(); | ||||
3774 | if (isa<ConstantExpr>(U)) { | ||||
3775 | append_range(Worklist, U->users()); | ||||
3776 | continue; | ||||
3777 | } | ||||
3778 | |||||
3779 | auto *I = dyn_cast<Instruction>(U); | ||||
3780 | if (!I || I->getParent()->getParent() != F) | ||||
3781 | return false; | ||||
3782 | } | ||||
3783 | return true; | ||||
3784 | } | ||||
3785 | |||||
/// Try to emit a small, constant, unnamed-addr global directly into the
/// constant pool instead of referencing it indirectly. Returns a Wrapper
/// node over the new constant-pool address on success, or an empty SDValue
/// when promotion is not possible/profitable.
static SDValue promoteToConstantPool(const ARMTargetLowering *TLI,
                                     const GlobalValue *GV, SelectionDAG &DAG,
                                     EVT PtrVT, const SDLoc &dl) {
  // If we're creating a pool entry for a constant global with unnamed address,
  // and the global is small enough, we can emit it inline into the constant pool
  // to save ourselves an indirection.
  //
  // This is a win if the constant is only used in one function (so it doesn't
  // need to be duplicated) or duplicating the constant wouldn't increase code
  // size (implying the constant is no larger than 4 bytes).
  const Function &F = DAG.getMachineFunction().getFunction();

  // We rely on this decision to inline being idempotent and unrelated to the
  // use-site. We know that if we inline a variable at one use site, we'll
  // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
  // doesn't know about this optimization, so bail out if it's enabled else
  // we could decide to inline here (and thus never emit the GV) but require
  // the GV from fast-isel generated code.
  if (!EnableConstpoolPromotion ||
      DAG.getMachineFunction().getTarget().Options.EnableFastISel)
    return SDValue();

  // Only local, constant, unnamed-addr globals with an initializer qualify.
  auto *GVar = dyn_cast<GlobalVariable>(GV);
  if (!GVar || !GVar->hasInitializer() ||
      !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
      !GVar->hasLocalLinkage())
    return SDValue();

  // If we inline a value that contains relocations, we move the relocations
  // from .data to .text. This is not allowed in position-independent code.
  auto *Init = GVar->getInitializer();
  if ((TLI->isPositionIndependent() || TLI->getSubtarget()->isROPI()) &&
      Init->needsDynamicRelocation())
    return SDValue();

  // The constant islands pass can only really deal with alignment requests
  // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote
  // any type wanting greater alignment requirements than 4 bytes. We also
  // can only promote constants that are multiples of 4 bytes in size or
  // are paddable to a multiple of 4. Currently we only try and pad constants
  // that are strings for simplicity.
  auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
  unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
  Align PrefAlign = DAG.getDataLayout().getPreferredAlign(GVar);
  unsigned RequiredPadding = 4 - (Size % 4);
  bool PaddingPossible =
    RequiredPadding == 4 || (CDAInit && CDAInit->isString());
  if (!PaddingPossible || PrefAlign > 4 || Size > ConstpoolPromotionMaxSize ||
      Size == 0)
    return SDValue();

  unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  // We can't bloat the constant pool too much, else the ConstantIslands pass
  // may fail to converge. If we haven't promoted this global yet (it may have
  // multiple uses), and promoting it would increase the constant pool size (Sz
  // > 4), ensure we have space to do so up to MaxTotal.
  if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4)
    if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >=
        ConstpoolPromotionMaxTotal)
      return SDValue();

  // This is only valid if all users are in a single function; we can't clone
  // the constant in general. The LLVM IR unnamed_addr allows merging
  // constants, but not cloning them.
  //
  // We could potentially allow cloning if we could prove all uses of the
  // constant in the current function don't care about the address, like
  // printf format strings. But that isn't implemented for now.
  if (!allUsersAreInFunction(GVar, &F))
    return SDValue();

  // We're going to inline this global. Pad it out if needed.
  if (RequiredPadding != 4) {
    // Safe: PaddingPossible above guarantees CDAInit is a string here.
    StringRef S = CDAInit->getAsString();

    SmallVector<uint8_t,16> V(S.size());
    std::copy(S.bytes_begin(), S.bytes_end(), V.begin());
    while (RequiredPadding--)
      V.push_back(0);
    Init = ConstantDataArray::get(*DAG.getContext(), V);
  }

  // Record the promotion (once per global) so the total constant-pool
  // increase can be tracked across multiple promotions.
  auto CPVal = ARMConstantPoolConstant::Create(GVar, Init);
  SDValue CPAddr = DAG.getTargetConstantPool(CPVal, PtrVT, Align(4));
  if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) {
    AFI->markGlobalAsPromotedToConstantPool(GVar);
    AFI->setPromotedConstpoolIncrease(AFI->getPromotedConstpoolIncrease() +
                                      PaddedSize - 4);
  }
  ++NumConstpoolPromoted;
  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
}
3881 | |||||
3882 | bool ARMTargetLowering::isReadOnly(const GlobalValue *GV) const { | ||||
3883 | if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) | ||||
3884 | if (!(GV = GA->getAliaseeObject())) | ||||
3885 | return false; | ||||
3886 | if (const auto *V = dyn_cast<GlobalVariable>(GV)) | ||||
3887 | return V->isConstant(); | ||||
3888 | return isa<Function>(GV); | ||||
3889 | } | ||||
3890 | |||||
// Dispatch global-address lowering on the target object-file format.
SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op,
                                              SelectionDAG &DAG) const {
  switch (Subtarget->getTargetTriple().getObjectFormat()) {
  default: llvm_unreachable("unknown object format");
  case Triple::COFF:
    return LowerGlobalAddressWindows(Op, DAG);
  case Triple::ELF:
    return LowerGlobalAddressELF(Op, DAG);
  case Triple::MachO:
    return LowerGlobalAddressDarwin(Op, DAG);
  }
}
3903 | |||||
// Lower a global address for ELF targets, choosing between constant-pool
// promotion, PIC (optionally via GOT), ROPI/RWPI relative addressing, a
// movw/movt pair, or a plain constant-pool load.
SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
                                                 SelectionDAG &DAG) const {
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDLoc dl(Op);
  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  const TargetMachine &TM = getTargetMachine();
  bool IsRO = isReadOnly(GV);

  // promoteToConstantPool only if not generating XO text section
  if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly())
    if (SDValue V = promoteToConstantPool(this, GV, DAG, PtrVT, dl))
      return V;

  if (isPositionIndependent()) {
    // Non-DSO-local globals are referenced through their GOT entry
    // (GOT_PREL) and need an extra load; DSO-local ones are PC-relative.
    bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
    SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                           UseGOT_PREL ? ARMII::MO_GOT : 0);
    SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
    if (UseGOT_PREL)
      Result =
          DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
                      MachinePointerInfo::getGOT(DAG.getMachineFunction()));
    return Result;
  } else if (Subtarget->isROPI() && IsRO) {
    // PC-relative.
    SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
    SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
    return Result;
  } else if (Subtarget->isRWPI() && !IsRO) {
    // SB-relative: writable data is addressed relative to the static base
    // register (R9).
    SDValue RelAddr;
    if (Subtarget->useMovt()) {
      ++NumMovwMovt;
      SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
      RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
    } else { // use literal pool for address constant
      ARMConstantPoolValue *CPV =
        ARMConstantPoolConstant::Create(GV, ARMCP::SBREL);
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      RelAddr = DAG.getLoad(
          PtrVT, dl, DAG.getEntryNode(), CPAddr,
          MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
    }
    SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT);
    SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr);
    return Result;
  }

  // If we have T2 ops, we can materialize the address directly via movt/movw
  // pair. This is always cheaper.
  if (Subtarget->useMovt()) {
    ++NumMovwMovt;
    // FIXME: Once remat is capable of dealing with instructions with register
    // operands, expand this into two nodes.
    return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
                       DAG.getTargetGlobalAddress(GV, dl, PtrVT));
  } else {
    SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, Align(4));
    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
    return DAG.getLoad(
        PtrVT, dl, DAG.getEntryNode(), CPAddr,
        MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
  }
}
3969 | |||||
// Lower a global address for Darwin (Mach-O) targets: wrap the address
// (PIC-wrapped when position independent) and add a GOT-style load for
// indirect symbols.
SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
                                                    SelectionDAG &DAG) const {
  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
         "ROPI/RWPI not currently supported for Darwin");
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDLoc dl(Op);
  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();

  if (Subtarget->useMovt())
    ++NumMovwMovt;

  // FIXME: Once remat is capable of dealing with instructions with register
  // operands, expand this into multiple nodes
  unsigned Wrapper =
      isPositionIndependent() ? ARMISD::WrapperPIC : ARMISD::Wrapper;

  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
  SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);

  // Indirect symbols are reached through a non-lazy pointer, hence the
  // extra load.
  if (Subtarget->isGVIndirectSymbol(GV))
    Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
                         MachinePointerInfo::getGOT(DAG.getMachineFunction()));
  return Result;
}
3994 | |||||
// Lower a global address for Windows (COFF) targets. Addresses are always
// materialized with movw/movt; dllimport and non-DSO-local symbols are
// reached through an extra indirection (import table / stub).
SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
                                                     SelectionDAG &DAG) const {
  assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
  assert(Subtarget->useMovt() &&
         "Windows on ARM expects to use movw/movt");
  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
         "ROPI/RWPI not currently supported for Windows");

  const TargetMachine &TM = getTargetMachine();
  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  ARMII::TOF TargetFlags = ARMII::MO_NO_FLAG;
  if (GV->hasDLLImportStorageClass())
    TargetFlags = ARMII::MO_DLLIMPORT;
  else if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
    TargetFlags = ARMII::MO_COFFSTUB;
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue Result;
  SDLoc DL(Op);

  ++NumMovwMovt;

  // FIXME: Once remat is capable of dealing with instructions with register
  // operands, expand this into two nodes.
  Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
                       DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*offset=*/0,
                                                  TargetFlags));
  // DLLIMPORT/COFFSTUB symbols hold a pointer to the real address; load
  // through it.
  if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
    Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
                         MachinePointerInfo::getGOT(DAG.getMachineFunction()));
  return Result;
}
4026 | |||||
4027 | SDValue | ||||
4028 | ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const { | ||||
4029 | SDLoc dl(Op); | ||||
4030 | SDValue Val = DAG.getConstant(0, dl, MVT::i32); | ||||
4031 | return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, | ||||
4032 | DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0), | ||||
4033 | Op.getOperand(1), Val); | ||||
4034 | } | ||||
4035 | |||||
4036 | SDValue | ||||
4037 | ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const { | ||||
4038 | SDLoc dl(Op); | ||||
4039 | return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0), | ||||
4040 | Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32)); | ||||
4041 | } | ||||
4042 | |||||
4043 | SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, | ||||
4044 | SelectionDAG &DAG) const { | ||||
4045 | SDLoc dl(Op); | ||||
4046 | return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other, | ||||
4047 | Op.getOperand(0)); | ||||
4048 | } | ||||
4049 | |||||
SDValue ARMTargetLowering::LowerINTRINSIC_VOID(
    SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const {
  // The intrinsic ID is operand 0 unless operand 0 is the chain (type
  // MVT::Other), in which case the ID is operand 1 — the boolean comparison
  // yields the 0/1 operand index.
  unsigned IntNo =
      cast<ConstantSDNode>(
          Op.getOperand(Op.getOperand(0).getValueType() == MVT::Other))
          ->getZExtValue();
  switch (IntNo) {
  default:
    return SDValue(); // Don't custom lower most intrinsics.
  case Intrinsic::arm_gnu_eabi_mcount: {
    MachineFunction &MF = DAG.getMachineFunction();
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    SDLoc dl(Op);
    SDValue Chain = Op.getOperand(0);
    // call "\01__gnu_mcount_nc"
    const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
    const uint32_t *Mask =
        ARI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
    assert(Mask && "Missing call preserved mask for calling convention");
    // Mark LR an implicit live-in.
    Register Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
    SDValue ReturnAddress =
        DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, PtrVT);
    constexpr EVT ResultTys[] = {MVT::Other, MVT::Glue};
    SDValue Callee =
        DAG.getTargetExternalSymbol("\01__gnu_mcount_nc", PtrVT, 0);
    SDValue RegisterMask = DAG.getRegisterMask(Mask);
    // Thumb uses the tBL_PUSHLR pseudo, which carries predicate operands
    // (ARMCC::AL + reg0); the ARM BL_PUSHLR pseudo takes none.
    if (Subtarget->isThumb())
      return SDValue(
          DAG.getMachineNode(
              ARM::tBL_PUSHLR, dl, ResultTys,
              {ReturnAddress, DAG.getTargetConstant(ARMCC::AL, dl, PtrVT),
               DAG.getRegister(0, PtrVT), Callee, RegisterMask, Chain}),
          0);
    return SDValue(
        DAG.getMachineNode(ARM::BL_PUSHLR, dl, ResultTys,
                           {ReturnAddress, Callee, RegisterMask, Chain}),
        0);
  }
  }
}
4091 | |||||
4092 | SDValue | ||||
4093 | ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, | ||||
4094 | const ARMSubtarget *Subtarget) const { | ||||
4095 | unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); | ||||
4096 | SDLoc dl(Op); | ||||
4097 | switch (IntNo) { | ||||
4098 | default: return SDValue(); // Don't custom lower most intrinsics. | ||||
4099 | case Intrinsic::thread_pointer: { | ||||
4100 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); | ||||
4101 | return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); | ||||
4102 | } | ||||
4103 | case Intrinsic::arm_cls: { | ||||
4104 | const SDValue &Operand = Op.getOperand(1); | ||||
4105 | const EVT VTy = Op.getValueType(); | ||||
4106 | SDValue SRA = | ||||
4107 | DAG.getNode(ISD::SRA, dl, VTy, Operand, DAG.getConstant(31, dl, VTy)); | ||||
4108 | SDValue XOR = DAG.getNode(ISD::XOR, dl, VTy, SRA, Operand); | ||||
4109 | SDValue SHL = | ||||
4110 | DAG.getNode(ISD::SHL, dl, VTy, XOR, DAG.getConstant(1, dl, VTy)); | ||||
4111 | SDValue OR = | ||||
4112 | DAG.getNode(ISD::OR, dl, VTy, SHL, DAG.getConstant(1, dl, VTy)); | ||||
4113 | SDValue Result = DAG.getNode(ISD::CTLZ, dl, VTy, OR); | ||||
4114 | return Result; | ||||
4115 | } | ||||
4116 | case Intrinsic::arm_cls64: { | ||||
4117 | // cls(x) = if cls(hi(x)) != 31 then cls(hi(x)) | ||||
4118 | // else 31 + clz(if hi(x) == 0 then lo(x) else not(lo(x))) | ||||
4119 | const SDValue &Operand = Op.getOperand(1); | ||||
4120 | const EVT VTy = Op.getValueType(); | ||||
4121 | SDValue Lo, Hi; | ||||
4122 | std::tie(Lo, Hi) = DAG.SplitScalar(Operand, dl, VTy, VTy); | ||||
4123 | SDValue Constant0 = DAG.getConstant(0, dl, VTy); | ||||
4124 | SDValue Constant1 = DAG.getConstant(1, dl, VTy); | ||||
4125 | SDValue Constant31 = DAG.getConstant(31, dl, VTy); | ||||
4126 | SDValue SRAHi = DAG.getNode(ISD::SRA, dl, VTy, Hi, Constant31); | ||||
4127 | SDValue XORHi = DAG.getNode(ISD::XOR, dl, VTy, SRAHi, Hi); | ||||
4128 | SDValue SHLHi = DAG.getNode(ISD::SHL, dl, VTy, XORHi, Constant1); | ||||
4129 | SDValue ORHi = DAG.getNode(ISD::OR, dl, VTy, SHLHi, Constant1); | ||||
4130 | SDValue CLSHi = DAG.getNode(ISD::CTLZ, dl, VTy, ORHi); | ||||
4131 | SDValue CheckLo = | ||||
4132 | DAG.getSetCC(dl, MVT::i1, CLSHi, Constant31, ISD::CondCode::SETEQ); | ||||
4133 | SDValue HiIsZero = | ||||
4134 | DAG.getSetCC(dl, MVT::i1, Hi, Constant0, ISD::CondCode::SETEQ); | ||||
4135 | SDValue AdjustedLo = | ||||
4136 | DAG.getSelect(dl, VTy, HiIsZero, Lo, DAG.getNOT(dl, Lo, VTy)); | ||||
4137 | SDValue CLZAdjustedLo = DAG.getNode(ISD::CTLZ, dl, VTy, AdjustedLo); | ||||
4138 | SDValue Result = | ||||
4139 | DAG.getSelect(dl, VTy, CheckLo, | ||||
4140 | DAG.getNode(ISD::ADD, dl, VTy, CLZAdjustedLo, Constant31), CLSHi); | ||||
4141 | return Result; | ||||
4142 | } | ||||
4143 | case Intrinsic::eh_sjlj_lsda: { | ||||
4144 | MachineFunction &MF = DAG.getMachineFunction(); | ||||
4145 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | ||||
4146 | unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); | ||||
4147 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); | ||||
4148 | SDValue CPAddr; | ||||
4149 | bool IsPositionIndependent = isPositionIndependent(); | ||||
4150 | unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0; | ||||
4151 | ARMConstantPoolValue *CPV = | ||||
4152 | ARMConstantPoolConstant::Create(&MF.getFunction(), ARMPCLabelIndex, | ||||
4153 | ARMCP::CPLSDA, PCAdj); | ||||
4154 | CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4)); | ||||
4155 | CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); | ||||
4156 | SDValue Result = DAG.getLoad( | ||||
4157 | PtrVT, dl, DAG.getEntryNode(), CPAddr, | ||||
4158 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); | ||||
4159 | |||||
4160 | if (IsPositionIndependent) { | ||||
4161 | SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); | ||||
4162 | Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); | ||||
4163 | } | ||||
4164 | return Result; | ||||
4165 | } | ||||
4166 | case Intrinsic::arm_neon_vabs: | ||||
4167 | return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(), | ||||
4168 | Op.getOperand(1)); | ||||
4169 | case Intrinsic::arm_neon_vmulls: | ||||
4170 | case Intrinsic::arm_neon_vmullu: { | ||||
4171 | unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls) | ||||
4172 | ? ARMISD::VMULLs : ARMISD::VMULLu; | ||||
4173 | return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), | ||||
4174 | Op.getOperand(1), Op.getOperand(2)); | ||||
4175 | } | ||||
4176 | case Intrinsic::arm_neon_vminnm: | ||||
4177 | case Intrinsic::arm_neon_vmaxnm: { | ||||
4178 | unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm) | ||||
4179 | ? ISD::FMINNUM : ISD::FMAXNUM; | ||||
4180 | return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), | ||||
4181 | Op.getOperand(1), Op.getOperand(2)); | ||||
4182 | } | ||||
4183 | case Intrinsic::arm_neon_vminu: | ||||
4184 | case Intrinsic::arm_neon_vmaxu: { | ||||
4185 | if (Op.getValueType().isFloatingPoint()) | ||||
4186 | return SDValue(); | ||||
4187 | unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu) | ||||
4188 | ? ISD::UMIN : ISD::UMAX; | ||||
4189 | return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), | ||||
4190 | Op.getOperand(1), Op.getOperand(2)); | ||||
4191 | } | ||||
4192 | case Intrinsic::arm_neon_vmins: | ||||
4193 | case Intrinsic::arm_neon_vmaxs: { | ||||
4194 | // v{min,max}s is overloaded between signed integers and floats. | ||||
4195 | if (!Op.getValueType().isFloatingPoint()) { | ||||
4196 | unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins) | ||||
4197 | ? ISD::SMIN : ISD::SMAX; | ||||
4198 | return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), | ||||
4199 | Op.getOperand(1), Op.getOperand(2)); | ||||
4200 | } | ||||
4201 | unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins) | ||||
4202 | ? ISD::FMINIMUM : ISD::FMAXIMUM; | ||||
4203 | return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), | ||||
4204 | Op.getOperand(1), Op.getOperand(2)); | ||||
4205 | } | ||||
4206 | case Intrinsic::arm_neon_vtbl1: | ||||
4207 | return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(), | ||||
4208 | Op.getOperand(1), Op.getOperand(2)); | ||||
4209 | case Intrinsic::arm_neon_vtbl2: | ||||
4210 | return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(), | ||||
4211 | Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); | ||||
4212 | case Intrinsic::arm_mve_pred_i2v: | ||||
4213 | case Intrinsic::arm_mve_pred_v2i: | ||||
4214 | return DAG.getNode(ARMISD::PREDICATE_CAST, SDLoc(Op), Op.getValueType(), | ||||
4215 | Op.getOperand(1)); | ||||
4216 | case Intrinsic::arm_mve_vreinterpretq: | ||||
4217 | return DAG.getNode(ARMISD::VECTOR_REG_CAST, SDLoc(Op), Op.getValueType(), | ||||
4218 | Op.getOperand(1)); | ||||
4219 | case Intrinsic::arm_mve_lsll: | ||||
4220 | return DAG.getNode(ARMISD::LSLL, SDLoc(Op), Op->getVTList(), | ||||
4221 | Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); | ||||
4222 | case Intrinsic::arm_mve_asrl: | ||||
4223 | return DAG.getNode(ARMISD::ASRL, SDLoc(Op), Op->getVTList(), | ||||
4224 | Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); | ||||
4225 | } | ||||
4226 | } | ||||
4227 | |||||
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
                                 const ARMSubtarget *Subtarget) {
  // Lower ISD::ATOMIC_FENCE. Depending on subtarget and scope this becomes a
  // DMB instruction, an MCR-based barrier, or no hardware barrier at all.
  SDLoc dl(Op);
  // Operand 2 is the synchronization scope of the fence.
  ConstantSDNode *SSIDNode = cast<ConstantSDNode>(Op.getOperand(2));
  auto SSID = static_cast<SyncScope::ID>(SSIDNode->getZExtValue());
  if (SSID == SyncScope::SingleThread)
    // A single-thread fence needs no hardware barrier; return the node
    // unchanged.
    return Op;

  if (!Subtarget->hasDataBarrier()) {
    // Some ARMv6 cpus can support data barriers with an mcr instruction.
    // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
    // here.
    assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&(static_cast <bool> (Subtarget->hasV6Ops() && !Subtarget->isThumb() && "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!" ) ? void (0) : __assert_fail ("Subtarget->hasV6Ops() && !Subtarget->isThumb() && \"Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!\"" , "llvm/lib/Target/ARM/ARMISelLowering.cpp", 4241, __extension__ __PRETTY_FUNCTION__))
           "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!")(static_cast <bool> (Subtarget->hasV6Ops() && !Subtarget->isThumb() && "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!" ) ? void (0) : __assert_fail ("Subtarget->hasV6Ops() && !Subtarget->isThumb() && \"Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!\"" , "llvm/lib/Target/ARM/ARMISelLowering.cpp", 4241, __extension__ __PRETTY_FUNCTION__));
    return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
                       DAG.getConstant(0, dl, MVT::i32));
  }

  // Operand 1 is the atomic ordering of the fence; it drives the choice of
  // barrier domain below.
  ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
  AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
  ARM_MB::MemBOpt Domain = ARM_MB::ISH;
  if (Subtarget->isMClass()) {
    // Only a full system barrier exists in the M-class architectures.
    Domain = ARM_MB::SY;
  } else if (Subtarget->preferISHSTBarriers() &&
             Ord == AtomicOrdering::Release) {
    // Swift happens to implement ISHST barriers in a way that's compatible with
    // Release semantics but weaker than ISH so we'd be fools not to use
    // it. Beware: other processors probably don't!
    Domain = ARM_MB::ISHST;
  }

  // Emit a DMB with the selected domain via the arm.dmb intrinsic.
  return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
                     DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32),
                     DAG.getConstant(Domain, dl, MVT::i32));
}
4264 | |||||
4265 | static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG, | ||||
4266 | const ARMSubtarget *Subtarget) { | ||||
4267 | // ARM pre v5TE and Thumb1 does not have preload instructions. | ||||
4268 | if (!(Subtarget->isThumb2() || | ||||
4269 | (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps()))) | ||||
4270 | // Just preserve the chain. | ||||
4271 | return Op.getOperand(0); | ||||
4272 | |||||
4273 | SDLoc dl(Op); | ||||
4274 | unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1; | ||||
4275 | if (!isRead && | ||||
4276 | (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension())) | ||||
4277 | // ARMv7 with MP extension has PLDW. | ||||
4278 | return Op.getOperand(0); | ||||
4279 | |||||
4280 | unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue(); | ||||
4281 | if (Subtarget->isThumb()) { | ||||
4282 | // Invert the bits. | ||||
4283 | isRead = ~isRead & 1; | ||||
4284 | isData = ~isData & 1; | ||||
4285 | } | ||||
4286 | |||||
4287 | return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0), | ||||
4288 | Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32), | ||||
4289 | DAG.getConstant(isData, dl, MVT::i32)); | ||||
4290 | } | ||||
4291 | |||||
4292 | static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { | ||||
4293 | MachineFunction &MF = DAG.getMachineFunction(); | ||||
4294 | ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>(); | ||||
4295 | |||||
4296 | // vastart just stores the address of the VarArgsFrameIndex slot into the | ||||
4297 | // memory location argument. | ||||
4298 | SDLoc dl(Op); | ||||
4299 | EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); | ||||
4300 | SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); | ||||
4301 | const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); | ||||
4302 | return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), | ||||
4303 | MachinePointerInfo(SV)); | ||||
4304 | } | ||||
4305 | |||||
4306 | SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, | ||||
4307 | CCValAssign &NextVA, | ||||
4308 | SDValue &Root, | ||||
4309 | SelectionDAG &DAG, | ||||
4310 | const SDLoc &dl) const { | ||||
4311 | MachineFunction &MF = DAG.getMachineFunction(); | ||||
4312 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | ||||
4313 | |||||
4314 | const TargetRegisterClass *RC; | ||||
4315 | if (AFI->isThumb1OnlyFunction()) | ||||
4316 | RC = &ARM::tGPRRegClass; | ||||
4317 | else | ||||
4318 | RC = &ARM::GPRRegClass; | ||||
4319 | |||||
4320 | // Transform the arguments stored in physical registers into virtual ones. | ||||
4321 | Register Reg = MF.addLiveIn(VA.getLocReg(), RC); | ||||
4322 | SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); | ||||
4323 | |||||
4324 | SDValue ArgValue2; | ||||
4325 | if (NextVA.isMemLoc()) { | ||||
4326 | MachineFrameInfo &MFI = MF.getFrameInfo(); | ||||
4327 | int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true); | ||||
4328 | |||||
4329 | // Create load node to retrieve arguments from the stack. | ||||
4330 | SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); | ||||
4331 | ArgValue2 = DAG.getLoad( | ||||
4332 | MVT::i32, dl, Root, FIN, | ||||
4333 | MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)); | ||||
4334 | } else { | ||||
4335 | Reg = MF.addLiveIn(NextVA.getLocReg(), RC); | ||||
4336 | ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); | ||||
4337 | } | ||||
4338 | if (!Subtarget->isLittle()) | ||||
4339 | std::swap (ArgValue, ArgValue2); | ||||
4340 | return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2); | ||||
4341 | } | ||||
4342 | |||||
4343 | // The remaining GPRs hold either the beginning of variable-argument | ||||
4344 | // data, or the beginning of an aggregate passed by value (usually | ||||
4345 | // byval). Either way, we allocate stack slots adjacent to the data | ||||
4346 | // provided by our caller, and store the unallocated registers there. | ||||
4347 | // If this is a variadic function, the va_list pointer will begin with | ||||
4348 | // these values; otherwise, this reassembles a (byval) structure that | ||||
4349 | // was split between registers and memory. | ||||
4350 | // Return: The frame index registers were stored into. | ||||
4351 | int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, | ||||
4352 | const SDLoc &dl, SDValue &Chain, | ||||
4353 | const Value *OrigArg, | ||||
4354 | unsigned InRegsParamRecordIdx, | ||||
4355 | int ArgOffset, unsigned ArgSize) const { | ||||
4356 | // Currently, two use-cases possible: | ||||
4357 | // Case #1. Non-var-args function, and we meet first byval parameter. | ||||
4358 | // Setup first unallocated register as first byval register; | ||||
4359 | // eat all remained registers | ||||
4360 | // (these two actions are performed by HandleByVal method). | ||||
4361 | // Then, here, we initialize stack frame with | ||||
4362 | // "store-reg" instructions. | ||||
4363 | // Case #2. Var-args function, that doesn't contain byval parameters. | ||||
4364 | // The same: eat all remained unallocated registers, | ||||
4365 | // initialize stack frame. | ||||
4366 | |||||
4367 | MachineFunction &MF = DAG.getMachineFunction(); | ||||
4368 | MachineFrameInfo &MFI = MF.getFrameInfo(); | ||||
4369 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | ||||
4370 | unsigned RBegin, REnd; | ||||
4371 | if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) { | ||||
4372 | CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd); | ||||
4373 | } else { | ||||
4374 | unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs); | ||||
4375 | RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx]; | ||||
4376 | REnd = ARM::R4; | ||||
4377 | } | ||||
4378 | |||||
4379 | if (REnd != RBegin) | ||||
4380 | ArgOffset = -4 * (ARM::R4 - RBegin); | ||||
4381 | |||||
4382 | auto PtrVT = getPointerTy(DAG.getDataLayout()); | ||||
4383 | int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false); | ||||
4384 | SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT); | ||||
4385 | |||||
4386 | SmallVector<SDValue, 4> MemOps; | ||||
4387 | const TargetRegisterClass *RC = | ||||
4388 | AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass; | ||||
4389 | |||||
4390 | for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) { | ||||
4391 | Register VReg = MF.addLiveIn(Reg, RC); | ||||
4392 | SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); | ||||
4393 | SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, | ||||
4394 | MachinePointerInfo(OrigArg, 4 * i)); | ||||
4395 | MemOps.push_back(Store); | ||||
4396 | FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT)); | ||||
4397 | } | ||||
4398 | |||||
4399 | if (!MemOps.empty()) | ||||
4400 | Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); | ||||
4401 | return FrameIndex; | ||||
4402 | } | ||||
4403 | |||||
4404 | // Setup stack frame, the va_list pointer will start from. | ||||
4405 | void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, | ||||
4406 | const SDLoc &dl, SDValue &Chain, | ||||
4407 | unsigned ArgOffset, | ||||
4408 | unsigned TotalArgRegsSaveSize, | ||||
4409 | bool ForceMutable) const { | ||||
4410 | MachineFunction &MF = DAG.getMachineFunction(); | ||||
4411 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); | ||||
4412 | |||||
4413 | // Try to store any remaining integer argument regs | ||||
4414 | // to their spots on the stack so that they may be loaded by dereferencing | ||||
4415 | // the result of va_next. | ||||
4416 | // If there is no regs to be stored, just point address after last | ||||
4417 | // argument passed via stack. | ||||
4418 | int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr, | ||||
4419 | CCInfo.getInRegsParamsCount(), | ||||
4420 | CCInfo.getNextStackOffset(), | ||||
4421 | std::max(4U, TotalArgRegsSaveSize)); | ||||
4422 | AFI->setVarArgsFrameIndex(FrameIndex); | ||||
4423 | } | ||||
4424 | |||||
4425 | bool ARMTargetLowering::splitValueIntoRegisterParts( | ||||
4426 | SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, | ||||
4427 | unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const { | ||||
4428 | EVT ValueVT = Val.getValueType(); | ||||
4429 | if ((ValueVT == MVT::f16 || ValueVT == MVT::bf16) && PartVT == MVT::f32) { | ||||
4430 | unsigned ValueBits = ValueVT.getSizeInBits(); | ||||
4431 | unsigned PartBits = PartVT.getSizeInBits(); | ||||
4432 | Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(ValueBits), Val); | ||||
4433 | Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::getIntegerVT(PartBits), Val); | ||||
4434 | Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); | ||||
4435 | Parts[0] = Val; | ||||
4436 | return true; | ||||
4437 | } | ||||
4438 | return false; | ||||
4439 | } | ||||
4440 | |||||
4441 | SDValue ARMTargetLowering::joinRegisterPartsIntoValue( | ||||
4442 | SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, | ||||
4443 | MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const { | ||||
4444 | if ((ValueVT == MVT::f16 || ValueVT == MVT::bf16) && PartVT == MVT::f32) { | ||||
4445 | unsigned ValueBits = ValueVT.getSizeInBits(); | ||||
4446 | unsigned PartBits = PartVT.getSizeInBits(); | ||||
4447 | SDValue Val = Parts[0]; | ||||
4448 | |||||
4449 | Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(PartBits), Val); | ||||
4450 | Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::getIntegerVT(ValueBits), Val); | ||||
4451 | Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); | ||||
4452 | return Val; | ||||
4453 | } | ||||
4454 | return SDValue(); | ||||
4455 | } | ||||
4456 | |||||
/// Lower the incoming (formal) arguments described by Ins for the function
/// being compiled. Register arguments are copied out of their physical
/// registers (f64/v2f64 values may be reassembled from several registers
/// and/or stack slots), stack arguments are loaded from fixed frame objects,
/// and byval aggregates have their register portion spilled back to the
/// stack. Results are appended to InVals in argument order; returns Chain.
SDValue ARMTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));

  SmallVector<SDValue, 16> ArgValues;
  SDValue ArgValue;
  Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
  unsigned CurArgIdx = 0;

  // Initially ArgRegsSaveSize is zero.
  // Then we increase this value each time we meet byval parameter.
  // We also increase this value in case of varargs function.
  AFI->setArgRegsSaveSize(0);

  // Calculate the amount of stack space that we need to allocate to store
  // byval and variadic arguments that are passed in registers.
  // We need to know this before we allocate the first byval or variadic
  // argument, as they will be allocated a stack slot below the CFA (Canonical
  // Frame Address, the stack pointer at entry to the function).
  unsigned ArgRegBegin = ARM::R4;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
      break;

    CCValAssign &VA = ArgLocs[i];
    unsigned Index = VA.getValNo();
    ISD::ArgFlagsTy Flags = Ins[Index].Flags;
    if (!Flags.isByVal())
      continue;

    assert(VA.isMemLoc() && "unexpected byval pointer in reg")(static_cast <bool> (VA.isMemLoc() && "unexpected byval pointer in reg" ) ? void (0) : __assert_fail ("VA.isMemLoc() && \"unexpected byval pointer in reg\"" , "llvm/lib/Target/ARM/ARMISelLowering.cpp", 4498, __extension__ __PRETTY_FUNCTION__));
    unsigned RBegin, REnd;
    CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
    ArgRegBegin = std::min(ArgRegBegin, RBegin);

    CCInfo.nextInRegsParam();
  }
  CCInfo.rewindByValRegsInfo();

  int lastInsIndex = -1;
  if (isVarArg && MFI.hasVAStart()) {
    // Variadic functions also save every argument register that was not
    // allocated to a named argument, so widen the save area accordingly.
    unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
    if (RegIdx != std::size(GPRArgRegs))
      ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
  }

  unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
  AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
  auto PtrVT = getPointerTy(DAG.getDataLayout());

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    if (Ins[VA.getValNo()].isOrigArg()) {
      // Keep CurOrigArg in sync with the IR argument this location came from.
      std::advance(CurOrigArg,
                   Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
      CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
    }
    // Arguments stored in registers.
    if (VA.isRegLoc()) {
      EVT RegVT = VA.getLocVT();

      if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {
        // f64 and vector types are split up into multiple registers or
        // combinations of registers and stack slots.
        SDValue ArgValue1 =
            GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
        VA = ArgLocs[++i]; // skip ahead to next loc
        SDValue ArgValue2;
        if (VA.isMemLoc()) {
          // Second f64 half spilled to the stack: load it from a fixed slot.
          int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);
          SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
          ArgValue2 = DAG.getLoad(
              MVT::f64, dl, Chain, FIN,
              MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
        } else {
          ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
        }
        // Rebuild the v2f64 value from its two f64 halves.
        ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
        ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue,
                               ArgValue1, DAG.getIntPtrConstant(0, dl));
        ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue,
                               ArgValue2, DAG.getIntPtrConstant(1, dl));
      } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) {
        ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
      } else {
        // Plain register argument: pick the register class matching RegVT.
        const TargetRegisterClass *RC;

        if (RegVT == MVT::f16 || RegVT == MVT::bf16)
          RC = &ARM::HPRRegClass;
        else if (RegVT == MVT::f32)
          RC = &ARM::SPRRegClass;
        else if (RegVT == MVT::f64 || RegVT == MVT::v4f16 ||
                 RegVT == MVT::v4bf16)
          RC = &ARM::DPRRegClass;
        else if (RegVT == MVT::v2f64 || RegVT == MVT::v8f16 ||
                 RegVT == MVT::v8bf16)
          RC = &ARM::QPRRegClass;
        else if (RegVT == MVT::i32)
          RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
                                           : &ARM::GPRRegClass;
        else
          llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering")::llvm::llvm_unreachable_internal("RegVT not supported by FORMAL_ARGUMENTS Lowering" , "llvm/lib/Target/ARM/ARMISelLowering.cpp", 4569);

        // Transform the arguments in physical registers into virtual ones.
        Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
        ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);

        // If this value is passed in r0 and has the returned attribute (e.g.
        // C++ 'structors), record this fact for later use.
        if (VA.getLocReg() == ARM::R0 && Ins[VA.getValNo()].Flags.isReturned()) {
          AFI->setPreservesR0();
        }
      }

      // If this is an 8 or 16-bit value, it is really passed promoted
      // to 32 bits. Insert an assert[sz]ext to capture this, then
      // truncate to the right size.
      switch (VA.getLocInfo()) {
      default: llvm_unreachable("Unknown loc info!")::llvm::llvm_unreachable_internal("Unknown loc info!", "llvm/lib/Target/ARM/ARMISelLowering.cpp" , 4586);
      case CCValAssign::Full: break;
      case CCValAssign::BCvt:
        ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
        break;
      case CCValAssign::SExt:
        ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
        break;
      case CCValAssign::ZExt:
        ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
        break;
      }

      // f16 arguments have their size extended to 4 bytes and passed as if they
      // had been copied to the LSBs of a 32-bit register.
      // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
      if (VA.needsCustom() &&
          (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
        ArgValue = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), ArgValue);

      InVals.push_back(ArgValue);
    } else { // VA.isRegLoc()
      // Only arguments passed on the stack should make it here.
      assert(VA.isMemLoc())(static_cast <bool> (VA.isMemLoc()) ? void (0) : __assert_fail ("VA.isMemLoc()", "llvm/lib/Target/ARM/ARMISelLowering.cpp", 4613, __extension__ __PRETTY_FUNCTION__));
      assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered")(static_cast <bool> (VA.getValVT() != MVT::i64 && "i64 should already be lowered") ? void (0) : __assert_fail ( "VA.getValVT() != MVT::i64 && \"i64 should already be lowered\"" , "llvm/lib/Target/ARM/ARMISelLowering.cpp", 4614, __extension__ __PRETTY_FUNCTION__));

      int index = VA.getValNo();

      // Some Ins[] entries become multiple ArgLoc[] entries.
      // Process them only once.
      if (index != lastInsIndex)
      {
        ISD::ArgFlagsTy Flags = Ins[index].Flags;
        // FIXME: For now, all byval parameter objects are marked mutable.
        // This can be changed with more analysis.
        // In case of tail call optimization mark all arguments mutable.
        // Since they could be overwritten by lowering of arguments in case of
        // a tail call.
        if (Flags.isByVal()) {
          assert(Ins[index].isOrigArg() &&(static_cast <bool> (Ins[index].isOrigArg() && "Byval arguments cannot be implicit" ) ? void (0) : __assert_fail ("Ins[index].isOrigArg() && \"Byval arguments cannot be implicit\"" , "llvm/lib/Target/ARM/ARMISelLowering.cpp", 4630, __extension__ __PRETTY_FUNCTION__))
                 "Byval arguments cannot be implicit")(static_cast <bool> (Ins[index].isOrigArg() && "Byval arguments cannot be implicit" ) ? void (0) : __assert_fail ("Ins[index].isOrigArg() && \"Byval arguments cannot be implicit\"" , "llvm/lib/Target/ARM/ARMISelLowering.cpp", 4630, __extension__ __PRETTY_FUNCTION__));
          unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();

          // Spill the register portion of the byval aggregate next to its
          // stack portion and hand back the resulting frame address.
          int FrameIndex = StoreByValRegs(
              CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex,
              VA.getLocMemOffset(), Flags.getByValSize());
          InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));
          CCInfo.nextInRegsParam();
        } else {
          unsigned FIOffset = VA.getLocMemOffset();
          int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
                                         FIOffset, true);

          // Create load nodes to retrieve arguments from the stack.
          SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
          InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
                                       MachinePointerInfo::getFixedStack(
                                           DAG.getMachineFunction(), FI)));
        }
        lastInsIndex = index;
      }
    }
  }

  // varargs
  if (isVarArg && MFI.hasVAStart()) {
    VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getNextStackOffset(),
                         TotalArgRegsSaveSize);
    if (AFI->isCmseNSEntryFunction()) {
      // CMSE non-secure entry functions cannot be variadic.
      DiagnosticInfoUnsupported Diag(
          DAG.getMachineFunction().getFunction(),
          "secure entry function must not be variadic", dl.getDebugLoc());
      DAG.getContext()->diagnose(Diag);
    }
  }

  unsigned StackArgSize = CCInfo.getNextStackOffset();
  bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
  if (canGuaranteeTCO(CallConv, TailCallOpt)) {
    // The only way to guarantee a tail call is if the callee restores its
    // argument area, but it must also keep the stack aligned when doing so.
    const DataLayout &DL = DAG.getDataLayout();
    StackArgSize = alignTo(StackArgSize, DL.getStackAlignment());

    AFI->setArgumentStackToRestore(StackArgSize);
  }
  AFI->setArgumentStackSize(StackArgSize);

  if (CCInfo.getNextStackOffset() > 0 && AFI->isCmseNSEntryFunction()) {
    // CMSE non-secure entry functions cannot take arguments on the stack.
    DiagnosticInfoUnsupported Diag(
        DAG.getMachineFunction().getFunction(),
        "secure entry function requires arguments on stack", dl.getDebugLoc());
    DAG.getContext()->diagnose(Diag);
  }

  return Chain;
}
4687 | |||||
4688 | /// isFloatingPointZero - Return true if this is +0.0. | ||||
4689 | static bool isFloatingPointZero(SDValue Op) { | ||||
4690 | if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) | ||||
4691 | return CFP->getValueAPF().isPosZero(); | ||||
4692 | else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) { | ||||
4693 | // Maybe this has already been legalized into the constant pool? | ||||
4694 | if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) { | ||||
4695 | SDValue WrapperOp = Op.getOperand(1).getOperand(0); | ||||
4696 | if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp)) | ||||
4697 | if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) | ||||
4698 | return CFP->getValueAPF().isPosZero(); | ||||
4699 | } | ||||
4700 | } else if (Op->getOpcode() == ISD::BITCAST && | ||||
4701 | Op->getValueType(0) == MVT::f64) { | ||||
4702 | // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64) | ||||
4703 | // created by LowerConstantFP(). | ||||
4704 | SDValue BitcastOp = Op->getOperand(0); | ||||
4705 | if (BitcastOp->getOpcode() == ARMISD::VMOVIMM && | ||||
4706 | isNullConstant(BitcastOp->getOperand(0))) | ||||
4707 | return true; | ||||
4708 | } | ||||
4709 | return false; | ||||
4710 | } | ||||
4711 | |||||
/// Returns appropriate ARM CMP (cmp) and corresponding condition code for
/// the given operands.
///
/// On success, \p ARMcc is set to an i32 constant node holding the
/// ARMCC::CondCodes value to use with the returned compare, and the result
/// is a glue-producing ARMISD::CMP / ARMISD::CMPZ node (or, for one Thumb1
/// special case, the CPSR copy of an ARMISD::LSLS shift). Both CC and the
/// operands may be canonicalized along the way.
SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                                     SDValue &ARMcc, SelectionDAG &DAG,
                                     const SDLoc &dl) const {
  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
    unsigned C = RHSC->getZExtValue();
    if (!isLegalICmpImmediate((int32_t)C)) {
      // Constant does not fit, try adjusting it by one.
      // Each case below rewrites "x CC C" into the equivalent "x CC' C+-1";
      // the boundary checks (0x80000000, 0, 0x7fffffff, 0xffffffff) exclude
      // the constants where the adjusted comparison would wrap and change
      // meaning.
      switch (CC) {
      default: break;
      case ISD::SETLT:
      case ISD::SETGE:
        if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
          CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
          RHS = DAG.getConstant(C - 1, dl, MVT::i32);
        }
        break;
      case ISD::SETULT:
      case ISD::SETUGE:
        if (C != 0 && isLegalICmpImmediate(C-1)) {
          CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
          RHS = DAG.getConstant(C - 1, dl, MVT::i32);
        }
        break;
      case ISD::SETLE:
      case ISD::SETGT:
        if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
          CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
          RHS = DAG.getConstant(C + 1, dl, MVT::i32);
        }
        break;
      case ISD::SETULE:
      case ISD::SETUGT:
        if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
          CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
          RHS = DAG.getConstant(C + 1, dl, MVT::i32);
        }
        break;
      }
    }
  } else if ((ARM_AM::getShiftOpcForNode(LHS.getOpcode()) != ARM_AM::no_shift) &&
             (ARM_AM::getShiftOpcForNode(RHS.getOpcode()) == ARM_AM::no_shift)) {
    // In ARM and Thumb-2, the compare instructions can shift their second
    // operand. Swap so the shiftable node ends up on the RHS.
    CC = ISD::getSetCCSwappedOperands(CC);
    std::swap(LHS, RHS);
  }

  // Thumb1 has very limited immediate modes, so turning an "and" into a
  // shift can save multiple instructions.
  //
  // If we have (x & C1), and C1 is an appropriate mask, we can transform it
  // into "((x << n) >> n)". But that isn't necessarily profitable on its
  // own. If it's the operand to an unsigned comparison with an immediate,
  // we can eliminate one of the shifts: we transform
  // "((x << n) >> n) == C2" to "(x << n) == (C2 << n)".
  //
  // We avoid transforming cases which aren't profitable due to encoding
  // details:
  //
  // 1. C2 fits into the immediate field of a cmp, and the transformed version
  // would not; in that case, we're essentially trading one immediate load for
  // another.
  // 2. C1 is 255 or 65535, so we can use uxtb or uxth.
  // 3. C2 is zero; we have other code for this special case.
  //
  // FIXME: Figure out profitability for Thumb2; we usually can't save an
  // instruction, since the AND is always one instruction anyway, but we could
  // use narrow instructions in some cases.
  if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::AND &&
      LHS->hasOneUse() && isa<ConstantSDNode>(LHS.getOperand(1)) &&
      LHS.getValueType() == MVT::i32 && isa<ConstantSDNode>(RHS) &&
      !isSignedIntSetCC(CC)) {
    unsigned Mask = cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue();
    auto *RHSC = cast<ConstantSDNode>(RHS.getNode());
    uint64_t RHSV = RHSC->getZExtValue();
    if (isMask_32(Mask) && (RHSV & ~Mask) == 0 && Mask != 255 && Mask != 65535) {
      unsigned ShiftBits = llvm::countl_zero(Mask);
      // The RHSV conditions encode exclusions 1 and 3 documented above.
      if (RHSV && (RHSV > 255 || (RHSV << ShiftBits) <= 255)) {
        SDValue ShiftAmt = DAG.getConstant(ShiftBits, dl, MVT::i32);
        LHS = DAG.getNode(ISD::SHL, dl, MVT::i32, LHS.getOperand(0), ShiftAmt);
        RHS = DAG.getConstant(RHSV << ShiftBits, dl, MVT::i32);
      }
    }
  }

  // The specific comparison "(x<<c) > 0x80000000U" can be optimized to a
  // single "lsls x, c+1". The shift sets the "C" and "Z" flags the same
  // way a cmp would.
  // FIXME: Add support for ARM/Thumb2; this would need isel patterns, and
  // some tweaks to the heuristics for the previous and->shift transform.
  // FIXME: Optimize cases where the LHS isn't a shift.
  if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::SHL &&
      isa<ConstantSDNode>(RHS) &&
      cast<ConstantSDNode>(RHS)->getZExtValue() == 0x80000000U &&
      CC == ISD::SETUGT && isa<ConstantSDNode>(LHS.getOperand(1)) &&
      cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() < 31) {
    unsigned ShiftAmt =
      cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() + 1;
    SDValue Shift = DAG.getNode(ARMISD::LSLS, dl,
                                DAG.getVTList(MVT::i32, MVT::i32),
                                LHS.getOperand(0),
                                DAG.getConstant(ShiftAmt, dl, MVT::i32));
    // Publish the shift's flag result in CPSR and hand back the glue.
    SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
                                     Shift.getValue(1), SDValue());
    ARMcc = DAG.getConstant(ARMCC::HI, dl, MVT::i32);
    return Chain.getValue(1);
  }

  ARMCC::CondCodes CondCode = IntCCToARMCC(CC);

  // If the RHS is a constant zero then the V (overflow) flag will never be
  // set. This can allow us to simplify GE to PL or LT to MI, which can be
  // simpler for other passes (like the peephole optimiser) to deal with.
  if (isNullConstant(RHS)) {
    switch (CondCode) {
    default: break;
    case ARMCC::GE:
      CondCode = ARMCC::PL;
      break;
    case ARMCC::LT:
      CondCode = ARMCC::MI;
      break;
    }
  }

  ARMISD::NodeType CompareType;
  switch (CondCode) {
  default:
    CompareType = ARMISD::CMP;
    break;
  case ARMCC::EQ:
  case ARMCC::NE:
    // Uses only Z Flag
    CompareType = ARMISD::CMPZ;
    break;
  }
  ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
  return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
}
4853 | |||||
4854 | /// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands. | ||||
4855 | SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, | ||||
4856 | SelectionDAG &DAG, const SDLoc &dl, | ||||
4857 | bool Signaling) const { | ||||
4858 | assert(Subtarget->hasFP64() || RHS.getValueType() != MVT::f64)(static_cast <bool> (Subtarget->hasFP64() || RHS.getValueType () != MVT::f64) ? void (0) : __assert_fail ("Subtarget->hasFP64() || RHS.getValueType() != MVT::f64" , "llvm/lib/Target/ARM/ARMISelLowering.cpp", 4858, __extension__ __PRETTY_FUNCTION__)); | ||||
4859 | SDValue Cmp; | ||||
4860 | if (!isFloatingPointZero(RHS)) | ||||
4861 | Cmp = DAG.getNode(Signaling ? ARMISD::CMPFPE : ARMISD::CMPFP, | ||||
4862 | dl, MVT::Glue, LHS, RHS); | ||||
4863 | else | ||||
4864 | Cmp = DAG.getNode(Signaling ? ARMISD::CMPFPEw0 : ARMISD::CMPFPw0, | ||||
4865 | dl, MVT::Glue, LHS); | ||||
4866 | return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp); | ||||
4867 | } | ||||
4868 | |||||
/// duplicateCmp - Glue values can have only one use, so this function
/// duplicates a comparison node.
///
/// Handles the node shapes this backend emits for compares: a bare
/// ARMISD::CMP/CMPZ, or an ARMISD::FMSTAT wrapping an ARMISD::CMPFP /
/// ARMISD::CMPFPw0. Anything else trips the asserts.
SDValue
ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
  unsigned Opc = Cmp.getOpcode();
  SDLoc DL(Cmp);
  // Integer compares can simply be re-emitted with the same operands.
  if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
    return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));

  // Otherwise we expect FMSTAT(CMPFP...) — duplicate the inner VFP compare
  // first, then wrap the copy in a fresh FMSTAT.
  assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
  Cmp = Cmp.getOperand(0);
  Opc = Cmp.getOpcode();
  if (Opc == ARMISD::CMPFP)
    Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
  else {
    // Compare-against-zero variant carries a single operand.
    assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
    Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));
  }
  return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
}
4889 | |||||
// This function returns three things: the arithmetic computation itself
// (Value), a comparison (OverflowCmp), and a condition code (ARMcc). The
// comparison and the condition code define the case in which the arithmetic
// computation *does not* overflow.
//
// Only i32-typed [SU]ADDO/[SU]SUBO/[SU]MULO nodes are accepted; any other
// opcode is unreachable.
std::pair<SDValue, SDValue>
ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
                                 SDValue &ARMcc) const {
  assert(Op.getValueType() == MVT::i32 && "Unsupported value type");

  SDValue Value, OverflowCmp;
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDLoc dl(Op);

  // FIXME: We are currently always generating CMPs because we don't support
  // generating CMN through the backend. This is not as good as the natural
  // CMP case because it causes a register dependency and cannot be folded
  // later.

  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("Unknown overflow instruction!");
  case ISD::SADDO:
    // Signed add: no overflow iff V clear after comparing sum with LHS.
    ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
    Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
    OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
    break;
  case ISD::UADDO:
    ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
    // We use ADDC here to correspond to its use in LowerUnsignedALUO.
    // We do not use it in the USUBO case as Value may not be used.
    Value = DAG.getNode(ARMISD::ADDC, dl,
                        DAG.getVTList(Op.getValueType(), MVT::i32), LHS, RHS)
                .getValue(0);
    OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
    break;
  case ISD::SSUBO:
    ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
    Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
    OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
    break;
  case ISD::USUBO:
    ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
    Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
    OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
    break;
  case ISD::UMULO:
    // We generate a UMUL_LOHI and then check if the high word is 0.
    ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
    Value = DAG.getNode(ISD::UMUL_LOHI, dl,
                        DAG.getVTList(Op.getValueType(), Op.getValueType()),
                        LHS, RHS);
    OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
                              DAG.getConstant(0, dl, MVT::i32));
    Value = Value.getValue(0); // We only want the low 32 bits for the result.
    break;
  case ISD::SMULO:
    // We generate a SMUL_LOHI and then check if all the bits of the high word
    // are the same as the sign bit of the low word.
    ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
    Value = DAG.getNode(ISD::SMUL_LOHI, dl,
                        DAG.getVTList(Op.getValueType(), Op.getValueType()),
                        LHS, RHS);
    OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
                              DAG.getNode(ISD::SRA, dl, Op.getValueType(),
                                          Value.getValue(0),
                                          DAG.getConstant(31, dl, MVT::i32)));
    Value = Value.getValue(0); // We only want the low 32 bits for the result.
    break;
  } // switch (...)

  return std::make_pair(Value, OverflowCmp);
}
4963 | |||||
4964 | SDValue | ||||
4965 | ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const { | ||||
4966 | // Let legalize expand this if it isn't a legal type yet. | ||||
4967 | if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType())) | ||||
4968 | return SDValue(); | ||||
4969 | |||||
4970 | SDValue Value, OverflowCmp; | ||||
4971 | SDValue ARMcc; | ||||
4972 | std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc); | ||||
4973 | SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); | ||||
4974 | SDLoc dl(Op); | ||||
4975 | // We use 0 and 1 as false and true values. | ||||
4976 | SDValue TVal = DAG.getConstant(1, dl, MVT::i32); | ||||
4977 | SDValue FVal = DAG.getConstant(0, dl, MVT::i32); | ||||
4978 | EVT VT = Op.getValueType(); | ||||
4979 | |||||
4980 | SDValue Overflow = DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal, | ||||
4981 | ARMcc, CCR, OverflowCmp); | ||||
4982 | |||||
4983 | SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); | ||||
4984 | return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow); | ||||
4985 | } | ||||
4986 | |||||
4987 | static SDValue ConvertBooleanCarryToCarryFlag(SDValue BoolCarry, | ||||
4988 | SelectionDAG &DAG) { | ||||
4989 | SDLoc DL(BoolCarry); | ||||
4990 | EVT CarryVT = BoolCarry.getValueType(); | ||||
4991 | |||||
4992 | // This converts the boolean value carry into the carry flag by doing | ||||
4993 | // ARMISD::SUBC Carry, 1 | ||||
4994 | SDValue Carry = DAG.getNode(ARMISD::SUBC, DL, | ||||
4995 | DAG.getVTList(CarryVT, MVT::i32), | ||||
4996 | BoolCarry, DAG.getConstant(1, DL, CarryVT)); | ||||
4997 | return Carry.getValue(1); | ||||
4998 | } | ||||
4999 | |||||
5000 | static SDValue ConvertCarryFlagToBooleanCarry(SDValue Flags, EVT VT, | ||||
5001 | SelectionDAG &DAG) { | ||||
5002 | SDLoc DL(Flags); | ||||
5003 | |||||
5004 | // Now convert the carry flag into a boolean carry. We do this | ||||
5005 | // using ARMISD:ADDE 0, 0, Carry | ||||
5006 | return DAG.getNode(ARMISD::ADDE, DL, DAG.getVTList(VT, MVT::i32), | ||||
5007 | DAG.getConstant(0, DL, MVT::i32), | ||||
5008 | DAG.getConstant(0, DL, MVT::i32), Flags); | ||||
5009 | } | ||||
5010 | |||||
/// Lower UADDO/USUBO by emitting the carry-producing ARMISD add/sub and
/// converting the resulting carry flag into the boolean overflow value the
/// generic node expects.
SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op,
                                             SelectionDAG &DAG) const {
  // Let legalize expand this if it isn't a legal type yet.
  if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
    return SDValue();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDLoc dl(Op);

  EVT VT = Op.getValueType();
  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
  SDValue Value;
  SDValue Overflow;
  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("Unknown overflow instruction!");
  case ISD::UADDO:
    Value = DAG.getNode(ARMISD::ADDC, dl, VTs, LHS, RHS);
    // Convert the carry flag into a boolean value.
    Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
    break;
  case ISD::USUBO: {
    Value = DAG.getNode(ARMISD::SUBC, dl, VTs, LHS, RHS);
    // Convert the carry flag into a boolean value.
    Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
    // ARMISD::SUBC returns 0 when we have to borrow, so make it an overflow
    // value. So compute 1 - C.
    Overflow = DAG.getNode(ISD::SUB, dl, MVT::i32,
                           DAG.getConstant(1, dl, MVT::i32), Overflow);
    break;
  }
  }

  return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
}
5047 | |||||
/// Lower i8/i16 saturating add/sub ([US]ADDSAT/[US]SUBSAT) onto the v6 DSP
/// byte/halfword saturating ARMISD nodes (QADD8b, UQSUB16b, ...), widening
/// the operands to i32 and truncating the result back. Returns SDValue()
/// when the subtarget or type does not qualify.
static SDValue LowerADDSUBSAT(SDValue Op, SelectionDAG &DAG,
                              const ARMSubtarget *Subtarget) {
  EVT VT = Op.getValueType();
  // The DSP saturating instructions require ARMv6 + DSP extension.
  if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
    return SDValue();
  if (!VT.isSimple())
    return SDValue();

  unsigned NewOpcode;
  switch (VT.getSimpleVT().SimpleTy) {
  default:
    return SDValue();
  case MVT::i8:
    // NOTE: the inner switches have no default; this lowering is only
    // reached for the four saturating opcodes, so NewOpcode is always set.
    switch (Op->getOpcode()) {
    case ISD::UADDSAT:
      NewOpcode = ARMISD::UQADD8b;
      break;
    case ISD::SADDSAT:
      NewOpcode = ARMISD::QADD8b;
      break;
    case ISD::USUBSAT:
      NewOpcode = ARMISD::UQSUB8b;
      break;
    case ISD::SSUBSAT:
      NewOpcode = ARMISD::QSUB8b;
      break;
    }
    break;
  case MVT::i16:
    switch (Op->getOpcode()) {
    case ISD::UADDSAT:
      NewOpcode = ARMISD::UQADD16b;
      break;
    case ISD::SADDSAT:
      NewOpcode = ARMISD::QADD16b;
      break;
    case ISD::USUBSAT:
      NewOpcode = ARMISD::UQSUB16b;
      break;
    case ISD::SSUBSAT:
      NewOpcode = ARMISD::QSUB16b;
      break;
    }
    break;
  }

  SDLoc dl(Op);
  // Sign-extend both operands to i32, perform the saturating op there, and
  // truncate back to the original narrow type.
  SDValue Add =
      DAG.getNode(NewOpcode, dl, MVT::i32,
                  DAG.getSExtOrTrunc(Op->getOperand(0), dl, MVT::i32),
                  DAG.getSExtOrTrunc(Op->getOperand(1), dl, MVT::i32));
  return DAG.getNode(ISD::TRUNCATE, dl, VT, Add);
}
5101 | |||||
/// Lower ISD::SELECT. Folds selects on overflow flags and on CMOV-produced
/// booleans directly into an ARMISD::CMOV; otherwise falls back to a
/// select_cc against zero (masking to one bit first, since ARM booleans
/// have undefined upper bits).
SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  SDValue Cond = Op.getOperand(0);
  SDValue SelectTrue = Op.getOperand(1);
  SDValue SelectFalse = Op.getOperand(2);
  SDLoc dl(Op);
  unsigned Opc = Cond.getOpcode();

  // Selecting on the overflow result (value #1) of an overflow node: reuse
  // the compare produced by getARMXALUOOp directly.
  if (Cond.getResNo() == 1 &&
      (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
       Opc == ISD::USUBO)) {
    if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
      return SDValue();

    SDValue Value, OverflowCmp;
    SDValue ARMcc;
    std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
    EVT VT = Op.getValueType();

    return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, CCR,
                   OverflowCmp, DAG);
  }

  // Convert:
  //
  //   (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
  //   (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
  //
  if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
    const ConstantSDNode *CMOVTrue =
      dyn_cast<ConstantSDNode>(Cond.getOperand(0));
    const ConstantSDNode *CMOVFalse =
      dyn_cast<ConstantSDNode>(Cond.getOperand(1));

    if (CMOVTrue && CMOVFalse) {
      unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
      unsigned CMOVFalseVal = CMOVFalse->getZExtValue();

      SDValue True;
      SDValue False;
      if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
        True = SelectTrue;
        False = SelectFalse;
      } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
        True = SelectFalse;
        False = SelectTrue;
      }

      // Only the 1/0 and 0/1 constant pairs above set True/False.
      if (True.getNode() && False.getNode()) {
        EVT VT = Op.getValueType();
        SDValue ARMcc = Cond.getOperand(2);
        SDValue CCR = Cond.getOperand(3);
        // Glue has a single use, so the inner compare must be duplicated.
        SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
        assert(True.getValueType() == VT);
        return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG);
      }
    }
  }

  // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
  // undefined bits before doing a full-word comparison with zero.
  Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
                     DAG.getConstant(1, dl, Cond.getValueType()));

  return DAG.getSelectCC(dl, Cond,
                         DAG.getConstant(0, dl, Cond.getValueType()),
                         SelectTrue, SelectFalse, ISD::SETNE);
}
5170 | |||||
/// Map an ISD floating-point condition code onto a condition usable by the
/// VSEL instruction (GE, GT, VS or EQ), recording in \p swpCmpOps whether
/// the compare operands must be swapped and in \p swpVselOps whether the
/// VSEL's true/false operands must be swapped to preserve the semantics.
/// The adjustments below are cumulative, so their order matters.
static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
                                 bool &swpCmpOps, bool &swpVselOps) {
  // Start by selecting the GE condition code for opcodes that return true for
  // 'equality'
  if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||
      CC == ISD::SETULE || CC == ISD::SETGE || CC == ISD::SETLE)
    CondCode = ARMCC::GE;

  // and GT for opcodes that return false for 'equality'.
  else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||
           CC == ISD::SETULT || CC == ISD::SETGT || CC == ISD::SETLT)
    CondCode = ARMCC::GT;

  // Since we are constrained to GE/GT, if the opcode contains 'less', we need
  // to swap the compare operands.
  if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||
      CC == ISD::SETULT || CC == ISD::SETLE || CC == ISD::SETLT)
    swpCmpOps = true;

  // Both GT and GE are ordered comparisons, and return false for 'unordered'.
  // If we have an unordered opcode, we need to swap the operands to the VSEL
  // instruction (effectively negating the condition).
  //
  // This also has the effect of swapping which one of 'less' or 'greater'
  // returns true, so we also swap the compare operands. It also switches
  // whether we return true for 'equality', so we compensate by picking the
  // opposite condition code to our original choice.
  if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE ||
      CC == ISD::SETUGT) {
    swpCmpOps = !swpCmpOps;
    swpVselOps = !swpVselOps;
    CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT;
  }

  // 'ordered' is 'anything but unordered', so use the VS condition code and
  // swap the VSEL operands.
  if (CC == ISD::SETO) {
    CondCode = ARMCC::VS;
    swpVselOps = true;
  }

  // 'unordered or not equal' is 'anything but equal', so use the EQ condition
  // code and swap the VSEL operands. Also do this if we don't care about the
  // unordered case.
  if (CC == ISD::SETUNE || CC == ISD::SETNE) {
    CondCode = ARMCC::EQ;
    swpVselOps = true;
  }
}
5220 | |||||
5221 | SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal, | ||||
5222 | SDValue TrueVal, SDValue ARMcc, SDValue CCR, | ||||
5223 | SDValue Cmp, SelectionDAG &DAG) const { | ||||
5224 | if (!Subtarget->hasFP64() && VT == MVT::f64) { | ||||
5225 | FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl, | ||||
5226 | DAG.getVTList(MVT::i32, MVT::i32), FalseVal); | ||||
5227 | TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl, | ||||
5228 | DAG.getVTList(MVT::i32, MVT::i32), TrueVal); | ||||
5229 | |||||
5230 | SDValue TrueLow = TrueVal.getValue(0); | ||||
5231 | SDValue TrueHigh = TrueVal.getValue(1); | ||||
5232 | SDValue FalseLow = FalseVal.getValue(0); | ||||
5233 | SDValue FalseHigh = FalseVal.getValue(1); | ||||
5234 | |||||
5235 | SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow, | ||||
5236 | ARMcc, CCR, Cmp); | ||||
5237 | SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh, | ||||
5238 | ARMcc, CCR, duplicateCmp(Cmp, DAG)); | ||||
5239 | |||||
5240 | return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High); | ||||
5241 | } else { | ||||
5242 | return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR, | ||||
5243 | Cmp); | ||||
5244 | } | ||||
5245 | } | ||||
5246 | |||||
5247 | static bool isGTorGE(ISD::CondCode CC) { | ||||
5248 | return CC == ISD::SETGT || CC == ISD::SETGE; | ||||
5249 | } | ||||
5250 | |||||
5251 | static bool isLTorLE(ISD::CondCode CC) { | ||||
5252 | return CC == ISD::SETLT || CC == ISD::SETLE; | ||||
5253 | } | ||||
5254 | |||||
5255 | // See if a conditional (LHS CC RHS ? TrueVal : FalseVal) is lower-saturating. | ||||
5256 | // All of these conditions (and their <= and >= counterparts) will do: | ||||
5257 | // x < k ? k : x | ||||
5258 | // x > k ? x : k | ||||
5259 | // k < x ? x : k | ||||
5260 | // k > x ? k : x | ||||
5261 | static bool isLowerSaturate(const SDValue LHS, const SDValue RHS, | ||||
5262 | const SDValue TrueVal, const SDValue FalseVal, | ||||
5263 | const ISD::CondCode CC, const SDValue K) { | ||||
5264 | return (isGTorGE(CC) && | ||||
5265 | ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))) || | ||||
5266 | (isLTorLE(CC) && | ||||
5267 | ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal))); | ||||
5268 | } | ||||
5269 | |||||
// Check if two chained conditionals could be converted into SSAT or USAT.
//
// SSAT can replace a set of two conditional selectors that bound a number to an
// interval of type [k, ~k] when k + 1 is a power of 2. Here are some examples:
//
//     x < -k ? -k : (x > k ? k : x)
//     x < -k ? -k : (x < k ? x : k)
//     x > -k ? (x > k ? k : x) : -k
//     x < k ? (x < -k ? -k : x) : k
//     etc.
//
// LLVM canonicalizes these to either a min(max()) or a max(min())
// pattern. This function tries to match one of these and will return a SSAT
// node if successful.
//
// USAT works similarily to SSAT but bounds on the interval [0, k] where k + 1
// is a power of 2.
static SDValue LowerSaturatingConditional(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  SDValue V1 = Op.getOperand(0);
  SDValue K1 = Op.getOperand(1);
  SDValue TrueVal1 = Op.getOperand(2);
  SDValue FalseVal1 = Op.getOperand(3);
  ISD::CondCode CC1 = cast<CondCodeSDNode>(Op.getOperand(4))->get();

  // The inner select is the non-constant arm of the outer one.
  const SDValue Op2 = isa<ConstantSDNode>(TrueVal1) ? FalseVal1 : TrueVal1;
  if (Op2.getOpcode() != ISD::SELECT_CC)
    return SDValue();

  SDValue V2 = Op2.getOperand(0);
  SDValue K2 = Op2.getOperand(1);
  SDValue TrueVal2 = Op2.getOperand(2);
  SDValue FalseVal2 = Op2.getOperand(3);
  ISD::CondCode CC2 = cast<CondCodeSDNode>(Op2.getOperand(4))->get();

  SDValue V1Tmp = V1;
  SDValue V2Tmp = V2;

  // Check that the registers and the constants match a max(min()) or min(max())
  // pattern
  if (V1Tmp != TrueVal1 || V2Tmp != TrueVal2 || K1 != FalseVal1 ||
      K2 != FalseVal2 ||
      !((isGTorGE(CC1) && isLTorLE(CC2)) || (isLTorLE(CC1) && isGTorGE(CC2))))
    return SDValue();

  // Check that the constant in the lower-bound check is
  // the opposite of the constant in the upper-bound check
  // in 1's complement.
  if (!isa<ConstantSDNode>(K1) || !isa<ConstantSDNode>(K2))
    return SDValue();

  int64_t Val1 = cast<ConstantSDNode>(K1)->getSExtValue();
  int64_t Val2 = cast<ConstantSDNode>(K2)->getSExtValue();
  int64_t PosVal = std::max(Val1, Val2);
  int64_t NegVal = std::min(Val1, Val2);

  // Require the larger bound to belong to the "less" comparison, and the
  // saturation width (PosVal + 1) to be a power of two.
  if (!((Val1 > Val2 && isLTorLE(CC1)) || (Val1 < Val2 && isLTorLE(CC2))) ||
      !isPowerOf2_64(PosVal + 1))
    return SDValue();

  // Handle the difference between USAT (unsigned) and SSAT (signed)
  // saturation
  // At this point, PosVal is guaranteed to be positive
  uint64_t K = PosVal;
  SDLoc dl(Op);
  // Bounds [-k-1, k] (one's-complement pair) -> SSAT; bounds [0, k] -> USAT.
  if (Val1 == ~Val2)
    return DAG.getNode(ARMISD::SSAT, dl, VT, V2Tmp,
                       DAG.getConstant(llvm::countr_one(K), dl, VT));
  if (NegVal == 0)
    return DAG.getNode(ARMISD::USAT, dl, VT, V2Tmp,
                       DAG.getConstant(llvm::countr_one(K), dl, VT));

  return SDValue();
}
5344 | |||||
5345 | // Check if a condition of the type x < k ? k : x can be converted into a | ||||
5346 | // bit operation instead of conditional moves. | ||||
5347 | // Currently this is allowed given: | ||||
5348 | // - The conditions and values match up | ||||
5349 | // - k is 0 or -1 (all ones) | ||||
5350 | // This function will not check the last condition, thats up to the caller | ||||
5351 | // It returns true if the transformation can be made, and in such case | ||||
5352 | // returns x in V, and k in SatK. | ||||
5353 | static bool isLowerSaturatingConditional(const SDValue &Op, SDValue &V, | ||||
5354 | SDValue &SatK) | ||||
5355 | { | ||||
5356 | SDValue LHS = Op.getOperand(0); | ||||
5357 | SDValue RHS = Op.getOperand(1); | ||||
5358 | ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); | ||||
5359 | SDValue TrueVal = Op.getOperand(2); | ||||
5360 | SDValue FalseVal = Op.getOperand(3); | ||||
5361 | |||||
5362 | SDValue *K = isa<ConstantSDNode>(LHS) ? &LHS : isa<ConstantSDNode>(RHS) | ||||
5363 | ? &RHS | ||||
5364 | : nullptr; | ||||
5365 | |||||
5366 | // No constant operation in comparison, early out | ||||
5367 | if (!K) | ||||
5368 | return false; | ||||
5369 | |||||
5370 | SDValue KTmp = isa<ConstantSDNode>(TrueVal) ? TrueVal : FalseVal; | ||||
5371 | V = (KTmp == TrueVal) ? FalseVal : TrueVal; | ||||
5372 | SDValue VTmp = (K && *K == LHS) ? RHS : LHS; | ||||
5373 | |||||
5374 | // If the constant on left and right side, or variable on left and right, | ||||
5375 | // does not match, early out | ||||
5376 | if (*K != KTmp || V != VTmp) | ||||
5377 | return false; | ||||
5378 | |||||
5379 | if (isLowerSaturate(LHS, RHS, TrueVal, FalseVal, CC, *K)) { | ||||
5380 | SatK = *K; | ||||
5381 | return true; | ||||
5382 | } | ||||
5383 | |||||
5384 | return false; | ||||
5385 | } | ||||
5386 | |||||
5387 | bool ARMTargetLowering::isUnsupportedFloatingType(EVT VT) const { | ||||
5388 | if (VT == MVT::f32) | ||||
5389 | return !Subtarget->hasVFP2Base(); | ||||
5390 | if (VT == MVT::f64) | ||||
5391 | return !Subtarget->hasFP64(); | ||||
5392 | if (VT == MVT::f16) | ||||
5393 | return !Subtarget->hasFullFP16(); | ||||
5394 | return false; | ||||
5395 | } | ||||
5396 | |||||
// Lower an ISD::SELECT_CC node. Tries, in order: SSAT/USAT formation,
// bit-trick lowering of lower-saturating selects, v8.1-M CSEL-family
// instructions for constant selects, integer CMOV (with VSEL condition
// massaging for FP results on ARMv8), and finally VFP compare + CMOV for FP
// operands (possibly two chained CMOVs when the condition maps to two ARM
// condition codes).
SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc dl(Op);

  // Try to convert two saturating conditional selects into a single SSAT
  if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2())
    if (SDValue SatValue = LowerSaturatingConditional(Op, DAG))
      return SatValue;

  // Try to convert expressions of the form x < k ? k : x (and similar forms)
  // into more efficient bit operations, which is possible when k is 0 or -1
  // On ARM and Thumb-2 which have flexible operand 2 this will result in
  // single instructions. On Thumb the shift and the bit operation will be two
  // instructions.
  // Only allow this transformation on full-width (32-bit) operations
  SDValue LowerSatConstant;
  SDValue SatValue;
  if (VT == MVT::i32 &&
      isLowerSaturatingConditional(Op, SatValue, LowerSatConstant)) {
    // ShiftV = x >> 31 (arithmetic), i.e. all-ones when x is negative.
    SDValue ShiftV = DAG.getNode(ISD::SRA, dl, VT, SatValue,
                                 DAG.getConstant(31, dl, VT));
    if (isNullConstant(LowerSatConstant)) {
      // max(x, 0) == x & ~(x >> 31)
      SDValue NotShiftV = DAG.getNode(ISD::XOR, dl, VT, ShiftV,
                                      DAG.getAllOnesConstant(dl, VT));
      return DAG.getNode(ISD::AND, dl, VT, SatValue, NotShiftV);
    } else if (isAllOnesConstant(LowerSatConstant))
      // max(x, -1) == x | (x >> 31)
      return DAG.getNode(ISD::OR, dl, VT, SatValue, ShiftV);
  }

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
  SDValue TrueVal = Op.getOperand(2);
  SDValue FalseVal = Op.getOperand(3);
  ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FalseVal);
  ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TrueVal);

  // v8.1-M Mainline: select between two i32 constants related by bitwise-not,
  // negation, or +1 maps onto CSINV / CSNEG / CSINC.
  if (Subtarget->hasV8_1MMainlineOps() && CFVal && CTVal &&
      LHS.getValueType() == MVT::i32 && RHS.getValueType() == MVT::i32) {
    unsigned TVal = CTVal->getZExtValue();
    unsigned FVal = CFVal->getZExtValue();
    unsigned Opcode = 0;

    if (TVal == ~FVal) {
      Opcode = ARMISD::CSINV;
    } else if (TVal == ~FVal + 1) {
      Opcode = ARMISD::CSNEG;
    } else if (TVal + 1 == FVal) {
      Opcode = ARMISD::CSINC;
    } else if (TVal == FVal + 1) {
      // Same relation with the roles reversed; swap operands and invert the
      // condition so the TVal+1==FVal case above applies.
      Opcode = ARMISD::CSINC;
      std::swap(TrueVal, FalseVal);
      std::swap(TVal, FVal);
      CC = ISD::getSetCCInverse(CC, LHS.getValueType());
    }

    if (Opcode) {
      // If one of the constants is cheaper than another, materialise the
      // cheaper one and let the csel generate the other.
      if (Opcode != ARMISD::CSINC &&
          HasLowerConstantMaterializationCost(FVal, TVal, Subtarget)) {
        std::swap(TrueVal, FalseVal);
        std::swap(TVal, FVal);
        CC = ISD::getSetCCInverse(CC, LHS.getValueType());
      }

      // Attempt to use ZR checking TVal is 0, possibly inverting the condition
      // to get there. CSINC is not invertible like the other two (~(~a) == a,
      // -(-a) == a, but (a+1)+1 != a).
      if (FVal == 0 && Opcode != ARMISD::CSINC) {
        std::swap(TrueVal, FalseVal);
        std::swap(TVal, FVal);
        CC = ISD::getSetCCInverse(CC, LHS.getValueType());
      }

      // Drops F's value because we can get it by inverting/negating TVal.
      FalseVal = TrueVal;

      SDValue ARMcc;
      SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
      EVT VT = TrueVal.getValueType();
      return DAG.getNode(Opcode, dl, VT, TrueVal, FalseVal, ARMcc, Cmp);
    }
  }

  // Soft-float the comparison when the operand FP type has no hardware
  // support (turns it into a libcall-based integer setcc).
  if (isUnsupportedFloatingType(LHS.getValueType())) {
    DAG.getTargetLoweringInfo().softenSetCCOperands(
        DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS);

    // If softenSetCCOperands only returned one value, we should compare it to
    // zero.
    if (!RHS.getNode()) {
      RHS = DAG.getConstant(0, dl, LHS.getValueType());
      CC = ISD::SETNE;
    }
  }

  if (LHS.getValueType() == MVT::i32) {
    // Try to generate VSEL on ARMv8.
    // The VSEL instruction can't use all the usual ARM condition
    // codes: it only has two bits to select the condition code, so it's
    // constrained to use only GE, GT, VS and EQ.
    //
    // To implement all the various ISD::SETXXX opcodes, we sometimes need to
    // swap the operands of the previous compare instruction (effectively
    // inverting the compare condition, swapping 'less' and 'greater') and
    // sometimes need to swap the operands to the VSEL (which inverts the
    // condition in the sense of firing whenever the previous condition didn't)
    if (Subtarget->hasFPARMv8Base() && (TrueVal.getValueType() == MVT::f16 ||
                                        TrueVal.getValueType() == MVT::f32 ||
                                        TrueVal.getValueType() == MVT::f64)) {
      ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
      if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
          CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
        CC = ISD::getSetCCInverse(CC, LHS.getValueType());
        std::swap(TrueVal, FalseVal);
      }
    }

    SDValue ARMcc;
    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
    SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
    // Choose GE over PL, which vsel does not support
    if (cast<ConstantSDNode>(ARMcc)->getZExtValue() == ARMCC::PL)
      ARMcc = DAG.getConstant(ARMCC::GE, dl, MVT::i32);
    return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
  }

  // FP comparison: may need one or two ARM condition codes (CondCode2 is
  // ARMCC::AL when a single code suffices).
  ARMCC::CondCodes CondCode, CondCode2;
  FPCCToARMCC(CC, CondCode, CondCode2);

  // Normalize the fp compare. If RHS is zero we prefer to keep it there so we
  // match CMPFPw0 instead of CMPFP, though we don't do this for f16 because we
  // must use VSEL (limited condition codes), due to not having conditional f16
  // moves.
  if (Subtarget->hasFPARMv8Base() &&
      !(isFloatingPointZero(RHS) && TrueVal.getValueType() != MVT::f16) &&
      (TrueVal.getValueType() == MVT::f16 ||
       TrueVal.getValueType() == MVT::f32 ||
       TrueVal.getValueType() == MVT::f64)) {
    bool swpCmpOps = false;
    bool swpVselOps = false;
    checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);

    if (CondCode == ARMCC::GT || CondCode == ARMCC::GE ||
        CondCode == ARMCC::VS || CondCode == ARMCC::EQ) {
      if (swpCmpOps)
        std::swap(LHS, RHS);
      if (swpVselOps)
        std::swap(TrueVal, FalseVal);
    }
  }

  SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
  SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
  if (CondCode2 != ARMCC::AL) {
    SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32);
    // FIXME: Needs another CMP because flag can have but one use.
    SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
    Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG);
  }
  return Result;
}
5562 | |||||
5563 | /// canChangeToInt - Given the fp compare operand, return true if it is suitable | ||||
5564 | /// to morph to an integer compare sequence. | ||||
5565 | static bool canChangeToInt(SDValue Op, bool &SeenZero, | ||||
5566 | const ARMSubtarget *Subtarget) { | ||||
5567 | SDNode *N = Op.getNode(); | ||||
5568 | if (!N->hasOneUse()) | ||||
5569 | // Otherwise it requires moving the value from fp to integer registers. | ||||
5570 | return false; | ||||
5571 | if (!N->getNumValues()) | ||||
5572 | return false; | ||||
5573 | EVT VT = Op.getValueType(); | ||||
5574 | if (VT != MVT::f32 && !Subtarget->isFPBrccSlow()) | ||||
5575 | // f32 case is generally profitable. f64 case only makes sense when vcmpe + | ||||
5576 | // vmrs are very slow, e.g. cortex-a8. | ||||
5577 | return false; | ||||
5578 | |||||
5579 | if (isFloatingPointZero(Op)) { | ||||
5580 | SeenZero = true; | ||||
5581 | return true; | ||||
5582 | } | ||||
5583 | return ISD::isNormalLoad(N); | ||||
5584 | } | ||||
5585 | |||||
5586 | static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) { | ||||
5587 | if (isFloatingPointZero(Op)) | ||||
5588 | return DAG.getConstant(0, SDLoc(Op), MVT::i32); | ||||
5589 | |||||
5590 | if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) | ||||
5591 | return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(), | ||||
5592 | Ld->getPointerInfo(), Ld->getAlign(), | ||||
5593 | Ld->getMemOperand()->getFlags()); | ||||
5594 | |||||
5595 | llvm_unreachable("Unknown VFP cmp argument!")::llvm::llvm_unreachable_internal("Unknown VFP cmp argument!" , "llvm/lib/Target/ARM/ARMISelLowering.cpp", 5595); | ||||
5596 | } | ||||
5597 | |||||
5598 | static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, | ||||
5599 | SDValue &RetVal1, SDValue &RetVal2) { | ||||
5600 | SDLoc dl(Op); | ||||
5601 | |||||
5602 | if (isFloatingPointZero(Op)) { | ||||
5603 | RetVal1 = DAG.getConstant(0, dl, MVT::i32); | ||||
5604 | RetVal2 = DAG.getConstant(0, dl, MVT::i32); | ||||
5605 | return; | ||||
5606 | } | ||||
5607 | |||||
5608 | if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) { | ||||
5609 | SDValue Ptr = Ld->getBasePtr(); | ||||
5610 | RetVal1 = | ||||
5611 | DAG.getLoad(MVT::i32, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(), | ||||
5612 | Ld->getAlign(), Ld->getMemOperand()->getFlags()); | ||||
5613 | |||||
5614 | EVT PtrType = Ptr.getValueType(); | ||||
5615 | SDValue NewPtr = DAG.getNode(ISD::ADD, dl, | ||||
5616 | PtrType, Ptr, DAG.getConstant(4, dl, PtrType)); | ||||
5617 | RetVal2 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), NewPtr, | ||||
5618 | Ld->getPointerInfo().getWithOffset(4), | ||||
5619 | commonAlignment(Ld->getAlign(), 4), | ||||
5620 | Ld->getMemOperand()->getFlags()); | ||||
5621 | return; | ||||
5622 | } | ||||
5623 | |||||
5624 | llvm_unreachable("Unknown VFP cmp argument!")::llvm::llvm_unreachable_internal("Unknown VFP cmp argument!" , "llvm/lib/Target/ARM/ARMISelLowering.cpp", 5624); | ||||
5625 | } | ||||
5626 | |||||
/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
/// f32 and even f64 comparisons to integer ones.
SDValue
ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
  // BR_CC operands: chain, condcode, LHS, RHS, destination.
  SDValue Chain = Op.getOperand(0);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue Dest = Op.getOperand(4);
  SDLoc dl(Op);

  bool LHSSeenZero = false;
  bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
  bool RHSSeenZero = false;
  bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);
  // Both operands must be morphable and at least one side must be an FP zero.
  if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
    // If unsafe fp math optimization is enabled and there are no other uses of
    // the CMP operands, and the condition code is EQ or NE, we can optimize it
    // to an integer comparison.
    if (CC == ISD::SETOEQ)
      CC = ISD::SETEQ;
    else if (CC == ISD::SETUNE)
      CC = ISD::SETNE;

    // Mask off the sign bit so +0.0 and -0.0 compare equal as integers.
    SDValue Mask = DAG.getConstant(0x7fffffff, dl, MVT::i32);
    SDValue ARMcc;
    if (LHS.getValueType() == MVT::f32) {
      // f32: single masked i32 compare + branch.
      LHS = DAG.getNode(ISD::AND, dl, MVT::i32,
                        bitcastf32Toi32(LHS, DAG), Mask);
      RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
                        bitcastf32Toi32(RHS, DAG), Mask);
      SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
      SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
      return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
                         Chain, Dest, ARMcc, CCR, Cmp);
    }

    // f64: split each operand into two i32 halves and emit a 64-bit
    // compare-and-branch pseudo; only the high halves have their sign bit
    // masked.
    SDValue LHS1, LHS2;
    SDValue RHS1, RHS2;
    expandf64Toi32(LHS, DAG, LHS1, LHS2);
    expandf64Toi32(RHS, DAG, RHS1, RHS2);
    LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);
    RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
    ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
    ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
    SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
    return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops);
  }

  // Not profitable/possible; fall back to the normal FP lowering.
  return SDValue();
}
5679 | |||||
// Lower ISD::BRCOND (branch on an i1 condition). The only case handled here
// is a branch on the overflow flag of an {s|u}{add|sub|mul}.with.overflow
// node; anything else returns an empty SDValue to use default expansion.
SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue Cond = Op.getOperand(1);
  SDValue Dest = Op.getOperand(2);
  SDLoc dl(Op);

  // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
  // instruction.
  unsigned Opc = Cond.getOpcode();
  // The mul forms are not used on Thumb1.
  bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) &&
                     !Subtarget->isThumb1Only();
  // ResNo 1 is the overflow bit of the XALU node.
  if (Cond.getResNo() == 1 &&
      (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
       Opc == ISD::USUBO || OptimizeMul)) {
    // Only lower legal XALUO ops.
    if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
      return SDValue();

    // The actual operation with overflow check.
    SDValue Value, OverflowCmp;
    SDValue ARMcc;
    std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);

    // Reverse the condition code.
    ARMCC::CondCodes CondCode =
        (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
    CondCode = ARMCC::getOppositeCondition(CondCode);
    ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);

    // Branch directly on the flags produced by the flag-setting operation.
    return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR,
                       OverflowCmp);
  }

  return SDValue();
}
5716 | |||||
// Lower ISD::BR_CC (compare and branch). Handles, in order: softening of
// unsupported FP comparisons, branches on overflow-op results, plain i32
// compare+branch, the unsafe-math FP-to-int optimization, and finally VFP
// compare + conditional branch (two branches when the FP condition needs two
// ARM condition codes).
SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue Dest = Op.getOperand(4);
  SDLoc dl(Op);

  if (isUnsupportedFloatingType(LHS.getValueType())) {
    DAG.getTargetLoweringInfo().softenSetCCOperands(
        DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS);

    // If softenSetCCOperands only returned one value, we should compare it to
    // zero.
    if (!RHS.getNode()) {
      RHS = DAG.getConstant(0, dl, LHS.getValueType());
      CC = ISD::SETNE;
    }
  }

  // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
  // instruction.
  unsigned Opc = LHS.getOpcode();
  bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) &&
                     !Subtarget->isThumb1Only();
  // Matches "(xalu-overflow == 0/1) branch": LHS is the overflow result
  // (ResNo 1) compared for equality against the constant 0 or 1.
  if (LHS.getResNo() == 1 && (isOneConstant(RHS) || isNullConstant(RHS)) &&
      (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
       Opc == ISD::USUBO || OptimizeMul) &&
      (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    // Only lower legal XALUO ops.
    if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
      return SDValue();

    // The actual operation with overflow check.
    SDValue Value, OverflowCmp;
    SDValue ARMcc;
    std::tie(Value, OverflowCmp) = getARMXALUOOp(LHS.getValue(0), DAG, ARMcc);

    // Branch sense depends on both the setcc direction and the constant:
    // "!= 1" and "== 0" both mean branch on no-overflow.
    if ((CC == ISD::SETNE) != isOneConstant(RHS)) {
      // Reverse the condition code.
      ARMCC::CondCodes CondCode =
          (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
      CondCode = ARMCC::getOppositeCondition(CondCode);
      ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
    }
    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);

    return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR,
                       OverflowCmp);
  }

  // Plain integer compare and branch.
  if (LHS.getValueType() == MVT::i32) {
    SDValue ARMcc;
    SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
    return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
                       Chain, Dest, ARMcc, CCR, Cmp);
  }

  // With unsafe FP math, equality-like FP compares may be done as integer
  // bit comparisons (see OptimizeVFPBrcond).
  if (getTargetMachine().Options.UnsafeFPMath &&
      (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
       CC == ISD::SETNE || CC == ISD::SETUNE)) {
    if (SDValue Result = OptimizeVFPBrcond(Op, DAG))
      return Result;
  }

  // FP compare: may map to one or two ARM condition codes (CondCode2 is
  // ARMCC::AL when one suffices).
  ARMCC::CondCodes CondCode, CondCode2;
  FPCCToARMCC(CC, CondCode, CondCode2);

  SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
  SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
  SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
  if (CondCode2 != ARMCC::AL) {
    // Second conditional branch for the second condition code, glued to the
    // flags of the first.
    ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
    SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
    Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
  }
  return Res;
}
5799 | |||||
// Lower ISD::BR_JT (jump-table branch). Computes the entry address as
// table + index*4, then emits either a two-level jump (Thumb2 / v8-M
// Baseline) or a load of the target address followed by an indirect branch
// (with a PC-relative add in PIC/ROPI mode).
SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue Table = Op.getOperand(1);
  SDValue Index = Op.getOperand(2);
  SDLoc dl(Op);

  EVT PTy = getPointerTy(DAG.getDataLayout());
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
  Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI);
  // Each jump-table entry is 4 bytes wide.
  Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, dl, PTy));
  SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Index);
  if (Subtarget->isThumb2() || (Subtarget->hasV8MBaselineOps() && Subtarget->isThumb())) {
    // Thumb2 and ARMv8-M use a two-level jump. That is, it jumps into the jump table
    // which does another jump to the destination. This also makes it easier
    // to translate it to TBB / TBH later (Thumb2 only).
    // FIXME: This might not work if the function is extremely large.
    return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
                       Addr, Op.getOperand(2), JTI);
  }
  if (isPositionIndependent() || Subtarget->isROPI()) {
    // PIC/ROPI: table entries are offsets relative to the table base, so add
    // the loaded value back onto the table address.
    Addr =
        DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
                    MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
    Chain = Addr.getValue(1);
    Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Addr);
    return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
  } else {
    // Absolute mode: the table entry is the destination address itself.
    Addr =
        DAG.getLoad(PTy, dl, Chain, Addr,
                    MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
    Chain = Addr.getValue(1);
    return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
  }
}
5835 | |||||
5836 | static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) { | ||||
5837 | EVT VT = Op.getValueType(); | ||||
5838 | SDLoc dl(Op); | ||||
5839 | |||||
5840 | if (Op.getValueType().getVectorElementType() == MVT::i32) { | ||||
5841 | if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32) | ||||
5842 | return Op; | ||||
5843 | return DAG.UnrollVectorOp(Op.getNode()); | ||||
5844 | } | ||||
5845 | |||||
5846 | const bool HasFullFP16 = DAG.getSubtarget<ARMSubtarget>().hasFullFP16(); | ||||
5847 | |||||
5848 | EVT NewTy; | ||||
5849 | const EVT OpTy = Op.getOperand(0).getValueType(); | ||||
5850 | if (OpTy == MVT::v4f32) | ||||
5851 | NewTy = MVT::v4i32; | ||||
5852 | else if (OpTy == MVT::v4f16 && HasFullFP16) | ||||
5853 | NewTy = MVT::v4i16; | ||||
5854 | else if (OpTy == MVT::v8f16 && HasFullFP16) | ||||
5855 | NewTy = MVT::v8i16; | ||||
5856 | else | ||||
5857 | llvm_unreachable("Invalid type for custom lowering!")::llvm::llvm_unreachable_internal("Invalid type for custom lowering!" , "llvm/lib/Target/ARM/ARMISelLowering.cpp", 5857); | ||||
5858 | |||||
5859 | if (VT != MVT::v4i16 && VT != MVT::v8i16) | ||||
5860 | return DAG.UnrollVectorOp(Op.getNode()); | ||||
5861 | |||||
5862 | Op = DAG.getNode(Op.getOpcode(), dl, NewTy, Op.getOperand(0)); | ||||
5863 | return DAG.getNode(ISD::TRUNCATE, dl, VT, Op); | ||||
5864 | } | ||||
5865 | |||||
// Lower FP_TO_SINT/FP_TO_UINT (and their STRICT_ variants). Vector cases go
// to LowerVectorFP_TO_INT; unsupported FP source types become libcalls;
// strict ops are currently rewritten to their non-strict equivalents.
SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  if (VT.isVector())
    return LowerVectorFP_TO_INT(Op, DAG);

  bool IsStrict = Op->isStrictFPOpcode();
  // Strict ops carry the chain in operand 0; the FP source follows it.
  SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);

  if (isUnsupportedFloatingType(SrcVal.getValueType())) {
    // No hardware support for the source type: emit the appropriate
    // fptosi/fptoui runtime-library call.
    RTLIB::Libcall LC;
    if (Op.getOpcode() == ISD::FP_TO_SINT ||
        Op.getOpcode() == ISD::STRICT_FP_TO_SINT)
      LC = RTLIB::getFPTOSINT(SrcVal.getValueType(),
                              Op.getValueType());
    else
      LC = RTLIB::getFPTOUINT(SrcVal.getValueType(),
                              Op.getValueType());
    SDLoc Loc(Op);
    MakeLibCallOptions CallOptions;
    SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
    SDValue Result;
    std::tie(Result, Chain) = makeLibCall(DAG, LC, Op.getValueType(), SrcVal,
                                          CallOptions, Loc, Chain);
    // Strict callers expect {result, chain}.
    return IsStrict ? DAG.getMergeValues({Result, Chain}, Loc) : Result;
  }

  // FIXME: Remove this when we have strict fp instruction selection patterns
  if (IsStrict) {
    SDLoc Loc(Op);
    SDValue Result =
        DAG.getNode(Op.getOpcode() == ISD::STRICT_FP_TO_SINT ? ISD::FP_TO_SINT
                                                             : ISD::FP_TO_UINT,
                    Loc, Op.getValueType(), SrcVal);
    // Pass the incoming chain through unchanged as the second result.
    return DAG.getMergeValues({Result, Op.getOperand(0)}, Loc);
  }

  return Op;
}
5904 | |||||
5905 | static SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, | ||||
5906 | const ARMSubtarget *Subtarget) { | ||||
5907 | EVT VT = Op.getValueType(); | ||||
5908 | EVT ToVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); | ||||
5909 | EVT FromVT = Op.getOperand(0).getValueType(); | ||||
5910 | |||||
5911 | if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f32) | ||||
5912 | return Op; | ||||
5913 | if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f64 && | ||||
5914 | Subtarget->hasFP64()) | ||||
5915 | return Op; | ||||
5916 | if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f16 && | ||||
5917 | Subtarget->hasFullFP16()) | ||||
5918 | return Op; | ||||
5919 | if (VT == MVT::v4i32 && ToVT == MVT::i32 && FromVT == MVT::v4f32 && | ||||
5920 | Subtarget->hasMVEFloatOps()) | ||||
5921 | return Op; | ||||
5922 | if (VT == MVT::v8i16 && ToVT == MVT::i16 && FromVT == MVT::v8f16 && | ||||
5923 | Subtarget->hasMVEFloatOps()) | ||||
5924 | return Op; | ||||
5925 | |||||
5926 | if (FromVT != MVT::v4f32 && FromVT != MVT::v8f16) | ||||
5927 | return SDValue(); | ||||
5928 | |||||
5929 | SDLoc DL(Op); | ||||
5930 | bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT; | ||||
5931 | unsigned BW = ToVT.getScalarSizeInBits() - IsSigned; | ||||
5932 | SDValue CVT = DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0), | ||||
5933 | DAG.getValueType(VT.getScalarType())); | ||||
5934 | SDValue Max = DAG.getNode(IsSigned ? ISD::SMIN : ISD::UMIN, DL, VT, CVT, | ||||
5935 | DAG.getConstant((1 << BW) - 1, DL, VT)); | ||||
5936 | if (IsSigned) | ||||
5937 | Max = DAG.getNode(ISD::SMAX, DL, VT, Max, | ||||
5938 | DAG.getConstant(-(1 << BW), DL, VT)); | ||||
5939 | return Max; | ||||
5940 | } | ||||
5941 | |||||
5942 | static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { | ||||
5943 | EVT VT = Op.getValueType(); | ||||
5944 | SDLoc dl(Op); | ||||
5945 | |||||
5946 | if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) { | ||||
5947 | if (VT.getVectorElementType() == MVT::f32) | ||||
5948 | return Op; | ||||
5949 | return DAG.UnrollVectorOp(Op.getNode()); | ||||
5950 | } | ||||
5951 | |||||
5952 | assert((Op.getOperand(0).getValueType() == MVT::v4i16 ||(static_cast <bool> ((Op.getOperand(0).getValueType() == MVT::v4i16 || Op.getOperand(0).getValueType() == MVT::v8i16) && "Invalid type for custom lowering!") ? void (0) : __assert_fail ("(Op.getOperand(0).getValueType() == MVT::v4i16 || Op.getOperand(0).getValueType() == MVT::v8i16) && \"Invalid type for custom lowering!\"" , "llvm/lib/Target/ARM/ARMISelLowering.cpp", 5954, __extension__ __PRETTY_FUNCTION__)) | ||||