File: | llvm/include/llvm/CodeGen/SelectionDAGNodes.h |
Warning: | line 1114, column 10 | Called C++ object pointer is null |
1 | //===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation ----===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the AArch64TargetLowering class. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "AArch64ISelLowering.h" |
14 | #include "AArch64CallingConvention.h" |
15 | #include "AArch64ExpandImm.h" |
16 | #include "AArch64MachineFunctionInfo.h" |
17 | #include "AArch64PerfectShuffle.h" |
18 | #include "AArch64RegisterInfo.h" |
19 | #include "AArch64Subtarget.h" |
20 | #include "MCTargetDesc/AArch64AddressingModes.h" |
21 | #include "Utils/AArch64BaseInfo.h" |
22 | #include "llvm/ADT/APFloat.h" |
23 | #include "llvm/ADT/APInt.h" |
24 | #include "llvm/ADT/ArrayRef.h" |
25 | #include "llvm/ADT/STLExtras.h" |
26 | #include "llvm/ADT/SmallSet.h" |
27 | #include "llvm/ADT/SmallVector.h" |
28 | #include "llvm/ADT/Statistic.h" |
29 | #include "llvm/ADT/StringRef.h" |
30 | #include "llvm/ADT/Triple.h" |
31 | #include "llvm/ADT/Twine.h" |
32 | #include "llvm/Analysis/ObjCARCUtil.h" |
33 | #include "llvm/Analysis/VectorUtils.h" |
34 | #include "llvm/CodeGen/CallingConvLower.h" |
35 | #include "llvm/CodeGen/MachineBasicBlock.h" |
36 | #include "llvm/CodeGen/MachineFrameInfo.h" |
37 | #include "llvm/CodeGen/MachineFunction.h" |
38 | #include "llvm/CodeGen/MachineInstr.h" |
39 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
40 | #include "llvm/CodeGen/MachineMemOperand.h" |
41 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
42 | #include "llvm/CodeGen/RuntimeLibcalls.h" |
43 | #include "llvm/CodeGen/SelectionDAG.h" |
44 | #include "llvm/CodeGen/SelectionDAGNodes.h" |
45 | #include "llvm/CodeGen/TargetCallingConv.h" |
46 | #include "llvm/CodeGen/TargetInstrInfo.h" |
47 | #include "llvm/CodeGen/ValueTypes.h" |
48 | #include "llvm/IR/Attributes.h" |
49 | #include "llvm/IR/Constants.h" |
50 | #include "llvm/IR/DataLayout.h" |
51 | #include "llvm/IR/DebugLoc.h" |
52 | #include "llvm/IR/DerivedTypes.h" |
53 | #include "llvm/IR/Function.h" |
54 | #include "llvm/IR/GetElementPtrTypeIterator.h" |
55 | #include "llvm/IR/GlobalValue.h" |
56 | #include "llvm/IR/IRBuilder.h" |
57 | #include "llvm/IR/Instruction.h" |
58 | #include "llvm/IR/Instructions.h" |
59 | #include "llvm/IR/IntrinsicInst.h" |
60 | #include "llvm/IR/Intrinsics.h" |
61 | #include "llvm/IR/IntrinsicsAArch64.h" |
62 | #include "llvm/IR/Module.h" |
63 | #include "llvm/IR/OperandTraits.h" |
64 | #include "llvm/IR/PatternMatch.h" |
65 | #include "llvm/IR/Type.h" |
66 | #include "llvm/IR/Use.h" |
67 | #include "llvm/IR/Value.h" |
68 | #include "llvm/MC/MCRegisterInfo.h" |
69 | #include "llvm/Support/Casting.h" |
70 | #include "llvm/Support/CodeGen.h" |
71 | #include "llvm/Support/CommandLine.h" |
72 | #include "llvm/Support/Compiler.h" |
73 | #include "llvm/Support/Debug.h" |
74 | #include "llvm/Support/ErrorHandling.h" |
75 | #include "llvm/Support/KnownBits.h" |
76 | #include "llvm/Support/MachineValueType.h" |
77 | #include "llvm/Support/MathExtras.h" |
78 | #include "llvm/Support/raw_ostream.h" |
79 | #include "llvm/Target/TargetMachine.h" |
80 | #include "llvm/Target/TargetOptions.h" |
81 | #include <algorithm> |
82 | #include <bitset> |
83 | #include <cassert> |
84 | #include <cctype> |
85 | #include <cstdint> |
86 | #include <cstdlib> |
87 | #include <iterator> |
88 | #include <limits> |
89 | #include <tuple> |
90 | #include <utility> |
91 | #include <vector> |
92 | |
93 | using namespace llvm; |
94 | using namespace llvm::PatternMatch; |
95 | |
96 | #define DEBUG_TYPE "aarch64-lower" |
97 | |
98 | STATISTIC(NumTailCalls, "Number of tail calls"); |
99 | STATISTIC(NumShiftInserts, "Number of vector shift inserts"); |
100 | STATISTIC(NumOptimizedImms, "Number of times immediates were optimized"); |
101 | |
102 | // FIXME: The necessary dtprel relocations don't seem to be supported |
103 | // well in the GNU bfd and gold linkers at the moment. Therefore, by |
104 | // default, for now, fall back to GeneralDynamic code generation. |
105 | cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration( |
106 | "aarch64-elf-ldtls-generation", cl::Hidden, |
107 | cl::desc("Allow AArch64 Local Dynamic TLS code generation"), |
108 | cl::init(false)); |
109 | |
110 | static cl::opt<bool> |
111 | EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden, |
112 | cl::desc("Enable AArch64 logical imm instruction " |
113 | "optimization"), |
114 | cl::init(true)); |
115 | |
116 | // Temporary option added for the purpose of testing functionality added |
117 | // to DAGCombiner.cpp in D92230. It is expected that this can be removed |
118 | // in the future, once both implementations are based off MGATHER rather |
119 | // than the GLD1 nodes added for the SVE gather load intrinsics. |
120 | static cl::opt<bool> |
121 | EnableCombineMGatherIntrinsics("aarch64-enable-mgather-combine", cl::Hidden, |
122 | cl::desc("Combine extends of AArch64 masked " |
123 | "gather intrinsics"), |
124 | cl::init(true)); |
125 | |
126 | /// Value type used for condition codes. |
127 | static const MVT MVT_CC = MVT::i32; |
128 | |
129 | static inline EVT getPackedSVEVectorVT(EVT VT) { |
130 | switch (VT.getSimpleVT().SimpleTy) { |
131 | default: |
132 | llvm_unreachable("unexpected element type for vector")::llvm::llvm_unreachable_internal("unexpected element type for vector" , "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp" , 132); |
133 | case MVT::i8: |
134 | return MVT::nxv16i8; |
135 | case MVT::i16: |
136 | return MVT::nxv8i16; |
137 | case MVT::i32: |
138 | return MVT::nxv4i32; |
139 | case MVT::i64: |
140 | return MVT::nxv2i64; |
141 | case MVT::f16: |
142 | return MVT::nxv8f16; |
143 | case MVT::f32: |
144 | return MVT::nxv4f32; |
145 | case MVT::f64: |
146 | return MVT::nxv2f64; |
147 | case MVT::bf16: |
148 | return MVT::nxv8bf16; |
149 | } |
150 | } |
151 | |
152 | // NOTE: Currently there's only a need to return integer vector types. If this |
153 | // changes then just add an extra "type" parameter. |
154 | static inline EVT getPackedSVEVectorVT(ElementCount EC) { |
155 | switch (EC.getKnownMinValue()) { |
156 | default: |
157 | llvm_unreachable("unexpected element count for vector")::llvm::llvm_unreachable_internal("unexpected element count for vector" , "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp" , 157); |
158 | case 16: |
159 | return MVT::nxv16i8; |
160 | case 8: |
161 | return MVT::nxv8i16; |
162 | case 4: |
163 | return MVT::nxv4i32; |
164 | case 2: |
165 | return MVT::nxv2i64; |
166 | } |
167 | } |
168 | |
169 | static inline EVT getPromotedVTForPredicate(EVT VT) { |
170 | assert(VT.isScalableVector() && (VT.getVectorElementType() == MVT::i1) && |
171 | "Expected scalable predicate vector type!"); |
172 | switch (VT.getVectorMinNumElements()) { |
173 | default: |
174 | llvm_unreachable("unexpected element count for vector")::llvm::llvm_unreachable_internal("unexpected element count for vector" , "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp" , 174); |
175 | case 2: |
176 | return MVT::nxv2i64; |
177 | case 4: |
178 | return MVT::nxv4i32; |
179 | case 8: |
180 | return MVT::nxv8i16; |
181 | case 16: |
182 | return MVT::nxv16i8; |
183 | } |
184 | } |
185 | |
186 | /// Returns true if VT's elements occupy the lowest bit positions of its |
187 | /// associated register class without any intervening space. |
188 | /// |
189 | /// For example, nxv2f16, nxv4f16 and nxv8f16 are legal types that belong to the |
190 | /// same register class, but only nxv8f16 can be treated as a packed vector. |
191 | static inline bool isPackedVectorType(EVT VT, SelectionDAG &DAG) { |
192 | assert(VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) && |
193 | "Expected legal vector type!"); |
194 | return VT.isFixedLengthVector() || |
195 | VT.getSizeInBits().getKnownMinSize() == AArch64::SVEBitsPerBlock; |
196 | } |
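// A minimal illustration of the distinction above (an annotation, not part of
// the original file; assumes a 128-bit SVE vector length):
//   isPackedVectorType(MVT::nxv8f16, DAG) -> true   (fills the whole Z register)
//   isPackedVectorType(MVT::nxv4f16, DAG) -> false  (same register class, but
//                                                    unused space between elements)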
197 | |
198 | // Returns true for ####_MERGE_PASSTHRU opcodes, whose operands have a leading |
199 | // predicate and end with a passthru value matching the result type. |
200 | static bool isMergePassthruOpcode(unsigned Opc) { |
201 | switch (Opc) { |
202 | default: |
203 | return false; |
204 | case AArch64ISD::BITREVERSE_MERGE_PASSTHRU: |
205 | case AArch64ISD::BSWAP_MERGE_PASSTHRU: |
206 | case AArch64ISD::CTLZ_MERGE_PASSTHRU: |
207 | case AArch64ISD::CTPOP_MERGE_PASSTHRU: |
208 | case AArch64ISD::DUP_MERGE_PASSTHRU: |
209 | case AArch64ISD::ABS_MERGE_PASSTHRU: |
210 | case AArch64ISD::NEG_MERGE_PASSTHRU: |
211 | case AArch64ISD::FNEG_MERGE_PASSTHRU: |
212 | case AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU: |
213 | case AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU: |
214 | case AArch64ISD::FCEIL_MERGE_PASSTHRU: |
215 | case AArch64ISD::FFLOOR_MERGE_PASSTHRU: |
216 | case AArch64ISD::FNEARBYINT_MERGE_PASSTHRU: |
217 | case AArch64ISD::FRINT_MERGE_PASSTHRU: |
218 | case AArch64ISD::FROUND_MERGE_PASSTHRU: |
219 | case AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU: |
220 | case AArch64ISD::FTRUNC_MERGE_PASSTHRU: |
221 | case AArch64ISD::FP_ROUND_MERGE_PASSTHRU: |
222 | case AArch64ISD::FP_EXTEND_MERGE_PASSTHRU: |
223 | case AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU: |
224 | case AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU: |
225 | case AArch64ISD::FCVTZU_MERGE_PASSTHRU: |
226 | case AArch64ISD::FCVTZS_MERGE_PASSTHRU: |
227 | case AArch64ISD::FSQRT_MERGE_PASSTHRU: |
228 | case AArch64ISD::FRECPX_MERGE_PASSTHRU: |
229 | case AArch64ISD::FABS_MERGE_PASSTHRU: |
230 | return true; |
231 | } |
232 | } |
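// Illustrative sketch of the operand layout these opcodes share (an annotation,
// not original source); for example, a merged negation looks like:
//   Result = FNEG_MERGE_PASSTHRU(Pg, Src, Passthru)
// Lanes where the governing predicate Pg is active receive fneg(Src); inactive
// lanes take the corresponding lane of Passthru, whose type matches Result.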
233 | |
234 | AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, |
235 | const AArch64Subtarget &STI) |
236 | : TargetLowering(TM), Subtarget(&STI) { |
237 | // AArch64 doesn't have comparisons which set GPRs or setcc instructions, so |
238 | // we have to make something up. Arbitrarily, choose ZeroOrOne. |
239 | setBooleanContents(ZeroOrOneBooleanContent); |
240 | // When comparing vectors the result sets the different elements in the |
241 | // vector to all-one or all-zero. |
242 | setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); |
243 | |
244 | // Set up the register classes. |
245 | addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass); |
246 | addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass); |
247 | |
248 | if (Subtarget->hasFPARMv8()) { |
249 | addRegisterClass(MVT::f16, &AArch64::FPR16RegClass); |
250 | addRegisterClass(MVT::bf16, &AArch64::FPR16RegClass); |
251 | addRegisterClass(MVT::f32, &AArch64::FPR32RegClass); |
252 | addRegisterClass(MVT::f64, &AArch64::FPR64RegClass); |
253 | addRegisterClass(MVT::f128, &AArch64::FPR128RegClass); |
254 | } |
255 | |
256 | if (Subtarget->hasNEON()) { |
257 | addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass); |
258 | addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass); |
259 | // Someone set us up the NEON. |
260 | addDRTypeForNEON(MVT::v2f32); |
261 | addDRTypeForNEON(MVT::v8i8); |
262 | addDRTypeForNEON(MVT::v4i16); |
263 | addDRTypeForNEON(MVT::v2i32); |
264 | addDRTypeForNEON(MVT::v1i64); |
265 | addDRTypeForNEON(MVT::v1f64); |
266 | addDRTypeForNEON(MVT::v4f16); |
267 | if (Subtarget->hasBF16()) |
268 | addDRTypeForNEON(MVT::v4bf16); |
269 | |
270 | addQRTypeForNEON(MVT::v4f32); |
271 | addQRTypeForNEON(MVT::v2f64); |
272 | addQRTypeForNEON(MVT::v16i8); |
273 | addQRTypeForNEON(MVT::v8i16); |
274 | addQRTypeForNEON(MVT::v4i32); |
275 | addQRTypeForNEON(MVT::v2i64); |
276 | addQRTypeForNEON(MVT::v8f16); |
277 | if (Subtarget->hasBF16()) |
278 | addQRTypeForNEON(MVT::v8bf16); |
279 | } |
280 | |
281 | if (Subtarget->hasSVE()) { |
282 | // Add legal sve predicate types |
283 | addRegisterClass(MVT::nxv2i1, &AArch64::PPRRegClass); |
284 | addRegisterClass(MVT::nxv4i1, &AArch64::PPRRegClass); |
285 | addRegisterClass(MVT::nxv8i1, &AArch64::PPRRegClass); |
286 | addRegisterClass(MVT::nxv16i1, &AArch64::PPRRegClass); |
287 | |
288 | // Add legal sve data types |
289 | addRegisterClass(MVT::nxv16i8, &AArch64::ZPRRegClass); |
290 | addRegisterClass(MVT::nxv8i16, &AArch64::ZPRRegClass); |
291 | addRegisterClass(MVT::nxv4i32, &AArch64::ZPRRegClass); |
292 | addRegisterClass(MVT::nxv2i64, &AArch64::ZPRRegClass); |
293 | |
294 | addRegisterClass(MVT::nxv2f16, &AArch64::ZPRRegClass); |
295 | addRegisterClass(MVT::nxv4f16, &AArch64::ZPRRegClass); |
296 | addRegisterClass(MVT::nxv8f16, &AArch64::ZPRRegClass); |
297 | addRegisterClass(MVT::nxv2f32, &AArch64::ZPRRegClass); |
298 | addRegisterClass(MVT::nxv4f32, &AArch64::ZPRRegClass); |
299 | addRegisterClass(MVT::nxv2f64, &AArch64::ZPRRegClass); |
300 | |
301 | if (Subtarget->hasBF16()) { |
302 | addRegisterClass(MVT::nxv2bf16, &AArch64::ZPRRegClass); |
303 | addRegisterClass(MVT::nxv4bf16, &AArch64::ZPRRegClass); |
304 | addRegisterClass(MVT::nxv8bf16, &AArch64::ZPRRegClass); |
305 | } |
306 | |
307 | if (Subtarget->useSVEForFixedLengthVectors()) { |
308 | for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) |
309 | if (useSVEForFixedLengthVectorVT(VT)) |
310 | addRegisterClass(VT, &AArch64::ZPRRegClass); |
311 | |
312 | for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) |
313 | if (useSVEForFixedLengthVectorVT(VT)) |
314 | addRegisterClass(VT, &AArch64::ZPRRegClass); |
315 | } |
316 | |
317 | for (auto VT : { MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64 }) { |
318 | setOperationAction(ISD::SADDSAT, VT, Legal); |
319 | setOperationAction(ISD::UADDSAT, VT, Legal); |
320 | setOperationAction(ISD::SSUBSAT, VT, Legal); |
321 | setOperationAction(ISD::USUBSAT, VT, Legal); |
322 | setOperationAction(ISD::UREM, VT, Expand); |
323 | setOperationAction(ISD::SREM, VT, Expand); |
324 | setOperationAction(ISD::SDIVREM, VT, Expand); |
325 | setOperationAction(ISD::UDIVREM, VT, Expand); |
326 | } |
327 | |
328 | for (auto VT : |
329 | { MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8, |
330 | MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16 }) |
331 | setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Legal); |
332 | |
333 | for (auto VT : |
334 | { MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32, MVT::nxv4f32, |
335 | MVT::nxv2f64 }) { |
336 | setCondCodeAction(ISD::SETO, VT, Expand); |
337 | setCondCodeAction(ISD::SETOLT, VT, Expand); |
338 | setCondCodeAction(ISD::SETLT, VT, Expand); |
339 | setCondCodeAction(ISD::SETOLE, VT, Expand); |
340 | setCondCodeAction(ISD::SETLE, VT, Expand); |
341 | setCondCodeAction(ISD::SETULT, VT, Expand); |
342 | setCondCodeAction(ISD::SETULE, VT, Expand); |
343 | setCondCodeAction(ISD::SETUGE, VT, Expand); |
344 | setCondCodeAction(ISD::SETUGT, VT, Expand); |
345 | setCondCodeAction(ISD::SETUEQ, VT, Expand); |
346 | setCondCodeAction(ISD::SETUNE, VT, Expand); |
347 | |
348 | setOperationAction(ISD::FREM, VT, Expand); |
349 | setOperationAction(ISD::FPOW, VT, Expand); |
350 | setOperationAction(ISD::FPOWI, VT, Expand); |
351 | setOperationAction(ISD::FCOS, VT, Expand); |
352 | setOperationAction(ISD::FSIN, VT, Expand); |
353 | setOperationAction(ISD::FSINCOS, VT, Expand); |
354 | setOperationAction(ISD::FEXP, VT, Expand); |
355 | setOperationAction(ISD::FEXP2, VT, Expand); |
356 | setOperationAction(ISD::FLOG, VT, Expand); |
357 | setOperationAction(ISD::FLOG2, VT, Expand); |
358 | setOperationAction(ISD::FLOG10, VT, Expand); |
359 | } |
360 | } |
361 | |
362 | // Compute derived properties from the register classes |
363 | computeRegisterProperties(Subtarget->getRegisterInfo()); |
364 | |
365 | // Provide all sorts of operation actions |
366 | setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); |
367 | setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); |
368 | setOperationAction(ISD::SETCC, MVT::i32, Custom); |
369 | setOperationAction(ISD::SETCC, MVT::i64, Custom); |
370 | setOperationAction(ISD::SETCC, MVT::f16, Custom); |
371 | setOperationAction(ISD::SETCC, MVT::f32, Custom); |
372 | setOperationAction(ISD::SETCC, MVT::f64, Custom); |
373 | setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Custom); |
374 | setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Custom); |
375 | setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Custom); |
376 | setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom); |
377 | setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Custom); |
378 | setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Custom); |
379 | setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); |
380 | setOperationAction(ISD::BITREVERSE, MVT::i64, Legal); |
381 | setOperationAction(ISD::BRCOND, MVT::Other, Expand); |
382 | setOperationAction(ISD::BR_CC, MVT::i32, Custom); |
383 | setOperationAction(ISD::BR_CC, MVT::i64, Custom); |
384 | setOperationAction(ISD::BR_CC, MVT::f16, Custom); |
385 | setOperationAction(ISD::BR_CC, MVT::f32, Custom); |
386 | setOperationAction(ISD::BR_CC, MVT::f64, Custom); |
387 | setOperationAction(ISD::SELECT, MVT::i32, Custom); |
388 | setOperationAction(ISD::SELECT, MVT::i64, Custom); |
389 | setOperationAction(ISD::SELECT, MVT::f16, Custom); |
390 | setOperationAction(ISD::SELECT, MVT::f32, Custom); |
391 | setOperationAction(ISD::SELECT, MVT::f64, Custom); |
392 | setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); |
393 | setOperationAction(ISD::SELECT_CC, MVT::i64, Custom); |
394 | setOperationAction(ISD::SELECT_CC, MVT::f16, Custom); |
395 | setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); |
396 | setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); |
397 | setOperationAction(ISD::BR_JT, MVT::Other, Custom); |
398 | setOperationAction(ISD::JumpTable, MVT::i64, Custom); |
399 | |
400 | setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom); |
401 | setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom); |
402 | setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom); |
403 | |
404 | setOperationAction(ISD::FREM, MVT::f32, Expand); |
405 | setOperationAction(ISD::FREM, MVT::f64, Expand); |
406 | setOperationAction(ISD::FREM, MVT::f80, Expand); |
407 | |
408 | setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand); |
409 | |
410 | // Custom lowering hooks are needed for XOR |
411 | // to fold it into CSINC/CSINV. |
412 | setOperationAction(ISD::XOR, MVT::i32, Custom); |
413 | setOperationAction(ISD::XOR, MVT::i64, Custom); |
414 | |
415 | // Virtually no operation on f128 is legal, but LLVM can't expand them when |
416 | // there's a valid register class, so we need custom operations in most cases. |
417 | setOperationAction(ISD::FABS, MVT::f128, Expand); |
418 | setOperationAction(ISD::FADD, MVT::f128, LibCall); |
419 | setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand); |
420 | setOperationAction(ISD::FCOS, MVT::f128, Expand); |
421 | setOperationAction(ISD::FDIV, MVT::f128, LibCall); |
422 | setOperationAction(ISD::FMA, MVT::f128, Expand); |
423 | setOperationAction(ISD::FMUL, MVT::f128, LibCall); |
424 | setOperationAction(ISD::FNEG, MVT::f128, Expand); |
425 | setOperationAction(ISD::FPOW, MVT::f128, Expand); |
426 | setOperationAction(ISD::FREM, MVT::f128, Expand); |
427 | setOperationAction(ISD::FRINT, MVT::f128, Expand); |
428 | setOperationAction(ISD::FSIN, MVT::f128, Expand); |
429 | setOperationAction(ISD::FSINCOS, MVT::f128, Expand); |
430 | setOperationAction(ISD::FSQRT, MVT::f128, Expand); |
431 | setOperationAction(ISD::FSUB, MVT::f128, LibCall); |
432 | setOperationAction(ISD::FTRUNC, MVT::f128, Expand); |
433 | setOperationAction(ISD::SETCC, MVT::f128, Custom); |
434 | setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Custom); |
435 | setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Custom); |
436 | setOperationAction(ISD::BR_CC, MVT::f128, Custom); |
437 | setOperationAction(ISD::SELECT, MVT::f128, Custom); |
438 | setOperationAction(ISD::SELECT_CC, MVT::f128, Custom); |
439 | setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom); |
440 | |
441 | // Lowering for many of the conversions is actually specified by the non-f128 |
442 | // type. The LowerXXX function will be trivial when f128 isn't involved. |
443 | setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); |
444 | setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); |
445 | setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom); |
446 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom); |
447 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom); |
448 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i128, Custom); |
449 | setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); |
450 | setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); |
451 | setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom); |
452 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom); |
453 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom); |
454 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i128, Custom); |
455 | setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); |
456 | setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); |
457 | setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom); |
458 | setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom); |
459 | setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom); |
460 | setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i128, Custom); |
461 | setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); |
462 | setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); |
463 | setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom); |
464 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom); |
465 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom); |
466 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, Custom); |
467 | setOperationAction(ISD::FP_ROUND, MVT::f16, Custom); |
468 | setOperationAction(ISD::FP_ROUND, MVT::f32, Custom); |
469 | setOperationAction(ISD::FP_ROUND, MVT::f64, Custom); |
470 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom); |
471 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom); |
472 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom); |
473 | |
474 | // Variable arguments. |
475 | setOperationAction(ISD::VASTART, MVT::Other, Custom); |
476 | setOperationAction(ISD::VAARG, MVT::Other, Custom); |
477 | setOperationAction(ISD::VACOPY, MVT::Other, Custom); |
478 | setOperationAction(ISD::VAEND, MVT::Other, Expand); |
479 | |
480 | // Variable-sized objects. |
481 | setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); |
482 | setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); |
483 | |
484 | if (Subtarget->isTargetWindows()) |
485 | setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom); |
486 | else |
487 | setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); |
488 | |
489 | // Constant pool entries |
490 | setOperationAction(ISD::ConstantPool, MVT::i64, Custom); |
491 | |
492 | // BlockAddress |
493 | setOperationAction(ISD::BlockAddress, MVT::i64, Custom); |
494 | |
495 | // Add/Sub overflow ops with MVT::Glues are lowered to NZCV dependences. |
496 | setOperationAction(ISD::ADDC, MVT::i32, Custom); |
497 | setOperationAction(ISD::ADDE, MVT::i32, Custom); |
498 | setOperationAction(ISD::SUBC, MVT::i32, Custom); |
499 | setOperationAction(ISD::SUBE, MVT::i32, Custom); |
500 | setOperationAction(ISD::ADDC, MVT::i64, Custom); |
501 | setOperationAction(ISD::ADDE, MVT::i64, Custom); |
502 | setOperationAction(ISD::SUBC, MVT::i64, Custom); |
503 | setOperationAction(ISD::SUBE, MVT::i64, Custom); |
504 | |
505 | // AArch64 lacks both left-rotate and popcount instructions. |
506 | setOperationAction(ISD::ROTL, MVT::i32, Expand); |
507 | setOperationAction(ISD::ROTL, MVT::i64, Expand); |
508 | for (MVT VT : MVT::fixedlen_vector_valuetypes()) { |
509 | setOperationAction(ISD::ROTL, VT, Expand); |
510 | setOperationAction(ISD::ROTR, VT, Expand); |
511 | } |
512 | |
513 | // AArch64 doesn't have i32 MULH{S|U}. |
514 | setOperationAction(ISD::MULHU, MVT::i32, Expand); |
515 | setOperationAction(ISD::MULHS, MVT::i32, Expand); |
516 | |
517 | // AArch64 doesn't have {U|S}MUL_LOHI. |
518 | setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); |
519 | setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); |
520 | |
521 | setOperationAction(ISD::CTPOP, MVT::i32, Custom); |
522 | setOperationAction(ISD::CTPOP, MVT::i64, Custom); |
523 | setOperationAction(ISD::CTPOP, MVT::i128, Custom); |
524 | |
525 | setOperationAction(ISD::ABS, MVT::i32, Custom); |
526 | setOperationAction(ISD::ABS, MVT::i64, Custom); |
527 | |
528 | setOperationAction(ISD::SDIVREM, MVT::i32, Expand); |
529 | setOperationAction(ISD::SDIVREM, MVT::i64, Expand); |
530 | for (MVT VT : MVT::fixedlen_vector_valuetypes()) { |
531 | setOperationAction(ISD::SDIVREM, VT, Expand); |
532 | setOperationAction(ISD::UDIVREM, VT, Expand); |
533 | } |
534 | setOperationAction(ISD::SREM, MVT::i32, Expand); |
535 | setOperationAction(ISD::SREM, MVT::i64, Expand); |
536 | setOperationAction(ISD::UDIVREM, MVT::i32, Expand); |
537 | setOperationAction(ISD::UDIVREM, MVT::i64, Expand); |
538 | setOperationAction(ISD::UREM, MVT::i32, Expand); |
539 | setOperationAction(ISD::UREM, MVT::i64, Expand); |
540 | |
541 | // Custom lower Add/Sub/Mul with overflow. |
542 | setOperationAction(ISD::SADDO, MVT::i32, Custom); |
543 | setOperationAction(ISD::SADDO, MVT::i64, Custom); |
544 | setOperationAction(ISD::UADDO, MVT::i32, Custom); |
545 | setOperationAction(ISD::UADDO, MVT::i64, Custom); |
546 | setOperationAction(ISD::SSUBO, MVT::i32, Custom); |
547 | setOperationAction(ISD::SSUBO, MVT::i64, Custom); |
548 | setOperationAction(ISD::USUBO, MVT::i32, Custom); |
549 | setOperationAction(ISD::USUBO, MVT::i64, Custom); |
550 | setOperationAction(ISD::SMULO, MVT::i32, Custom); |
551 | setOperationAction(ISD::SMULO, MVT::i64, Custom); |
552 | setOperationAction(ISD::UMULO, MVT::i32, Custom); |
553 | setOperationAction(ISD::UMULO, MVT::i64, Custom); |
554 | |
555 | setOperationAction(ISD::FSIN, MVT::f32, Expand); |
556 | setOperationAction(ISD::FSIN, MVT::f64, Expand); |
557 | setOperationAction(ISD::FCOS, MVT::f32, Expand); |
558 | setOperationAction(ISD::FCOS, MVT::f64, Expand); |
559 | setOperationAction(ISD::FPOW, MVT::f32, Expand); |
560 | setOperationAction(ISD::FPOW, MVT::f64, Expand); |
561 | setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); |
562 | setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); |
563 | if (Subtarget->hasFullFP16()) |
564 | setOperationAction(ISD::FCOPYSIGN, MVT::f16, Custom); |
565 | else |
566 | setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote); |
567 | |
568 | setOperationAction(ISD::FREM, MVT::f16, Promote); |
569 | setOperationAction(ISD::FREM, MVT::v4f16, Expand); |
570 | setOperationAction(ISD::FREM, MVT::v8f16, Expand); |
571 | setOperationAction(ISD::FPOW, MVT::f16, Promote); |
572 | setOperationAction(ISD::FPOW, MVT::v4f16, Expand); |
573 | setOperationAction(ISD::FPOW, MVT::v8f16, Expand); |
574 | setOperationAction(ISD::FPOWI, MVT::f16, Promote); |
575 | setOperationAction(ISD::FPOWI, MVT::v4f16, Expand); |
576 | setOperationAction(ISD::FPOWI, MVT::v8f16, Expand); |
577 | setOperationAction(ISD::FCOS, MVT::f16, Promote); |
578 | setOperationAction(ISD::FCOS, MVT::v4f16, Expand); |
579 | setOperationAction(ISD::FCOS, MVT::v8f16, Expand); |
580 | setOperationAction(ISD::FSIN, MVT::f16, Promote); |
581 | setOperationAction(ISD::FSIN, MVT::v4f16, Expand); |
582 | setOperationAction(ISD::FSIN, MVT::v8f16, Expand); |
583 | setOperationAction(ISD::FSINCOS, MVT::f16, Promote); |
584 | setOperationAction(ISD::FSINCOS, MVT::v4f16, Expand); |
585 | setOperationAction(ISD::FSINCOS, MVT::v8f16, Expand); |
586 | setOperationAction(ISD::FEXP, MVT::f16, Promote); |
587 | setOperationAction(ISD::FEXP, MVT::v4f16, Expand); |
588 | setOperationAction(ISD::FEXP, MVT::v8f16, Expand); |
589 | setOperationAction(ISD::FEXP2, MVT::f16, Promote); |
590 | setOperationAction(ISD::FEXP2, MVT::v4f16, Expand); |
591 | setOperationAction(ISD::FEXP2, MVT::v8f16, Expand); |
592 | setOperationAction(ISD::FLOG, MVT::f16, Promote); |
593 | setOperationAction(ISD::FLOG, MVT::v4f16, Expand); |
594 | setOperationAction(ISD::FLOG, MVT::v8f16, Expand); |
595 | setOperationAction(ISD::FLOG2, MVT::f16, Promote); |
596 | setOperationAction(ISD::FLOG2, MVT::v4f16, Expand); |
597 | setOperationAction(ISD::FLOG2, MVT::v8f16, Expand); |
598 | setOperationAction(ISD::FLOG10, MVT::f16, Promote); |
599 | setOperationAction(ISD::FLOG10, MVT::v4f16, Expand); |
600 | setOperationAction(ISD::FLOG10, MVT::v8f16, Expand); |
601 | |
602 | if (!Subtarget->hasFullFP16()) { |
603 | setOperationAction(ISD::SELECT, MVT::f16, Promote); |
604 | setOperationAction(ISD::SELECT_CC, MVT::f16, Promote); |
605 | setOperationAction(ISD::SETCC, MVT::f16, Promote); |
606 | setOperationAction(ISD::BR_CC, MVT::f16, Promote); |
607 | setOperationAction(ISD::FADD, MVT::f16, Promote); |
608 | setOperationAction(ISD::FSUB, MVT::f16, Promote); |
609 | setOperationAction(ISD::FMUL, MVT::f16, Promote); |
610 | setOperationAction(ISD::FDIV, MVT::f16, Promote); |
611 | setOperationAction(ISD::FMA, MVT::f16, Promote); |
612 | setOperationAction(ISD::FNEG, MVT::f16, Promote); |
613 | setOperationAction(ISD::FABS, MVT::f16, Promote); |
614 | setOperationAction(ISD::FCEIL, MVT::f16, Promote); |
615 | setOperationAction(ISD::FSQRT, MVT::f16, Promote); |
616 | setOperationAction(ISD::FFLOOR, MVT::f16, Promote); |
617 | setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote); |
618 | setOperationAction(ISD::FRINT, MVT::f16, Promote); |
619 | setOperationAction(ISD::FROUND, MVT::f16, Promote); |
620 | setOperationAction(ISD::FROUNDEVEN, MVT::f16, Promote); |
621 | setOperationAction(ISD::FTRUNC, MVT::f16, Promote); |
622 | setOperationAction(ISD::FMINNUM, MVT::f16, Promote); |
623 | setOperationAction(ISD::FMAXNUM, MVT::f16, Promote); |
624 | setOperationAction(ISD::FMINIMUM, MVT::f16, Promote); |
625 | setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote); |
626 | |
627 | // promote v4f16 to v4f32 when that is known to be safe. |
628 | setOperationAction(ISD::FADD, MVT::v4f16, Promote); |
629 | setOperationAction(ISD::FSUB, MVT::v4f16, Promote); |
630 | setOperationAction(ISD::FMUL, MVT::v4f16, Promote); |
631 | setOperationAction(ISD::FDIV, MVT::v4f16, Promote); |
632 | AddPromotedToType(ISD::FADD, MVT::v4f16, MVT::v4f32); |
633 | AddPromotedToType(ISD::FSUB, MVT::v4f16, MVT::v4f32); |
634 | AddPromotedToType(ISD::FMUL, MVT::v4f16, MVT::v4f32); |
635 | AddPromotedToType(ISD::FDIV, MVT::v4f16, MVT::v4f32); |
636 | |
637 | setOperationAction(ISD::FABS, MVT::v4f16, Expand); |
638 | setOperationAction(ISD::FNEG, MVT::v4f16, Expand); |
639 | setOperationAction(ISD::FROUND, MVT::v4f16, Expand); |
640 | setOperationAction(ISD::FROUNDEVEN, MVT::v4f16, Expand); |
641 | setOperationAction(ISD::FMA, MVT::v4f16, Expand); |
642 | setOperationAction(ISD::SETCC, MVT::v4f16, Expand); |
643 | setOperationAction(ISD::BR_CC, MVT::v4f16, Expand); |
644 | setOperationAction(ISD::SELECT, MVT::v4f16, Expand); |
645 | setOperationAction(ISD::SELECT_CC, MVT::v4f16, Expand); |
646 | setOperationAction(ISD::FTRUNC, MVT::v4f16, Expand); |
647 | setOperationAction(ISD::FCOPYSIGN, MVT::v4f16, Expand); |
648 | setOperationAction(ISD::FFLOOR, MVT::v4f16, Expand); |
649 | setOperationAction(ISD::FCEIL, MVT::v4f16, Expand); |
650 | setOperationAction(ISD::FRINT, MVT::v4f16, Expand); |
651 | setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand); |
652 | setOperationAction(ISD::FSQRT, MVT::v4f16, Expand); |
653 | |
654 | setOperationAction(ISD::FABS, MVT::v8f16, Expand); |
655 | setOperationAction(ISD::FADD, MVT::v8f16, Expand); |
656 | setOperationAction(ISD::FCEIL, MVT::v8f16, Expand); |
657 | setOperationAction(ISD::FCOPYSIGN, MVT::v8f16, Expand); |
658 | setOperationAction(ISD::FDIV, MVT::v8f16, Expand); |
659 | setOperationAction(ISD::FFLOOR, MVT::v8f16, Expand); |
660 | setOperationAction(ISD::FMA, MVT::v8f16, Expand); |
661 | setOperationAction(ISD::FMUL, MVT::v8f16, Expand); |
662 | setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand); |
663 | setOperationAction(ISD::FNEG, MVT::v8f16, Expand); |
664 | setOperationAction(ISD::FROUND, MVT::v8f16, Expand); |
665 | setOperationAction(ISD::FROUNDEVEN, MVT::v8f16, Expand); |
666 | setOperationAction(ISD::FRINT, MVT::v8f16, Expand); |
667 | setOperationAction(ISD::FSQRT, MVT::v8f16, Expand); |
668 | setOperationAction(ISD::FSUB, MVT::v8f16, Expand); |
669 | setOperationAction(ISD::FTRUNC, MVT::v8f16, Expand); |
670 | setOperationAction(ISD::SETCC, MVT::v8f16, Expand); |
671 | setOperationAction(ISD::BR_CC, MVT::v8f16, Expand); |
672 | setOperationAction(ISD::SELECT, MVT::v8f16, Expand); |
673 | setOperationAction(ISD::SELECT_CC, MVT::v8f16, Expand); |
674 | setOperationAction(ISD::FP_EXTEND, MVT::v8f16, Expand); |
675 | } |
676 | |
677 | // AArch64 has implementations of a lot of rounding-like FP operations. |
678 | for (MVT Ty : {MVT::f32, MVT::f64}) { |
679 | setOperationAction(ISD::FFLOOR, Ty, Legal); |
680 | setOperationAction(ISD::FNEARBYINT, Ty, Legal); |
681 | setOperationAction(ISD::FCEIL, Ty, Legal); |
682 | setOperationAction(ISD::FRINT, Ty, Legal); |
683 | setOperationAction(ISD::FTRUNC, Ty, Legal); |
684 | setOperationAction(ISD::FROUND, Ty, Legal); |
685 | setOperationAction(ISD::FROUNDEVEN, Ty, Legal); |
686 | setOperationAction(ISD::FMINNUM, Ty, Legal); |
687 | setOperationAction(ISD::FMAXNUM, Ty, Legal); |
688 | setOperationAction(ISD::FMINIMUM, Ty, Legal); |
689 | setOperationAction(ISD::FMAXIMUM, Ty, Legal); |
690 | setOperationAction(ISD::LROUND, Ty, Legal); |
691 | setOperationAction(ISD::LLROUND, Ty, Legal); |
692 | setOperationAction(ISD::LRINT, Ty, Legal); |
693 | setOperationAction(ISD::LLRINT, Ty, Legal); |
694 | } |
695 | |
696 | if (Subtarget->hasFullFP16()) { |
697 | setOperationAction(ISD::FNEARBYINT, MVT::f16, Legal); |
698 | setOperationAction(ISD::FFLOOR, MVT::f16, Legal); |
699 | setOperationAction(ISD::FCEIL, MVT::f16, Legal); |
700 | setOperationAction(ISD::FRINT, MVT::f16, Legal); |
701 | setOperationAction(ISD::FTRUNC, MVT::f16, Legal); |
702 | setOperationAction(ISD::FROUND, MVT::f16, Legal); |
703 | setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal); |
704 | setOperationAction(ISD::FMINNUM, MVT::f16, Legal); |
705 | setOperationAction(ISD::FMAXNUM, MVT::f16, Legal); |
706 | setOperationAction(ISD::FMINIMUM, MVT::f16, Legal); |
707 | setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal); |
708 | } |
709 | |
710 | setOperationAction(ISD::PREFETCH, MVT::Other, Custom); |
711 | |
712 | setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); |
713 | setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom); |
714 | |
715 | setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom); |
716 | setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom); |
717 | setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom); |
718 | setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom); |
719 | setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom); |
720 | |
721 | // Generate outline atomics library calls only if LSE was not specified for |
722 | // subtarget |
723 | if (Subtarget->outlineAtomics() && !Subtarget->hasLSE()) { |
724 | setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, LibCall); |
725 | setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, LibCall); |
726 | setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, LibCall); |
727 | setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, LibCall); |
728 | setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, LibCall); |
729 | setOperationAction(ISD::ATOMIC_SWAP, MVT::i8, LibCall); |
730 | setOperationAction(ISD::ATOMIC_SWAP, MVT::i16, LibCall); |
731 | setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, LibCall); |
732 | setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, LibCall); |
733 | setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i8, LibCall); |
734 | setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i16, LibCall); |
735 | setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, LibCall); |
736 | setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, LibCall); |
737 | setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i8, LibCall); |
738 | setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i16, LibCall); |
739 | setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, LibCall); |
740 | setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, LibCall); |
741 | setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i8, LibCall); |
742 | setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i16, LibCall); |
743 | setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i32, LibCall); |
744 | setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i64, LibCall); |
745 | setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i8, LibCall); |
746 | setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i16, LibCall); |
747 | setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, LibCall); |
748 | setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, LibCall); |
749 | #define LCALLNAMES(A, B, N) \ |
750 | setLibcallName(A##N##_RELAX, #B #N "_relax"); \ |
751 | setLibcallName(A##N##_ACQ, #B #N "_acq"); \ |
752 | setLibcallName(A##N##_REL, #B #N "_rel"); \ |
753 | setLibcallName(A##N##_ACQ_REL, #B #N "_acq_rel"); |
754 | #define LCALLNAME4(A, B) \ |
755 | LCALLNAMES(A, B, 1) \ |
756 | LCALLNAMES(A, B, 2) LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8) |
757 | #define LCALLNAME5(A, B) \ |
758 | LCALLNAMES(A, B, 1) \ |
759 | LCALLNAMES(A, B, 2) \ |
760 | LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8) LCALLNAMES(A, B, 16) |
761 | LCALLNAME5(RTLIB::OUTLINE_ATOMIC_CAS, __aarch64_cas) |
762 | LCALLNAME4(RTLIB::OUTLINE_ATOMIC_SWP, __aarch64_swp) |
763 | LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDADD, __aarch64_ldadd) |
764 | LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDSET, __aarch64_ldset) |
765 | LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDCLR, __aarch64_ldclr) |
766 | LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDEOR, __aarch64_ldeor) |
767 | #undef LCALLNAMES |
768 | #undef LCALLNAME4 |
769 | #undef LCALLNAME5 |
770 | } |
771 | |
772 | // 128-bit loads and stores can be done without expanding |
773 | setOperationAction(ISD::LOAD, MVT::i128, Custom); |
774 | setOperationAction(ISD::STORE, MVT::i128, Custom); |
775 | |
776 | // 256 bit non-temporal stores can be lowered to STNP. Do this as part of the |
777 | // custom lowering, as there are no un-paired non-temporal stores and |
778 | // legalization will break up 256 bit inputs. |
779 | setOperationAction(ISD::STORE, MVT::v32i8, Custom); |
780 | setOperationAction(ISD::STORE, MVT::v16i16, Custom); |
781 | setOperationAction(ISD::STORE, MVT::v16f16, Custom); |
782 | setOperationAction(ISD::STORE, MVT::v8i32, Custom); |
783 | setOperationAction(ISD::STORE, MVT::v8f32, Custom); |
784 | setOperationAction(ISD::STORE, MVT::v4f64, Custom); |
785 | setOperationAction(ISD::STORE, MVT::v4i64, Custom); |
786 | |
787 | // Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0. |
788 | // This requires the Performance Monitors extension. |
789 | if (Subtarget->hasPerfMon()) |
790 | setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal); |
791 | |
792 | if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr && |
793 | getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) { |
794 | // Issue __sincos_stret if available. |
795 | setOperationAction(ISD::FSINCOS, MVT::f64, Custom); |
796 | setOperationAction(ISD::FSINCOS, MVT::f32, Custom); |
797 | } else { |
798 | setOperationAction(ISD::FSINCOS, MVT::f64, Expand); |
799 | setOperationAction(ISD::FSINCOS, MVT::f32, Expand); |
800 | } |
801 | |
802 | if (Subtarget->getTargetTriple().isOSMSVCRT()) { |
803 | // MSVCRT doesn't have powi; fall back to pow |
804 | setLibcallName(RTLIB::POWI_F32, nullptr); |
805 | setLibcallName(RTLIB::POWI_F64, nullptr); |
806 | } |
807 | |
808 | // Make floating-point constants legal for the large code model, so they don't |
809 | // become loads from the constant pool. |
810 | if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) { |
811 | setOperationAction(ISD::ConstantFP, MVT::f32, Legal); |
812 | setOperationAction(ISD::ConstantFP, MVT::f64, Legal); |
813 | } |
814 | |
815 | // AArch64 does not have floating-point extending loads, i1 sign-extending |
816 | // load, floating-point truncating stores, or v2i32->v2i16 truncating store. |
817 | for (MVT VT : MVT::fp_valuetypes()) { |
818 | setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand); |
819 | setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand); |
820 | setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand); |
821 | setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand); |
822 | } |
823 | for (MVT VT : MVT::integer_valuetypes()) |
824 | setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Expand); |
825 | |
826 | setTruncStoreAction(MVT::f32, MVT::f16, Expand); |
827 | setTruncStoreAction(MVT::f64, MVT::f32, Expand); |
828 | setTruncStoreAction(MVT::f64, MVT::f16, Expand); |
829 | setTruncStoreAction(MVT::f128, MVT::f80, Expand); |
830 | setTruncStoreAction(MVT::f128, MVT::f64, Expand); |
831 | setTruncStoreAction(MVT::f128, MVT::f32, Expand); |
832 | setTruncStoreAction(MVT::f128, MVT::f16, Expand); |
833 | |
834 | setOperationAction(ISD::BITCAST, MVT::i16, Custom); |
835 | setOperationAction(ISD::BITCAST, MVT::f16, Custom); |
836 | setOperationAction(ISD::BITCAST, MVT::bf16, Custom); |
837 | |
838 | // Indexed loads and stores are supported. |
839 | for (unsigned im = (unsigned)ISD::PRE_INC; |
840 | im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { |
841 | setIndexedLoadAction(im, MVT::i8, Legal); |
842 | setIndexedLoadAction(im, MVT::i16, Legal); |
843 | setIndexedLoadAction(im, MVT::i32, Legal); |
844 | setIndexedLoadAction(im, MVT::i64, Legal); |
845 | setIndexedLoadAction(im, MVT::f64, Legal); |
846 | setIndexedLoadAction(im, MVT::f32, Legal); |
847 | setIndexedLoadAction(im, MVT::f16, Legal); |
848 | setIndexedLoadAction(im, MVT::bf16, Legal); |
849 | setIndexedStoreAction(im, MVT::i8, Legal); |
850 | setIndexedStoreAction(im, MVT::i16, Legal); |
851 | setIndexedStoreAction(im, MVT::i32, Legal); |
852 | setIndexedStoreAction(im, MVT::i64, Legal); |
853 | setIndexedStoreAction(im, MVT::f64, Legal); |
854 | setIndexedStoreAction(im, MVT::f32, Legal); |
855 | setIndexedStoreAction(im, MVT::f16, Legal); |
856 | setIndexedStoreAction(im, MVT::bf16, Legal); |
857 | } |
858 | |
859 | // Trap. |
860 | setOperationAction(ISD::TRAP, MVT::Other, Legal); |
861 | setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); |
862 | setOperationAction(ISD::UBSANTRAP, MVT::Other, Legal); |
863 | |
864 | // We combine OR nodes for bitfield operations. |
865 | setTargetDAGCombine(ISD::OR); |
866 | // Try to create BICs for vector ANDs. |
867 | setTargetDAGCombine(ISD::AND); |
868 | |
869 | // Vector add and sub nodes may conceal a high-half opportunity. |
870 | // Also, try to fold ADD into CSINC/CSINV. |
871 | setTargetDAGCombine(ISD::ADD); |
872 | setTargetDAGCombine(ISD::ABS); |
873 | setTargetDAGCombine(ISD::SUB); |
874 | setTargetDAGCombine(ISD::SRL); |
875 | setTargetDAGCombine(ISD::XOR); |
876 | setTargetDAGCombine(ISD::SINT_TO_FP); |
877 | setTargetDAGCombine(ISD::UINT_TO_FP); |
878 | |
879 | setTargetDAGCombine(ISD::FP_TO_SINT); |
880 | setTargetDAGCombine(ISD::FP_TO_UINT); |
881 | setTargetDAGCombine(ISD::FDIV); |
882 | |
883 | setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); |
884 | |
885 | setTargetDAGCombine(ISD::ANY_EXTEND); |
886 | setTargetDAGCombine(ISD::ZERO_EXTEND); |
887 | setTargetDAGCombine(ISD::SIGN_EXTEND); |
888 | setTargetDAGCombine(ISD::SIGN_EXTEND_INREG); |
889 | setTargetDAGCombine(ISD::TRUNCATE); |
890 | setTargetDAGCombine(ISD::CONCAT_VECTORS); |
891 | setTargetDAGCombine(ISD::STORE); |
892 | if (Subtarget->supportsAddressTopByteIgnored()) |
893 | setTargetDAGCombine(ISD::LOAD); |
894 | |
895 | setTargetDAGCombine(ISD::MUL); |
896 | |
897 | setTargetDAGCombine(ISD::SELECT); |
898 | setTargetDAGCombine(ISD::VSELECT); |
899 | |
900 | setTargetDAGCombine(ISD::INTRINSIC_VOID); |
901 | setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); |
902 | setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); |
903 | setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); |
904 | setTargetDAGCombine(ISD::VECREDUCE_ADD); |
905 | |
906 | setTargetDAGCombine(ISD::GlobalAddress); |
907 | |
908 | // In case of strict alignment, avoid an excessive number of byte wide stores. |
909 | MaxStoresPerMemsetOptSize = 8; |
910 | MaxStoresPerMemset = Subtarget->requiresStrictAlign() |
911 | ? MaxStoresPerMemsetOptSize : 32; |
912 | |
913 | MaxGluedStoresPerMemcpy = 4; |
914 | MaxStoresPerMemcpyOptSize = 4; |
915 | MaxStoresPerMemcpy = Subtarget->requiresStrictAlign() |
916 | ? MaxStoresPerMemcpyOptSize : 16; |
917 | |
918 | MaxStoresPerMemmoveOptSize = MaxStoresPerMemmove = 4; |
919 | |
920 | MaxLoadsPerMemcmpOptSize = 4; |
921 | MaxLoadsPerMemcmp = Subtarget->requiresStrictAlign() |
922 | ? MaxLoadsPerMemcmpOptSize : 8; |
923 | |
924 | setStackPointerRegisterToSaveRestore(AArch64::SP); |
925 | |
926 | setSchedulingPreference(Sched::Hybrid); |
927 | |
928 | EnableExtLdPromotion = true; |
929 | |
930 | // Set required alignment. |
931 | setMinFunctionAlignment(Align(4)); |
932 | // Set preferred alignments. |
933 | setPrefLoopAlignment(Align(1ULL << STI.getPrefLoopLogAlignment())); |
934 | setPrefFunctionAlignment(Align(1ULL << STI.getPrefFunctionLogAlignment())); |
935 | |
936 | // Only change the limit for entries in a jump table if specified by |
937 | // the sub target, but not at the command line. |
938 | unsigned MaxJT = STI.getMaximumJumpTableSize(); |
939 | if (MaxJT && getMaximumJumpTableSize() == UINT_MAX) |
940 | setMaximumJumpTableSize(MaxJT); |
941 | |
942 | setHasExtractBitsInsn(true); |
943 | |
944 | setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); |
945 | |
946 | if (Subtarget->hasNEON()) { |
947 | // FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to |
948 | // silliness like this: |
949 | setOperationAction(ISD::FABS, MVT::v1f64, Expand); |
950 | setOperationAction(ISD::FADD, MVT::v1f64, Expand); |
951 | setOperationAction(ISD::FCEIL, MVT::v1f64, Expand); |
952 | setOperationAction(ISD::FCOPYSIGN, MVT::v1f64, Expand); |
953 | setOperationAction(ISD::FCOS, MVT::v1f64, Expand); |
954 | setOperationAction(ISD::FDIV, MVT::v1f64, Expand); |
955 | setOperationAction(ISD::FFLOOR, MVT::v1f64, Expand); |
956 | setOperationAction(ISD::FMA, MVT::v1f64, Expand); |
957 | setOperationAction(ISD::FMUL, MVT::v1f64, Expand); |
958 | setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Expand); |
959 | setOperationAction(ISD::FNEG, MVT::v1f64, Expand); |
960 | setOperationAction(ISD::FPOW, MVT::v1f64, Expand); |
961 | setOperationAction(ISD::FREM, MVT::v1f64, Expand); |
962 | setOperationAction(ISD::FROUND, MVT::v1f64, Expand); |
963 | setOperationAction(ISD::FROUNDEVEN, MVT::v1f64, Expand); |
964 | setOperationAction(ISD::FRINT, MVT::v1f64, Expand); |
965 | setOperationAction(ISD::FSIN, MVT::v1f64, Expand); |
966 | setOperationAction(ISD::FSINCOS, MVT::v1f64, Expand); |
967 | setOperationAction(ISD::FSQRT, MVT::v1f64, Expand); |
968 | setOperationAction(ISD::FSUB, MVT::v1f64, Expand); |
969 | setOperationAction(ISD::FTRUNC, MVT::v1f64, Expand); |
970 | setOperationAction(ISD::SETCC, MVT::v1f64, Expand); |
971 | setOperationAction(ISD::BR_CC, MVT::v1f64, Expand); |
972 | setOperationAction(ISD::SELECT, MVT::v1f64, Expand); |
973 | setOperationAction(ISD::SELECT_CC, MVT::v1f64, Expand); |
974 | setOperationAction(ISD::FP_EXTEND, MVT::v1f64, Expand); |
975 | |
976 | setOperationAction(ISD::FP_TO_SINT, MVT::v1i64, Expand); |
977 | setOperationAction(ISD::FP_TO_UINT, MVT::v1i64, Expand); |
978 | setOperationAction(ISD::SINT_TO_FP, MVT::v1i64, Expand); |
979 | setOperationAction(ISD::UINT_TO_FP, MVT::v1i64, Expand); |
980 | setOperationAction(ISD::FP_ROUND, MVT::v1f64, Expand); |
981 | |
982 | setOperationAction(ISD::MUL, MVT::v1i64, Expand); |
983 | |
984 | // AArch64 doesn't have direct vector -> f32 conversion instructions for |
985 | // elements smaller than i32, so promote the input to i32 first. |
986 | setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i8, MVT::v4i32); |
987 | setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i8, MVT::v4i32); |
988 | // i8 vector elements also need promotion to i32 for v8i8 |
989 | setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32); |
990 | setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32); |
991 | // Similarly, there is no direct i32 -> f64 vector conversion instruction. |
992 | setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom); |
993 | setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom); |
994 | setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom); |
995 | setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom); |
996 | // Or, direct i32 -> f16 vector conversion. Set it to Custom, so the |
997 | // conversion happens in two steps: v4i32 -> v4f32 -> v4f16 |
998 | setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom); |
999 | setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom); |
1000 | |
1001 | if (Subtarget->hasFullFP16()) { |
1002 | setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom); |
1003 | setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom); |
1004 | setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom); |
1005 | setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom); |
1006 | } else { |
1007 | // when AArch64 doesn't have fullfp16 support, promote the input |
1008 | // to i32 first. |
1009 | setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i16, MVT::v4i32); |
1010 | setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i16, MVT::v4i32); |
1011 | setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i16, MVT::v8i32); |
1012 | setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i16, MVT::v8i32); |
1013 | } |
1014 | |
1015 | setOperationAction(ISD::CTLZ, MVT::v1i64, Expand); |
1016 | setOperationAction(ISD::CTLZ, MVT::v2i64, Expand); |
1017 | |
1018 | // AArch64 doesn't have MUL.2d: |
1019 | setOperationAction(ISD::MUL, MVT::v2i64, Expand); |
1020 | // Custom handling for some quad-vector types to detect MULL. |
1021 | setOperationAction(ISD::MUL, MVT::v8i16, Custom); |
1022 | setOperationAction(ISD::MUL, MVT::v4i32, Custom); |
1023 | setOperationAction(ISD::MUL, MVT::v2i64, Custom); |
1024 | |
1025 | // Saturates |
1026 | for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32, |
1027 | MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { |
1028 | setOperationAction(ISD::SADDSAT, VT, Legal); |
1029 | setOperationAction(ISD::UADDSAT, VT, Legal); |
1030 | setOperationAction(ISD::SSUBSAT, VT, Legal); |
1031 | setOperationAction(ISD::USUBSAT, VT, Legal); |
1032 | } |
1033 | |
1034 | // Vector reductions |
1035 | for (MVT VT : { MVT::v4f16, MVT::v2f32, |
1036 | MVT::v8f16, MVT::v4f32, MVT::v2f64 }) { |
1037 | if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) { |
1038 | setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom); |
1039 | setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom); |
1040 | |
1041 | setOperationAction(ISD::VECREDUCE_FADD, VT, Legal); |
1042 | } |
1043 | } |
1044 | for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32, |
1045 | MVT::v16i8, MVT::v8i16, MVT::v4i32 }) { |
1046 | setOperationAction(ISD::VECREDUCE_ADD, VT, Custom); |
1047 | setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom); |
1048 | setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom); |
1049 | setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom); |
1050 | setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom); |
1051 | } |
1052 | setOperationAction(ISD::VECREDUCE_ADD, MVT::v2i64, Custom); |
1053 | |
1054 | setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal); |
1055 | setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand); |
1056 | // Likewise, narrowing and extending vector loads/stores aren't handled |
1057 | // directly. |
1058 | for (MVT VT : MVT::fixedlen_vector_valuetypes()) { |
1059 | setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); |
1060 | |
1061 | if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32) { |
1062 | setOperationAction(ISD::MULHS, VT, Legal); |
1063 | setOperationAction(ISD::MULHU, VT, Legal); |
1064 | } else { |
1065 | setOperationAction(ISD::MULHS, VT, Expand); |
1066 | setOperationAction(ISD::MULHU, VT, Expand); |
1067 | } |
1068 | setOperationAction(ISD::SMUL_LOHI, VT, Expand); |
1069 | setOperationAction(ISD::UMUL_LOHI, VT, Expand); |
1070 | |
1071 | setOperationAction(ISD::BSWAP, VT, Expand); |
1072 | setOperationAction(ISD::CTTZ, VT, Expand); |
1073 | |
1074 | for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) { |
1075 | setTruncStoreAction(VT, InnerVT, Expand); |
1076 | setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); |
1077 | setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); |
1078 | setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); |
1079 | } |
1080 | } |
1081 | |
1082 | // AArch64 has implementations of a lot of rounding-like FP operations. |
1083 | for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) { |
1084 | setOperationAction(ISD::FFLOOR, Ty, Legal); |
1085 | setOperationAction(ISD::FNEARBYINT, Ty, Legal); |
1086 | setOperationAction(ISD::FCEIL, Ty, Legal); |
1087 | setOperationAction(ISD::FRINT, Ty, Legal); |
1088 | setOperationAction(ISD::FTRUNC, Ty, Legal); |
1089 | setOperationAction(ISD::FROUND, Ty, Legal); |
1090 | setOperationAction(ISD::FROUNDEVEN, Ty, Legal); |
1091 | } |
1092 | |
1093 | if (Subtarget->hasFullFP16()) { |
1094 | for (MVT Ty : {MVT::v4f16, MVT::v8f16}) { |
1095 | setOperationAction(ISD::FFLOOR, Ty, Legal); |
1096 | setOperationAction(ISD::FNEARBYINT, Ty, Legal); |
1097 | setOperationAction(ISD::FCEIL, Ty, Legal); |
1098 | setOperationAction(ISD::FRINT, Ty, Legal); |
1099 | setOperationAction(ISD::FTRUNC, Ty, Legal); |
1100 | setOperationAction(ISD::FROUND, Ty, Legal); |
1101 | setOperationAction(ISD::FROUNDEVEN, Ty, Legal); |
1102 | } |
1103 | } |
1104 | |
1105 | if (Subtarget->hasSVE()) |
1106 | setOperationAction(ISD::VSCALE, MVT::i32, Custom); |
1107 | |
1108 | setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom); |
1109 | } |
1110 | |
1111 | if (Subtarget->hasSVE()) { |
1112 | // FIXME: Add custom lowering of MLOAD to handle different passthrus (not a |
1113 | // splat of 0 or undef) once vector selects are supported in SVE codegen. See |
1114 | // D68877 for more details. |
1115 | for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) { |
1116 | setOperationAction(ISD::BITREVERSE, VT, Custom); |
1117 | setOperationAction(ISD::BSWAP, VT, Custom); |
1118 | setOperationAction(ISD::CTLZ, VT, Custom); |
1119 | setOperationAction(ISD::CTPOP, VT, Custom); |
1120 | setOperationAction(ISD::CTTZ, VT, Custom); |
1121 | setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); |
1122 | setOperationAction(ISD::UINT_TO_FP, VT, Custom); |
1123 | setOperationAction(ISD::SINT_TO_FP, VT, Custom); |
1124 | setOperationAction(ISD::FP_TO_UINT, VT, Custom); |
1125 | setOperationAction(ISD::FP_TO_SINT, VT, Custom); |
1126 | setOperationAction(ISD::MGATHER, VT, Custom); |
1127 | setOperationAction(ISD::MSCATTER, VT, Custom); |
1128 | setOperationAction(ISD::MUL, VT, Custom); |
1129 | setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); |
1130 | setOperationAction(ISD::SELECT, VT, Custom); |
1131 | setOperationAction(ISD::SETCC, VT, Custom); |
1132 | setOperationAction(ISD::SDIV, VT, Custom); |
1133 | setOperationAction(ISD::UDIV, VT, Custom); |
1134 | setOperationAction(ISD::SMIN, VT, Custom); |
1135 | setOperationAction(ISD::UMIN, VT, Custom); |
1136 | setOperationAction(ISD::SMAX, VT, Custom); |
1137 | setOperationAction(ISD::UMAX, VT, Custom); |
1138 | setOperationAction(ISD::SHL, VT, Custom); |
1139 | setOperationAction(ISD::SRL, VT, Custom); |
1140 | setOperationAction(ISD::SRA, VT, Custom); |
1141 | setOperationAction(ISD::ABS, VT, Custom); |
1142 | setOperationAction(ISD::VECREDUCE_ADD, VT, Custom); |
1143 | setOperationAction(ISD::VECREDUCE_AND, VT, Custom); |
1144 | setOperationAction(ISD::VECREDUCE_OR, VT, Custom); |
1145 | setOperationAction(ISD::VECREDUCE_XOR, VT, Custom); |
1146 | setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom); |
1147 | setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom); |
1148 | setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom); |
1149 | setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom); |
1150 | setOperationAction(ISD::STEP_VECTOR, VT, Custom); |
1151 | |
1152 | setOperationAction(ISD::MULHU, VT, Expand); |
1153 | setOperationAction(ISD::MULHS, VT, Expand); |
1154 | setOperationAction(ISD::UMUL_LOHI, VT, Expand); |
1155 | setOperationAction(ISD::SMUL_LOHI, VT, Expand); |
1156 | } |
1157 | |
1158 | // Illegal unpacked integer vector types. |
1159 | for (auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32}) { |
1160 | setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); |
1161 | setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); |
1162 | } |
1163 | |
1164 | for (auto VT : {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1}) { |
1165 | setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); |
1166 | setOperationAction(ISD::SELECT, VT, Custom); |
1167 | setOperationAction(ISD::SETCC, VT, Custom); |
1168 | setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); |
1169 | setOperationAction(ISD::TRUNCATE, VT, Custom); |
1170 | setOperationAction(ISD::VECREDUCE_AND, VT, Custom); |
1171 | setOperationAction(ISD::VECREDUCE_OR, VT, Custom); |
1172 | setOperationAction(ISD::VECREDUCE_XOR, VT, Custom); |
1173 | |
1174 | // There are no legal MVT::nxv16f## based types. |
1175 | if (VT != MVT::nxv16i1) { |
1176 | setOperationAction(ISD::SINT_TO_FP, VT, Custom); |
1177 | setOperationAction(ISD::UINT_TO_FP, VT, Custom); |
1178 | } |
1179 | } |
1180 | |
1181 | for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32, |
1182 | MVT::nxv4f32, MVT::nxv2f64}) { |
1183 | for (auto InnerVT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, |
1184 | MVT::nxv2f32, MVT::nxv4f32, MVT::nxv2f64}) { |
1185 | // Avoid marking truncating FP stores as legal to prevent the |
1186 | // DAGCombiner from creating unsupported truncating stores. |
1187 | setTruncStoreAction(VT, InnerVT, Expand); |
1188 | } |
1189 | |
1190 | setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); |
1191 | setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); |
1192 | setOperationAction(ISD::MGATHER, VT, Custom); |
1193 | setOperationAction(ISD::MSCATTER, VT, Custom); |
1194 | setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); |
1195 | setOperationAction(ISD::SELECT, VT, Custom); |
1196 | setOperationAction(ISD::FADD, VT, Custom); |
1197 | setOperationAction(ISD::FDIV, VT, Custom); |
1198 | setOperationAction(ISD::FMA, VT, Custom); |
1199 | setOperationAction(ISD::FMAXIMUM, VT, Custom); |
1200 | setOperationAction(ISD::FMAXNUM, VT, Custom); |
1201 | setOperationAction(ISD::FMINIMUM, VT, Custom); |
1202 | setOperationAction(ISD::FMINNUM, VT, Custom); |
1203 | setOperationAction(ISD::FMUL, VT, Custom); |
1204 | setOperationAction(ISD::FNEG, VT, Custom); |
1205 | setOperationAction(ISD::FSUB, VT, Custom); |
1206 | setOperationAction(ISD::FCEIL, VT, Custom); |
1207 | setOperationAction(ISD::FFLOOR, VT, Custom); |
1208 | setOperationAction(ISD::FNEARBYINT, VT, Custom); |
1209 | setOperationAction(ISD::FRINT, VT, Custom); |
1210 | setOperationAction(ISD::FROUND, VT, Custom); |
1211 | setOperationAction(ISD::FROUNDEVEN, VT, Custom); |
1212 | setOperationAction(ISD::FTRUNC, VT, Custom); |
1213 | setOperationAction(ISD::FSQRT, VT, Custom); |
1214 | setOperationAction(ISD::FABS, VT, Custom); |
1215 | setOperationAction(ISD::FP_EXTEND, VT, Custom); |
1216 | setOperationAction(ISD::FP_ROUND, VT, Custom); |
1217 | setOperationAction(ISD::VECREDUCE_FADD, VT, Custom); |
1218 | setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom); |
1219 | setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom); |
1220 | setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom); |
1221 | } |
1222 | |
1223 | for (auto VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) { |
1224 | setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); |
1225 | setOperationAction(ISD::MGATHER, VT, Custom); |
1226 | setOperationAction(ISD::MSCATTER, VT, Custom); |
1227 | } |
1228 | |
1229 | setOperationAction(ISD::SPLAT_VECTOR, MVT::nxv8bf16, Custom); |
1230 | |
1231 | setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom); |
1232 | setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom); |
1233 | |
1234 | // NOTE: Currently this has to happen after computeRegisterProperties rather |
1235 | // than the preferred option of combining it with the addRegisterClass call. |
1236 | if (Subtarget->useSVEForFixedLengthVectors()) { |
1237 | for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) |
1238 | if (useSVEForFixedLengthVectorVT(VT)) |
1239 | addTypeForFixedLengthSVE(VT); |
1240 | for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) |
1241 | if (useSVEForFixedLengthVectorVT(VT)) |
1242 | addTypeForFixedLengthSVE(VT); |
1243 | |
1244 | // 64-bit results can come from an input wider than a NEON register.
1245 | for (auto VT : {MVT::v8i8, MVT::v4i16}) |
1246 | setOperationAction(ISD::TRUNCATE, VT, Custom); |
1247 | setOperationAction(ISD::FP_ROUND, MVT::v4f16, Custom); |
1248 | |
1249 | // 128-bit results imply an input wider than a NEON register.
1250 | for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32}) |
1251 | setOperationAction(ISD::TRUNCATE, VT, Custom); |
1252 | for (auto VT : {MVT::v8f16, MVT::v4f32}) |
1253 | setOperationAction(ISD::FP_ROUND, VT, Expand); |
1254 | |
1255 | // These operations are not supported on NEON but SVE can do them. |
1256 | setOperationAction(ISD::BITREVERSE, MVT::v1i64, Custom); |
1257 | setOperationAction(ISD::CTLZ, MVT::v1i64, Custom); |
1258 | setOperationAction(ISD::CTLZ, MVT::v2i64, Custom); |
1259 | setOperationAction(ISD::CTTZ, MVT::v1i64, Custom); |
1260 | setOperationAction(ISD::MUL, MVT::v1i64, Custom); |
1261 | setOperationAction(ISD::MUL, MVT::v2i64, Custom); |
1262 | setOperationAction(ISD::SDIV, MVT::v8i8, Custom); |
1263 | setOperationAction(ISD::SDIV, MVT::v16i8, Custom); |
1264 | setOperationAction(ISD::SDIV, MVT::v4i16, Custom); |
1265 | setOperationAction(ISD::SDIV, MVT::v8i16, Custom); |
1266 | setOperationAction(ISD::SDIV, MVT::v2i32, Custom); |
1267 | setOperationAction(ISD::SDIV, MVT::v4i32, Custom); |
1268 | setOperationAction(ISD::SDIV, MVT::v1i64, Custom); |
1269 | setOperationAction(ISD::SDIV, MVT::v2i64, Custom); |
1270 | setOperationAction(ISD::SMAX, MVT::v1i64, Custom); |
1271 | setOperationAction(ISD::SMAX, MVT::v2i64, Custom); |
1272 | setOperationAction(ISD::SMIN, MVT::v1i64, Custom); |
1273 | setOperationAction(ISD::SMIN, MVT::v2i64, Custom); |
1274 | setOperationAction(ISD::UDIV, MVT::v8i8, Custom); |
1275 | setOperationAction(ISD::UDIV, MVT::v16i8, Custom); |
1276 | setOperationAction(ISD::UDIV, MVT::v4i16, Custom); |
1277 | setOperationAction(ISD::UDIV, MVT::v8i16, Custom); |
1278 | setOperationAction(ISD::UDIV, MVT::v2i32, Custom); |
1279 | setOperationAction(ISD::UDIV, MVT::v4i32, Custom); |
1280 | setOperationAction(ISD::UDIV, MVT::v1i64, Custom); |
1281 | setOperationAction(ISD::UDIV, MVT::v2i64, Custom); |
1282 | setOperationAction(ISD::UMAX, MVT::v1i64, Custom); |
1283 | setOperationAction(ISD::UMAX, MVT::v2i64, Custom); |
1284 | setOperationAction(ISD::UMIN, MVT::v1i64, Custom); |
1285 | setOperationAction(ISD::UMIN, MVT::v2i64, Custom); |
1286 | setOperationAction(ISD::VECREDUCE_SMAX, MVT::v2i64, Custom); |
1287 | setOperationAction(ISD::VECREDUCE_SMIN, MVT::v2i64, Custom); |
1288 | setOperationAction(ISD::VECREDUCE_UMAX, MVT::v2i64, Custom); |
1289 | setOperationAction(ISD::VECREDUCE_UMIN, MVT::v2i64, Custom); |
1290 | |
1291 | // Int operations with no NEON support. |
1292 | for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, |
1293 | MVT::v2i32, MVT::v4i32, MVT::v2i64}) { |
1294 | setOperationAction(ISD::BITREVERSE, VT, Custom); |
1295 | setOperationAction(ISD::CTTZ, VT, Custom); |
1296 | setOperationAction(ISD::VECREDUCE_AND, VT, Custom); |
1297 | setOperationAction(ISD::VECREDUCE_OR, VT, Custom); |
1298 | setOperationAction(ISD::VECREDUCE_XOR, VT, Custom); |
1299 | } |
1300 | |
1301 | // FP operations with no NEON support. |
1302 | for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, |
1303 | MVT::v1f64, MVT::v2f64}) |
1304 | setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom); |
1305 | |
1306 | // Use SVE for vectors with more than 2 elements. |
1307 | for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v4f32}) |
1308 | setOperationAction(ISD::VECREDUCE_FADD, VT, Custom); |
1309 | } |
1310 | |
1311 | setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv2i1, MVT::nxv2i64); |
1312 | setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv4i1, MVT::nxv4i32); |
1313 | setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv8i1, MVT::nxv8i16); |
1314 | setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv16i1, MVT::nxv16i8); |
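| // i.e. predicate splices are widened to the equivalently sized integer
| // vector, spliced there, and the result is truncated back to a predicate.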
1315 | } |
1316 | |
1317 | PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive(); |
1318 | } |
1319 | |
1320 | void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) { |
1321 | assert(VT.isVector() && "VT should be a vector type");
1322 | |
1323 | if (VT.isFloatingPoint()) { |
1324 | MVT PromoteTo = EVT(VT).changeVectorElementTypeToInteger().getSimpleVT(); |
1325 | setOperationPromotedToType(ISD::LOAD, VT, PromoteTo); |
1326 | setOperationPromotedToType(ISD::STORE, VT, PromoteTo); |
1327 | } |
1328 | |
1329 | // Mark vector float intrinsics as expand. |
1330 | if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) { |
1331 | setOperationAction(ISD::FSIN, VT, Expand); |
1332 | setOperationAction(ISD::FCOS, VT, Expand); |
1333 | setOperationAction(ISD::FPOW, VT, Expand); |
1334 | setOperationAction(ISD::FLOG, VT, Expand); |
1335 | setOperationAction(ISD::FLOG2, VT, Expand); |
1336 | setOperationAction(ISD::FLOG10, VT, Expand); |
1337 | setOperationAction(ISD::FEXP, VT, Expand); |
1338 | setOperationAction(ISD::FEXP2, VT, Expand); |
1339 | |
1340 | // But we do support custom-lowering for FCOPYSIGN. |
1341 | setOperationAction(ISD::FCOPYSIGN, VT, Custom); |
1342 | } |
1343 | |
1344 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
1345 | setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
1346 | setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
1347 | setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
1348 | setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); |
1349 | setOperationAction(ISD::SRA, VT, Custom); |
1350 | setOperationAction(ISD::SRL, VT, Custom); |
1351 | setOperationAction(ISD::SHL, VT, Custom); |
1352 | setOperationAction(ISD::OR, VT, Custom); |
1353 | setOperationAction(ISD::SETCC, VT, Custom); |
1354 | setOperationAction(ISD::CONCAT_VECTORS, VT, Legal); |
1355 | |
1356 | setOperationAction(ISD::SELECT, VT, Expand); |
1357 | setOperationAction(ISD::SELECT_CC, VT, Expand); |
1358 | setOperationAction(ISD::VSELECT, VT, Expand); |
1359 | for (MVT InnerVT : MVT::all_valuetypes()) |
1360 | setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand); |
1361 | |
1362 | // CNT supports only B element sizes, then use UADDLP to widen. |
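| // For example, a v4i32 CTPOP is lowered as a byte-wise CNT followed by
| // pairwise UADDLP widening steps (v16i8 -> v8i16 -> v4i32).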
1363 | if (VT != MVT::v8i8 && VT != MVT::v16i8) |
1364 | setOperationAction(ISD::CTPOP, VT, Custom); |
1365 | |
1366 | setOperationAction(ISD::UDIV, VT, Expand); |
1367 | setOperationAction(ISD::SDIV, VT, Expand); |
1368 | setOperationAction(ISD::UREM, VT, Expand); |
1369 | setOperationAction(ISD::SREM, VT, Expand); |
1370 | setOperationAction(ISD::FREM, VT, Expand); |
1371 | |
1372 | setOperationAction(ISD::FP_TO_SINT, VT, Custom); |
1373 | setOperationAction(ISD::FP_TO_UINT, VT, Custom); |
1374 | |
1375 | if (!VT.isFloatingPoint()) |
1376 | setOperationAction(ISD::ABS, VT, Legal); |
1377 | |
1378 | // [SU][MIN|MAX] are available for all NEON types apart from i64. |
1379 | if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64) |
1380 | for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}) |
1381 | setOperationAction(Opcode, VT, Legal); |
1382 | |
1383 | // F[MIN|MAX][NUM|NAN] are available for all FP NEON types. |
1384 | if (VT.isFloatingPoint() && |
1385 | VT.getVectorElementType() != MVT::bf16 && |
1386 | (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16())) |
1387 | for (unsigned Opcode : |
1388 | {ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FMINNUM, ISD::FMAXNUM}) |
1389 | setOperationAction(Opcode, VT, Legal); |
1390 | |
1391 | if (Subtarget->isLittleEndian()) { |
1392 | for (unsigned im = (unsigned)ISD::PRE_INC; |
1393 | im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { |
1394 | setIndexedLoadAction(im, VT, Legal); |
1395 | setIndexedStoreAction(im, VT, Legal); |
1396 | } |
1397 | } |
1398 | } |
1399 | |
1400 | void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) { |
1401 | assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
1402 | |
1403 | // By default everything must be expanded. |
1404 | for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) |
1405 | setOperationAction(Op, VT, Expand); |
1406 | |
1407 | // We use EXTRACT_SUBVECTOR to "cast" a scalable vector to a fixed length one. |
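| // (e.g. a v4i32 value is read back out of its nxv4i32 container with an
| // EXTRACT_SUBVECTOR at index 0.)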
1408 | setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); |
1409 | |
1410 | if (VT.isFloatingPoint()) { |
1411 | setCondCodeAction(ISD::SETO, VT, Expand); |
1412 | setCondCodeAction(ISD::SETOLT, VT, Expand); |
1413 | setCondCodeAction(ISD::SETLT, VT, Expand); |
1414 | setCondCodeAction(ISD::SETOLE, VT, Expand); |
1415 | setCondCodeAction(ISD::SETLE, VT, Expand); |
1416 | setCondCodeAction(ISD::SETULT, VT, Expand); |
1417 | setCondCodeAction(ISD::SETULE, VT, Expand); |
1418 | setCondCodeAction(ISD::SETUGE, VT, Expand); |
1419 | setCondCodeAction(ISD::SETUGT, VT, Expand); |
1420 | setCondCodeAction(ISD::SETUEQ, VT, Expand); |
1421 | setCondCodeAction(ISD::SETUNE, VT, Expand); |
1422 | } |
1423 | |
1424 | // Lower fixed length vector operations to scalable equivalents. |
1425 | setOperationAction(ISD::ABS, VT, Custom); |
1426 | setOperationAction(ISD::ADD, VT, Custom); |
1427 | setOperationAction(ISD::AND, VT, Custom); |
1428 | setOperationAction(ISD::ANY_EXTEND, VT, Custom); |
1429 | setOperationAction(ISD::BITREVERSE, VT, Custom); |
1430 | setOperationAction(ISD::BSWAP, VT, Custom); |
1431 | setOperationAction(ISD::CTLZ, VT, Custom); |
1432 | setOperationAction(ISD::CTPOP, VT, Custom); |
1433 | setOperationAction(ISD::CTTZ, VT, Custom); |
1434 | setOperationAction(ISD::FABS, VT, Custom); |
1435 | setOperationAction(ISD::FADD, VT, Custom); |
1436 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
1437 | setOperationAction(ISD::FCEIL, VT, Custom); |
1438 | setOperationAction(ISD::FDIV, VT, Custom); |
1439 | setOperationAction(ISD::FFLOOR, VT, Custom); |
1440 | setOperationAction(ISD::FMA, VT, Custom); |
1441 | setOperationAction(ISD::FMAXIMUM, VT, Custom); |
1442 | setOperationAction(ISD::FMAXNUM, VT, Custom); |
1443 | setOperationAction(ISD::FMINIMUM, VT, Custom); |
1444 | setOperationAction(ISD::FMINNUM, VT, Custom); |
1445 | setOperationAction(ISD::FMUL, VT, Custom); |
1446 | setOperationAction(ISD::FNEARBYINT, VT, Custom); |
1447 | setOperationAction(ISD::FNEG, VT, Custom); |
1448 | setOperationAction(ISD::FRINT, VT, Custom); |
1449 | setOperationAction(ISD::FROUND, VT, Custom); |
1450 | setOperationAction(ISD::FROUNDEVEN, VT, Custom); |
1451 | setOperationAction(ISD::FSQRT, VT, Custom); |
1452 | setOperationAction(ISD::FSUB, VT, Custom); |
1453 | setOperationAction(ISD::FTRUNC, VT, Custom); |
1454 | setOperationAction(ISD::LOAD, VT, Custom); |
1455 | setOperationAction(ISD::MUL, VT, Custom); |
1456 | setOperationAction(ISD::OR, VT, Custom); |
1457 | setOperationAction(ISD::SDIV, VT, Custom); |
1458 | setOperationAction(ISD::SELECT, VT, Custom); |
1459 | setOperationAction(ISD::SETCC, VT, Custom); |
1460 | setOperationAction(ISD::SHL, VT, Custom); |
1461 | setOperationAction(ISD::SIGN_EXTEND, VT, Custom); |
1462 | setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom); |
1463 | setOperationAction(ISD::SMAX, VT, Custom); |
1464 | setOperationAction(ISD::SMIN, VT, Custom); |
1465 | setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); |
1466 | setOperationAction(ISD::SRA, VT, Custom); |
1467 | setOperationAction(ISD::SRL, VT, Custom); |
1468 | setOperationAction(ISD::STORE, VT, Custom); |
1469 | setOperationAction(ISD::SUB, VT, Custom); |
1470 | setOperationAction(ISD::TRUNCATE, VT, Custom); |
1471 | setOperationAction(ISD::UDIV, VT, Custom); |
1472 | setOperationAction(ISD::UMAX, VT, Custom); |
1473 | setOperationAction(ISD::UMIN, VT, Custom); |
1474 | setOperationAction(ISD::VECREDUCE_ADD, VT, Custom); |
1475 | setOperationAction(ISD::VECREDUCE_AND, VT, Custom); |
1476 | setOperationAction(ISD::VECREDUCE_FADD, VT, Custom); |
1477 | setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom); |
1478 | setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom); |
1479 | setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom); |
1480 | setOperationAction(ISD::VECREDUCE_OR, VT, Custom); |
1481 | setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
1482 | setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom); |
1483 | setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom); |
1484 | setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom); |
1485 | setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom); |
1486 | setOperationAction(ISD::VECREDUCE_XOR, VT, Custom); |
1487 | setOperationAction(ISD::VSELECT, VT, Custom); |
1488 | setOperationAction(ISD::XOR, VT, Custom); |
1489 | setOperationAction(ISD::ZERO_EXTEND, VT, Custom); |
1490 | } |
1491 | |
1492 | void AArch64TargetLowering::addDRTypeForNEON(MVT VT) { |
1493 | addRegisterClass(VT, &AArch64::FPR64RegClass); |
1494 | addTypeForNEON(VT, MVT::v2i32); |
1495 | } |
1496 | |
1497 | void AArch64TargetLowering::addQRTypeForNEON(MVT VT) { |
1498 | addRegisterClass(VT, &AArch64::FPR128RegClass); |
1499 | addTypeForNEON(VT, MVT::v4i32); |
1500 | } |
1501 | |
1502 | EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &, |
1503 | LLVMContext &C, EVT VT) const { |
1504 | if (!VT.isVector()) |
1505 | return MVT::i32; |
1506 | if (VT.isScalableVector()) |
1507 | return EVT::getVectorVT(C, MVT::i1, VT.getVectorElementCount()); |
1508 | return VT.changeVectorElementTypeToInteger(); |
1509 | } |
1510 | |
1511 | static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm, |
1512 | const APInt &Demanded, |
1513 | TargetLowering::TargetLoweringOpt &TLO, |
1514 | unsigned NewOpc) { |
1515 | uint64_t OldImm = Imm, NewImm, Enc; |
1516 | uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size)), OrigMask = Mask; |
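| // Mask has the low Size bits set, e.g. 0xFFFFFFFF when Size == 32.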
1517 | |
1518 | // Return if the immediate is already all zeros, all ones, a bimm32 or a |
1519 | // bimm64. |
1520 | if (Imm == 0 || Imm == Mask || |
1521 | AArch64_AM::isLogicalImmediate(Imm & Mask, Size)) |
1522 | return false; |
1523 | |
1524 | unsigned EltSize = Size; |
1525 | uint64_t DemandedBits = Demanded.getZExtValue(); |
1526 | |
1527 | // Clear bits that are not demanded. |
1528 | Imm &= DemandedBits; |
1529 | |
1530 | while (true) { |
1531 | // The goal here is to set the non-demanded bits in a way that minimizes |
1532 | // the number of transitions between 0 and 1. In order to achieve this goal,
1533 | // we set the non-demanded bits to the value of the preceding demanded bits. |
1534 | // For example, if we have an immediate 0bx10xx0x1 ('x' indicates a |
1535 | // non-demanded bit), we copy bit0 (1) to the least significant 'x', |
1536 | // bit2 (0) to 'xx', and bit6 (1) to the most significant 'x'. |
1537 | // The final result is 0b11000011. |
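| // Tracing the code below on that example with EltSize == 8:
| // DemandedBits = 0b01100101, Imm = 0b01000001, NonDemandedBits = 0b10011010,
| // InvertedImm = 0b00100100, RotatedImm = 0b00001000 (the non-demanded bits
| // sitting directly above a demanded 0), Sum = 0b10100010, Carry = 0,
| // Ones = 0b10000010, and NewImm = (Imm | Ones) & Mask = 0b11000011.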
1538 | uint64_t NonDemandedBits = ~DemandedBits; |
1539 | uint64_t InvertedImm = ~Imm & DemandedBits; |
1540 | uint64_t RotatedImm = |
1541 | ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) & |
1542 | NonDemandedBits; |
1543 | uint64_t Sum = RotatedImm + NonDemandedBits; |
1544 | bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1)); |
1545 | uint64_t Ones = (Sum + Carry) & NonDemandedBits; |
1546 | NewImm = (Imm | Ones) & Mask; |
1547 | |
1548 | // If NewImm or its bitwise NOT is a shifted mask, it is a bitmask immediate |
1549 | // or all-ones or all-zeros, in which case we can stop searching. Otherwise, |
1550 | // we halve the element size and continue the search. |
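| // For instance, NewImm == 0x00FF0000 stops the search immediately (it is a
| // shifted mask), while a 64-bit immediate whose two 32-bit halves agree on
| // the demanded bits is retried as a replicated 32-bit element.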
1551 | if (isShiftedMask_64(NewImm) || isShiftedMask_64(~(NewImm | ~Mask))) |
1552 | break; |
1553 | |
1554 | // We cannot shrink the element size any further if it is 2-bits. |
1555 | if (EltSize == 2) |
1556 | return false; |
1557 | |
1558 | EltSize /= 2; |
1559 | Mask >>= EltSize; |
1560 | uint64_t Hi = Imm >> EltSize, DemandedBitsHi = DemandedBits >> EltSize; |
1561 | |
1562 | // Return if there is a mismatch in any of the demanded bits of Imm and Hi.
1563 | if (((Imm ^ Hi) & (DemandedBits & DemandedBitsHi) & Mask) != 0) |
1564 | return false; |
1565 | |
1566 | // Merge the upper and lower halves of Imm and DemandedBits. |
1567 | Imm |= Hi; |
1568 | DemandedBits |= DemandedBitsHi; |
1569 | } |
1570 | |
1571 | ++NumOptimizedImms; |
1572 | |
1573 | // Replicate the element across the register width. |
1574 | while (EltSize < Size) { |
1575 | NewImm |= NewImm << EltSize; |
1576 | EltSize *= 2; |
1577 | } |
1578 | |
1579 | (void)OldImm; |
1580 | assert(((OldImm ^ NewImm) & Demanded.getZExtValue()) == 0 &&
1581 | "demanded bits should never be altered");
1582 | assert(OldImm != NewImm && "the new imm shouldn't be equal to the old imm");
1583 | |
1584 | // Create the new constant immediate node. |
1585 | EVT VT = Op.getValueType(); |
1586 | SDLoc DL(Op); |
1587 | SDValue New; |
1588 | |
1589 | // If the new constant immediate is all-zeros or all-ones, let the target |
1590 | // independent DAG combine optimize this node. |
1591 | if (NewImm == 0 || NewImm == OrigMask) { |
1592 | New = TLO.DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0), |
1593 | TLO.DAG.getConstant(NewImm, DL, VT)); |
1594 | // Otherwise, create a machine node so that target independent DAG combine |
1595 | // doesn't undo this optimization. |
1596 | } else { |
1597 | Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size); |
1598 | SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT); |
1599 | New = SDValue( |
1600 | TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst), 0); |
1601 | } |
1602 | |
1603 | return TLO.CombineTo(Op, New); |
1604 | } |
1605 | |
1606 | bool AArch64TargetLowering::targetShrinkDemandedConstant( |
1607 | SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, |
1608 | TargetLoweringOpt &TLO) const { |
1609 | // Delay this optimization to as late as possible. |
1610 | if (!TLO.LegalOps) |
1611 | return false; |
1612 | |
1613 | if (!EnableOptimizeLogicalImm) |
1614 | return false; |
1615 | |
1616 | EVT VT = Op.getValueType(); |
1617 | if (VT.isVector()) |
1618 | return false; |
1619 | |
1620 | unsigned Size = VT.getSizeInBits(); |
1621 | assert((Size == 32 || Size == 64) &&
1622 | "i32 or i64 is expected after legalization.");
1623 | |
1624 | // Exit early if we demand all bits. |
1625 | if (DemandedBits.countPopulation() == Size) |
1626 | return false; |
1627 | |
1628 | unsigned NewOpc; |
1629 | switch (Op.getOpcode()) { |
1630 | default: |
1631 | return false; |
1632 | case ISD::AND: |
1633 | NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri; |
1634 | break; |
1635 | case ISD::OR: |
1636 | NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri; |
1637 | break; |
1638 | case ISD::XOR: |
1639 | NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri; |
1640 | break; |
1641 | } |
1642 | ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); |
1643 | if (!C) |
1644 | return false; |
1645 | uint64_t Imm = C->getZExtValue(); |
1646 | return optimizeLogicalImm(Op, Size, Imm, DemandedBits, TLO, NewOpc); |
1647 | } |
1648 | |
1649 | /// computeKnownBitsForTargetNode - Determine which of the bits specified in |
1650 | /// Mask are known to be either zero or one and return them in Known.
1651 | void AArch64TargetLowering::computeKnownBitsForTargetNode( |
1652 | const SDValue Op, KnownBits &Known, |
1653 | const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const { |
1654 | switch (Op.getOpcode()) { |
1655 | default: |
1656 | break; |
1657 | case AArch64ISD::CSEL: { |
1658 | KnownBits Known2; |
1659 | Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1); |
1660 | Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1); |
1661 | Known = KnownBits::commonBits(Known, Known2); |
1662 | break; |
1663 | } |
1664 | case AArch64ISD::LOADgot: |
1665 | case AArch64ISD::ADDlow: { |
1666 | if (!Subtarget->isTargetILP32()) |
1667 | break; |
1668 | // In ILP32 mode all valid pointers are in the low 4GB of the address-space. |
1669 | Known.Zero = APInt::getHighBitsSet(64, 32); |
1670 | break; |
1671 | } |
1672 | case ISD::INTRINSIC_W_CHAIN: { |
1673 | ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1)); |
1674 | Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue()); |
1675 | switch (IntID) { |
1676 | default: return; |
1677 | case Intrinsic::aarch64_ldaxr: |
1678 | case Intrinsic::aarch64_ldxr: { |
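| // LDXR/LDAXR zero-extend the loaded value, so every bit above the memory
| // width is known to be zero.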
1679 | unsigned BitWidth = Known.getBitWidth(); |
1680 | EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT(); |
1681 | unsigned MemBits = VT.getScalarSizeInBits(); |
1682 | Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); |
1683 | return; |
1684 | } |
1685 | } |
1686 | break; |
1687 | } |
1688 | case ISD::INTRINSIC_WO_CHAIN: |
1689 | case ISD::INTRINSIC_VOID: { |
1690 | unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); |
1691 | switch (IntNo) { |
1692 | default: |
1693 | break; |
1694 | case Intrinsic::aarch64_neon_umaxv: |
1695 | case Intrinsic::aarch64_neon_uminv: { |
1696 | // Figure out the datatype of the vector operand. The UMINV instruction |
1697 | // will zero extend the result, so we can mark as known zero all the |
1698 | // bits larger than the element datatype. 32-bit or larger doesn't need
1699 | // this as those are legal types and will be handled by isel directly. |
1700 | MVT VT = Op.getOperand(1).getValueType().getSimpleVT(); |
1701 | unsigned BitWidth = Known.getBitWidth(); |
1702 | if (VT == MVT::v8i8 || VT == MVT::v16i8) { |
1703 | assert(BitWidth >= 8 && "Unexpected width!");
1704 | APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8); |
1705 | Known.Zero |= Mask; |
1706 | } else if (VT == MVT::v4i16 || VT == MVT::v8i16) { |
1707 | assert(BitWidth >= 16 && "Unexpected width!");
1708 | APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16); |
1709 | Known.Zero |= Mask; |
1710 | } |
1711 | break; |
1712 | } break; |
1713 | } |
1714 | } |
1715 | } |
1716 | } |
1717 | |
1718 | MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL, |
1719 | EVT) const { |
1720 | return MVT::i64; |
1721 | } |
1722 | |
1723 | bool AArch64TargetLowering::allowsMisalignedMemoryAccesses( |
1724 | EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, |
1725 | bool *Fast) const { |
1726 | if (Subtarget->requiresStrictAlign()) |
1727 | return false; |
1728 | |
1729 | if (Fast) { |
1730 | // Some CPUs are fine with unaligned stores except for 128-bit ones. |
1731 | *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 || |
1732 | // See comments in performSTORECombine() for more details about |
1733 | // these conditions. |
1734 | |
1735 | // Code that uses clang vector extensions can mark that it |
1736 | // wants unaligned accesses to be treated as fast by |
1737 | // underspecifying alignment to be 1 or 2. |
1738 | Alignment <= 2 || |
1739 | |
1740 | // Disregard v2i64. Memcpy lowering produces those and splitting |
1741 | // them regresses performance on micro-benchmarks and olden/bh. |
1742 | VT == MVT::v2i64; |
1743 | } |
1744 | return true; |
1745 | } |
1746 | |
1747 | // Same as above but handling LLTs instead. |
1748 | bool AArch64TargetLowering::allowsMisalignedMemoryAccesses( |
1749 | LLT Ty, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, |
1750 | bool *Fast) const { |
1751 | if (Subtarget->requiresStrictAlign()) |
1752 | return false; |
1753 | |
1754 | if (Fast) { |
1755 | // Some CPUs are fine with unaligned stores except for 128-bit ones. |
1756 | *Fast = !Subtarget->isMisaligned128StoreSlow() || |
1757 | Ty.getSizeInBytes() != 16 || |
1758 | // See comments in performSTORECombine() for more details about |
1759 | // these conditions. |
1760 | |
1761 | // Code that uses clang vector extensions can mark that it |
1762 | // wants unaligned accesses to be treated as fast by |
1763 | // underspecifying alignment to be 1 or 2. |
1764 | Alignment <= 2 || |
1765 | |
1766 | // Disregard v2i64. Memcpy lowering produces those and splitting |
1767 | // them regresses performance on micro-benchmarks and olden/bh. |
1768 | Ty == LLT::vector(2, 64); |
1769 | } |
1770 | return true; |
1771 | } |
1772 | |
1773 | FastISel * |
1774 | AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, |
1775 | const TargetLibraryInfo *libInfo) const { |
1776 | return AArch64::createFastISel(funcInfo, libInfo); |
1777 | } |
1778 | |
1779 | const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { |
1780 | #define MAKE_CASE(V) \ |
1781 | case V: \ |
1782 | return #V; |
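| // e.g. MAKE_CASE(AArch64ISD::CALL) expands to
| //   case AArch64ISD::CALL: return "AArch64ISD::CALL";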
1783 | switch ((AArch64ISD::NodeType)Opcode) { |
1784 | case AArch64ISD::FIRST_NUMBER: |
1785 | break; |
1786 | MAKE_CASE(AArch64ISD::CALL) |
1787 | MAKE_CASE(AArch64ISD::ADRP) |
1788 | MAKE_CASE(AArch64ISD::ADR) |
1789 | MAKE_CASE(AArch64ISD::ADDlow) |
1790 | MAKE_CASE(AArch64ISD::LOADgot) |
1791 | MAKE_CASE(AArch64ISD::RET_FLAG) |
1792 | MAKE_CASE(AArch64ISD::BRCOND) |
1793 | MAKE_CASE(AArch64ISD::CSEL) |
1794 | MAKE_CASE(AArch64ISD::FCSEL) |
1795 | MAKE_CASE(AArch64ISD::CSINV) |
1796 | MAKE_CASE(AArch64ISD::CSNEG) |
1797 | MAKE_CASE(AArch64ISD::CSINC) |
1798 | MAKE_CASE(AArch64ISD::THREAD_POINTER) |
1799 | MAKE_CASE(AArch64ISD::TLSDESC_CALLSEQ) |
1800 | MAKE_CASE(AArch64ISD::ADD_PRED) |
1801 | MAKE_CASE(AArch64ISD::MUL_PRED) |
1802 | MAKE_CASE(AArch64ISD::SDIV_PRED) |
1803 | MAKE_CASE(AArch64ISD::SHL_PRED) |
1804 | MAKE_CASE(AArch64ISD::SMAX_PRED) |
1805 | MAKE_CASE(AArch64ISD::SMIN_PRED) |
1806 | MAKE_CASE(AArch64ISD::SRA_PRED) |
1807 | MAKE_CASE(AArch64ISD::SRL_PRED) |
1808 | MAKE_CASE(AArch64ISD::SUB_PRED) |
1809 | MAKE_CASE(AArch64ISD::UDIV_PRED) |
1810 | MAKE_CASE(AArch64ISD::UMAX_PRED) |
1811 | MAKE_CASE(AArch64ISD::UMIN_PRED) |
1812 | MAKE_CASE(AArch64ISD::FNEG_MERGE_PASSTHRU) |
1813 | MAKE_CASE(AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU) |
1814 | MAKE_CASE(AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU) |
1815 | MAKE_CASE(AArch64ISD::FCEIL_MERGE_PASSTHRU) |
1816 | MAKE_CASE(AArch64ISD::FFLOOR_MERGE_PASSTHRU) |
1817 | MAKE_CASE(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU) |
1818 | MAKE_CASE(AArch64ISD::FRINT_MERGE_PASSTHRU) |
1819 | MAKE_CASE(AArch64ISD::FROUND_MERGE_PASSTHRU) |
1820 | MAKE_CASE(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU) |
1821 | MAKE_CASE(AArch64ISD::FTRUNC_MERGE_PASSTHRU) |
1822 | MAKE_CASE(AArch64ISD::FP_ROUND_MERGE_PASSTHRU) |
1823 | MAKE_CASE(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU) |
1824 | MAKE_CASE(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU) |
1825 | MAKE_CASE(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU) |
1826 | MAKE_CASE(AArch64ISD::FCVTZU_MERGE_PASSTHRU) |
1827 | MAKE_CASE(AArch64ISD::FCVTZS_MERGE_PASSTHRU) |
1828 | MAKE_CASE(AArch64ISD::FSQRT_MERGE_PASSTHRU) |
1829 | MAKE_CASE(AArch64ISD::FRECPX_MERGE_PASSTHRU) |
1830 | MAKE_CASE(AArch64ISD::FABS_MERGE_PASSTHRU) |
1831 | MAKE_CASE(AArch64ISD::ABS_MERGE_PASSTHRU) |
1832 | MAKE_CASE(AArch64ISD::NEG_MERGE_PASSTHRU) |
1833 | MAKE_CASE(AArch64ISD::SETCC_MERGE_ZERO) |
1834 | MAKE_CASE(AArch64ISD::ADC) |
1835 | MAKE_CASE(AArch64ISD::SBC) |
1836 | MAKE_CASE(AArch64ISD::ADDS) |
1837 | MAKE_CASE(AArch64ISD::SUBS) |
1838 | MAKE_CASE(AArch64ISD::ADCS) |
1839 | MAKE_CASE(AArch64ISD::SBCS) |
1840 | MAKE_CASE(AArch64ISD::ANDS) |
1841 | MAKE_CASE(AArch64ISD::CCMP) |
1842 | MAKE_CASE(AArch64ISD::CCMN) |
1843 | MAKE_CASE(AArch64ISD::FCCMP) |
1844 | MAKE_CASE(AArch64ISD::FCMP) |
1845 | MAKE_CASE(AArch64ISD::STRICT_FCMP) |
1846 | MAKE_CASE(AArch64ISD::STRICT_FCMPE) |
1847 | MAKE_CASE(AArch64ISD::DUP) |
1848 | MAKE_CASE(AArch64ISD::DUPLANE8) |
1849 | MAKE_CASE(AArch64ISD::DUPLANE16) |
1850 | MAKE_CASE(AArch64ISD::DUPLANE32) |
1851 | MAKE_CASE(AArch64ISD::DUPLANE64) |
1852 | MAKE_CASE(AArch64ISD::MOVI) |
1853 | MAKE_CASE(AArch64ISD::MOVIshift) |
1854 | MAKE_CASE(AArch64ISD::MOVIedit) |
1855 | MAKE_CASE(AArch64ISD::MOVImsl) |
1856 | MAKE_CASE(AArch64ISD::FMOV) |
1857 | MAKE_CASE(AArch64ISD::MVNIshift) |
1858 | MAKE_CASE(AArch64ISD::MVNImsl) |
1859 | MAKE_CASE(AArch64ISD::BICi) |
1860 | MAKE_CASE(AArch64ISD::ORRi) |
1861 | MAKE_CASE(AArch64ISD::BSP) |
1862 | MAKE_CASE(AArch64ISD::NEG) |
1863 | MAKE_CASE(AArch64ISD::EXTR) |
1864 | MAKE_CASE(AArch64ISD::ZIP1) |
1865 | MAKE_CASE(AArch64ISD::ZIP2) |
1866 | MAKE_CASE(AArch64ISD::UZP1) |
1867 | MAKE_CASE(AArch64ISD::UZP2) |
1868 | MAKE_CASE(AArch64ISD::TRN1) |
1869 | MAKE_CASE(AArch64ISD::TRN2) |
1870 | MAKE_CASE(AArch64ISD::REV16) |
1871 | MAKE_CASE(AArch64ISD::REV32) |
1872 | MAKE_CASE(AArch64ISD::REV64) |
1873 | MAKE_CASE(AArch64ISD::EXT) |
1874 | MAKE_CASE(AArch64ISD::VSHL) |
1875 | MAKE_CASE(AArch64ISD::VLSHR) |
1876 | MAKE_CASE(AArch64ISD::VASHR) |
1877 | MAKE_CASE(AArch64ISD::VSLI) |
1878 | MAKE_CASE(AArch64ISD::VSRI) |
1879 | MAKE_CASE(AArch64ISD::CMEQ) |
1880 | MAKE_CASE(AArch64ISD::CMGE) |
1881 | MAKE_CASE(AArch64ISD::CMGT) |
1882 | MAKE_CASE(AArch64ISD::CMHI) |
1883 | MAKE_CASE(AArch64ISD::CMHS) |
1884 | MAKE_CASE(AArch64ISD::FCMEQ) |
1885 | MAKE_CASE(AArch64ISD::FCMGE) |
1886 | MAKE_CASE(AArch64ISD::FCMGT) |
1887 | MAKE_CASE(AArch64ISD::CMEQz) |
1888 | MAKE_CASE(AArch64ISD::CMGEz) |
1889 | MAKE_CASE(AArch64ISD::CMGTz) |
1890 | MAKE_CASE(AArch64ISD::CMLEz) |
1891 | MAKE_CASE(AArch64ISD::CMLTz) |
1892 | MAKE_CASE(AArch64ISD::FCMEQz) |
1893 | MAKE_CASE(AArch64ISD::FCMGEz) |
1894 | MAKE_CASE(AArch64ISD::FCMGTz) |
1895 | MAKE_CASE(AArch64ISD::FCMLEz) |
1896 | MAKE_CASE(AArch64ISD::FCMLTz) |
1897 | MAKE_CASE(AArch64ISD::SADDV) |
1898 | MAKE_CASE(AArch64ISD::UADDV) |
1899 | MAKE_CASE(AArch64ISD::SRHADD) |
1900 | MAKE_CASE(AArch64ISD::URHADD) |
1901 | MAKE_CASE(AArch64ISD::SHADD) |
1902 | MAKE_CASE(AArch64ISD::UHADD) |
1903 | MAKE_CASE(AArch64ISD::SDOT) |
1904 | MAKE_CASE(AArch64ISD::UDOT) |
1905 | MAKE_CASE(AArch64ISD::SMINV) |
1906 | MAKE_CASE(AArch64ISD::UMINV) |
1907 | MAKE_CASE(AArch64ISD::SMAXV) |
1908 | MAKE_CASE(AArch64ISD::UMAXV) |
1909 | MAKE_CASE(AArch64ISD::SADDV_PRED) |
1910 | MAKE_CASE(AArch64ISD::UADDV_PRED) |
1911 | MAKE_CASE(AArch64ISD::SMAXV_PRED) |
1912 | MAKE_CASE(AArch64ISD::UMAXV_PRED) |
1913 | MAKE_CASE(AArch64ISD::SMINV_PRED) |
1914 | MAKE_CASE(AArch64ISD::UMINV_PRED) |
1915 | MAKE_CASE(AArch64ISD::ORV_PRED) |
1916 | MAKE_CASE(AArch64ISD::EORV_PRED) |
1917 | MAKE_CASE(AArch64ISD::ANDV_PRED) |
1918 | MAKE_CASE(AArch64ISD::CLASTA_N) |
1919 | MAKE_CASE(AArch64ISD::CLASTB_N) |
1920 | MAKE_CASE(AArch64ISD::LASTA) |
1921 | MAKE_CASE(AArch64ISD::LASTB) |
1922 | MAKE_CASE(AArch64ISD::REINTERPRET_CAST) |
1923 | MAKE_CASE(AArch64ISD::TBL) |
1924 | MAKE_CASE(AArch64ISD::FADD_PRED) |
1925 | MAKE_CASE(AArch64ISD::FADDA_PRED) |
1926 | MAKE_CASE(AArch64ISD::FADDV_PRED) |
1927 | MAKE_CASE(AArch64ISD::FDIV_PRED) |
1928 | MAKE_CASE(AArch64ISD::FMA_PRED) |
1929 | MAKE_CASE(AArch64ISD::FMAX_PRED) |
1930 | MAKE_CASE(AArch64ISD::FMAXV_PRED) |
1931 | MAKE_CASE(AArch64ISD::FMAXNM_PRED) |
1932 | MAKE_CASE(AArch64ISD::FMAXNMV_PRED) |
1933 | MAKE_CASE(AArch64ISD::FMIN_PRED) |
1934 | MAKE_CASE(AArch64ISD::FMINV_PRED) |
1935 | MAKE_CASE(AArch64ISD::FMINNM_PRED) |
1936 | MAKE_CASE(AArch64ISD::FMINNMV_PRED) |
1937 | MAKE_CASE(AArch64ISD::FMUL_PRED) |
1938 | MAKE_CASE(AArch64ISD::FSUB_PRED) |
1939 | MAKE_CASE(AArch64ISD::BIT) |
1940 | MAKE_CASE(AArch64ISD::CBZ) |
1941 | MAKE_CASE(AArch64ISD::CBNZ) |
1942 | MAKE_CASE(AArch64ISD::TBZ) |
1943 | MAKE_CASE(AArch64ISD::TBNZ) |
1944 | MAKE_CASE(AArch64ISD::TC_RETURN) |
1945 | MAKE_CASE(AArch64ISD::PREFETCH) |
1946 | MAKE_CASE(AArch64ISD::SITOF) |
1947 | MAKE_CASE(AArch64ISD::UITOF) |
1948 | MAKE_CASE(AArch64ISD::NVCAST) |
1949 | MAKE_CASE(AArch64ISD::MRS) |
1950 | MAKE_CASE(AArch64ISD::SQSHL_I) |
1951 | MAKE_CASE(AArch64ISD::UQSHL_I) |
1952 | MAKE_CASE(AArch64ISD::SRSHR_I) |
1953 | MAKE_CASE(AArch64ISD::URSHR_I) |
1954 | MAKE_CASE(AArch64ISD::SQSHLU_I) |
1955 | MAKE_CASE(AArch64ISD::WrapperLarge) |
1956 | MAKE_CASE(AArch64ISD::LD2post) |
1957 | MAKE_CASE(AArch64ISD::LD3post) |
1958 | MAKE_CASE(AArch64ISD::LD4post) |
1959 | MAKE_CASE(AArch64ISD::ST2post) |
1960 | MAKE_CASE(AArch64ISD::ST3post) |
1961 | MAKE_CASE(AArch64ISD::ST4post) |
1962 | MAKE_CASE(AArch64ISD::LD1x2post) |
1963 | MAKE_CASE(AArch64ISD::LD1x3post) |
1964 | MAKE_CASE(AArch64ISD::LD1x4post) |
1965 | MAKE_CASE(AArch64ISD::ST1x2post) |
1966 | MAKE_CASE(AArch64ISD::ST1x3post) |
1967 | MAKE_CASE(AArch64ISD::ST1x4post) |
1968 | MAKE_CASE(AArch64ISD::LD1DUPpost) |
1969 | MAKE_CASE(AArch64ISD::LD2DUPpost) |
1970 | MAKE_CASE(AArch64ISD::LD3DUPpost) |
1971 | MAKE_CASE(AArch64ISD::LD4DUPpost) |
1972 | MAKE_CASE(AArch64ISD::LD1LANEpost) |
1973 | MAKE_CASE(AArch64ISD::LD2LANEpost) |
1974 | MAKE_CASE(AArch64ISD::LD3LANEpost) |
1975 | MAKE_CASE(AArch64ISD::LD4LANEpost) |
1976 | MAKE_CASE(AArch64ISD::ST2LANEpost) |
1977 | MAKE_CASE(AArch64ISD::ST3LANEpost) |
1978 | MAKE_CASE(AArch64ISD::ST4LANEpost) |
1979 | MAKE_CASE(AArch64ISD::SMULL) |
1980 | MAKE_CASE(AArch64ISD::UMULL) |
1981 | MAKE_CASE(AArch64ISD::FRECPE) |
1982 | MAKE_CASE(AArch64ISD::FRECPS) |
1983 | MAKE_CASE(AArch64ISD::FRSQRTE) |
1984 | MAKE_CASE(AArch64ISD::FRSQRTS) |
1985 | MAKE_CASE(AArch64ISD::STG) |
1986 | MAKE_CASE(AArch64ISD::STZG) |
1987 | MAKE_CASE(AArch64ISD::ST2G) |
1988 | MAKE_CASE(AArch64ISD::STZ2G) |
1989 | MAKE_CASE(AArch64ISD::SUNPKHI) |
1990 | MAKE_CASE(AArch64ISD::SUNPKLO) |
1991 | MAKE_CASE(AArch64ISD::UUNPKHI) |
1992 | MAKE_CASE(AArch64ISD::UUNPKLO) |
1993 | MAKE_CASE(AArch64ISD::INSR) |
1994 | MAKE_CASE(AArch64ISD::PTEST) |
1995 | MAKE_CASE(AArch64ISD::PTRUE) |
1996 | MAKE_CASE(AArch64ISD::LD1_MERGE_ZERO) |
1997 | MAKE_CASE(AArch64ISD::LD1S_MERGE_ZERO) |
1998 | MAKE_CASE(AArch64ISD::LDNF1_MERGE_ZERO) |
1999 | MAKE_CASE(AArch64ISD::LDNF1S_MERGE_ZERO) |
2000 | MAKE_CASE(AArch64ISD::LDFF1_MERGE_ZERO) |
2001 | MAKE_CASE(AArch64ISD::LDFF1S_MERGE_ZERO) |
2002 | MAKE_CASE(AArch64ISD::LD1RQ_MERGE_ZERO) |
2003 | MAKE_CASE(AArch64ISD::LD1RO_MERGE_ZERO) |
2004 | MAKE_CASE(AArch64ISD::SVE_LD2_MERGE_ZERO) |
2005 | MAKE_CASE(AArch64ISD::SVE_LD3_MERGE_ZERO) |
2006 | MAKE_CASE(AArch64ISD::SVE_LD4_MERGE_ZERO) |
2007 | MAKE_CASE(AArch64ISD::GLD1_MERGE_ZERO) |
2008 | MAKE_CASE(AArch64ISD::GLD1_SCALED_MERGE_ZERO) |
2009 | MAKE_CASE(AArch64ISD::GLD1_SXTW_MERGE_ZERO) |
2010 | MAKE_CASE(AArch64ISD::GLD1_UXTW_MERGE_ZERO) |
2011 | MAKE_CASE(AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO) |
2012 | MAKE_CASE(AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO) |
2013 | MAKE_CASE(AArch64ISD::GLD1_IMM_MERGE_ZERO) |
2014 | MAKE_CASE(AArch64ISD::GLD1S_MERGE_ZERO) |
2015 | MAKE_CASE(AArch64ISD::GLD1S_SCALED_MERGE_ZERO) |
2016 | MAKE_CASE(AArch64ISD::GLD1S_SXTW_MERGE_ZERO) |
2017 | MAKE_CASE(AArch64ISD::GLD1S_UXTW_MERGE_ZERO) |
2018 | MAKE_CASE(AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO) |
2019 | MAKE_CASE(AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO) |
2020 | MAKE_CASE(AArch64ISD::GLD1S_IMM_MERGE_ZERO) |
2021 | MAKE_CASE(AArch64ISD::GLDFF1_MERGE_ZERO) |
2022 | MAKE_CASE(AArch64ISD::GLDFF1_SCALED_MERGE_ZERO) |
2023 | MAKE_CASE(AArch64ISD::GLDFF1_SXTW_MERGE_ZERO) |
2024 | MAKE_CASE(AArch64ISD::GLDFF1_UXTW_MERGE_ZERO) |
2025 | MAKE_CASE(AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO) |
2026 | MAKE_CASE(AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO) |
2027 | MAKE_CASE(AArch64ISD::GLDFF1_IMM_MERGE_ZERO) |
2028 | MAKE_CASE(AArch64ISD::GLDFF1S_MERGE_ZERO) |
2029 | MAKE_CASE(AArch64ISD::GLDFF1S_SCALED_MERGE_ZERO) |
2030 | MAKE_CASE(AArch64ISD::GLDFF1S_SXTW_MERGE_ZERO) |
2031 | MAKE_CASE(AArch64ISD::GLDFF1S_UXTW_MERGE_ZERO) |
2032 | MAKE_CASE(AArch64ISD::GLDFF1S_SXTW_SCALED_MERGE_ZERO) |
2033 | MAKE_CASE(AArch64ISD::GLDFF1S_UXTW_SCALED_MERGE_ZERO) |
2034 | MAKE_CASE(AArch64ISD::GLDFF1S_IMM_MERGE_ZERO) |
2035 | MAKE_CASE(AArch64ISD::GLDNT1_MERGE_ZERO) |
2036 | MAKE_CASE(AArch64ISD::GLDNT1_INDEX_MERGE_ZERO) |
2037 | MAKE_CASE(AArch64ISD::GLDNT1S_MERGE_ZERO) |
2038 | MAKE_CASE(AArch64ISD::ST1_PRED) |
2039 | MAKE_CASE(AArch64ISD::SST1_PRED) |
2040 | MAKE_CASE(AArch64ISD::SST1_SCALED_PRED) |
2041 | MAKE_CASE(AArch64ISD::SST1_SXTW_PRED) |
2042 | MAKE_CASE(AArch64ISD::SST1_UXTW_PRED) |
2043 | MAKE_CASE(AArch64ISD::SST1_SXTW_SCALED_PRED) |
2044 | MAKE_CASE(AArch64ISD::SST1_UXTW_SCALED_PRED) |
2045 | MAKE_CASE(AArch64ISD::SST1_IMM_PRED) |
2046 | MAKE_CASE(AArch64ISD::SSTNT1_PRED) |
2047 | MAKE_CASE(AArch64ISD::SSTNT1_INDEX_PRED) |
2048 | MAKE_CASE(AArch64ISD::LDP) |
2049 | MAKE_CASE(AArch64ISD::STP) |
2050 | MAKE_CASE(AArch64ISD::STNP) |
2051 | MAKE_CASE(AArch64ISD::BITREVERSE_MERGE_PASSTHRU) |
2052 | MAKE_CASE(AArch64ISD::BSWAP_MERGE_PASSTHRU) |
2053 | MAKE_CASE(AArch64ISD::CTLZ_MERGE_PASSTHRU) |
2054 | MAKE_CASE(AArch64ISD::CTPOP_MERGE_PASSTHRU) |
2055 | MAKE_CASE(AArch64ISD::DUP_MERGE_PASSTHRU) |
2056 | MAKE_CASE(AArch64ISD::INDEX_VECTOR) |
2057 | MAKE_CASE(AArch64ISD::UABD) |
2058 | MAKE_CASE(AArch64ISD::SABD) |
2059 | MAKE_CASE(AArch64ISD::CALL_RVMARKER) |
2060 | } |
2061 | #undef MAKE_CASE |
2062 | return nullptr; |
2063 | } |
2064 | |
2065 | MachineBasicBlock * |
2066 | AArch64TargetLowering::EmitF128CSEL(MachineInstr &MI, |
2067 | MachineBasicBlock *MBB) const { |
2068 | // We materialise the F128CSEL pseudo-instruction as some control flow and a |
2069 | // phi node: |
2070 | |
2071 | // OrigBB: |
2072 | // [... previous instrs leading to comparison ...] |
2073 | // b.ne TrueBB |
2074 | // b EndBB |
2075 | // TrueBB: |
2076 | // ; Fallthrough |
2077 | // EndBB: |
2078 | // Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB] |
2079 | |
2080 | MachineFunction *MF = MBB->getParent(); |
2081 | const TargetInstrInfo *TII = Subtarget->getInstrInfo(); |
2082 | const BasicBlock *LLVM_BB = MBB->getBasicBlock(); |
2083 | DebugLoc DL = MI.getDebugLoc(); |
2084 | MachineFunction::iterator It = ++MBB->getIterator(); |
2085 | |
2086 | Register DestReg = MI.getOperand(0).getReg(); |
2087 | Register IfTrueReg = MI.getOperand(1).getReg(); |
2088 | Register IfFalseReg = MI.getOperand(2).getReg(); |
2089 | unsigned CondCode = MI.getOperand(3).getImm(); |
2090 | bool NZCVKilled = MI.getOperand(4).isKill(); |
2091 | |
2092 | MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB); |
2093 | MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB); |
2094 | MF->insert(It, TrueBB); |
2095 | MF->insert(It, EndBB); |
2096 | |
2097 | // Transfer rest of current basic-block to EndBB |
2098 | EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)), |
2099 | MBB->end()); |
2100 | EndBB->transferSuccessorsAndUpdatePHIs(MBB); |
2101 | |
2102 | BuildMI(MBB, DL, TII->get(AArch64::Bcc)).addImm(CondCode).addMBB(TrueBB); |
2103 | BuildMI(MBB, DL, TII->get(AArch64::B)).addMBB(EndBB); |
2104 | MBB->addSuccessor(TrueBB); |
2105 | MBB->addSuccessor(EndBB); |
2106 | |
2107 | // TrueBB falls through to the end. |
2108 | TrueBB->addSuccessor(EndBB); |
2109 | |
2110 | if (!NZCVKilled) { |
2111 | TrueBB->addLiveIn(AArch64::NZCV); |
2112 | EndBB->addLiveIn(AArch64::NZCV); |
2113 | } |
2114 | |
2115 | BuildMI(*EndBB, EndBB->begin(), DL, TII->get(AArch64::PHI), DestReg) |
2116 | .addReg(IfTrueReg) |
2117 | .addMBB(TrueBB) |
2118 | .addReg(IfFalseReg) |
2119 | .addMBB(MBB); |
2120 | |
2121 | MI.eraseFromParent(); |
2122 | return EndBB; |
2123 | } |
2124 | |
2125 | MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchRet( |
2126 | MachineInstr &MI, MachineBasicBlock *BB) const { |
2127 | assert(!isAsynchronousEHPersonality(classifyEHPersonality(
2128 | BB->getParent()->getFunction().getPersonalityFn())) &&
2129 | "SEH does not use catchret!");
2130 | return BB; |
2131 | } |
2132 | |
2133 | MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter( |
2134 | MachineInstr &MI, MachineBasicBlock *BB) const { |
2135 | switch (MI.getOpcode()) { |
2136 | default: |
2137 | #ifndef NDEBUG |
2138 | MI.dump(); |
2139 | #endif |
2140 | llvm_unreachable("Unexpected instruction for custom inserter!")::llvm::llvm_unreachable_internal("Unexpected instruction for custom inserter!" , "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp" , 2140); |
2141 | |
2142 | case AArch64::F128CSEL: |
2143 | return EmitF128CSEL(MI, BB); |
2144 | |
2145 | case TargetOpcode::STACKMAP: |
2146 | case TargetOpcode::PATCHPOINT: |
2147 | case TargetOpcode::STATEPOINT: |
2148 | return emitPatchPoint(MI, BB); |
2149 | |
2150 | case AArch64::CATCHRET: |
2151 | return EmitLoweredCatchRet(MI, BB); |
2152 | } |
2153 | } |
2154 | |
2155 | //===----------------------------------------------------------------------===// |
2156 | // AArch64 Lowering private implementation. |
2157 | //===----------------------------------------------------------------------===// |
2158 | |
2159 | //===----------------------------------------------------------------------===// |
2160 | // Lowering Code |
2161 | //===----------------------------------------------------------------------===// |
2162 | |
2163 | /// isZerosVector - Check whether SDNode N is a zero-filled vector. |
2164 | static bool isZerosVector(const SDNode *N) { |
2165 | // Look through a bit convert. |
2166 | while (N->getOpcode() == ISD::BITCAST) |
2167 | N = N->getOperand(0).getNode(); |
2168 | |
2169 | if (ISD::isConstantSplatVectorAllZeros(N)) |
2170 | return true; |
2171 | |
2172 | if (N->getOpcode() != AArch64ISD::DUP) |
2173 | return false; |
2174 | |
2175 | auto Opnd0 = N->getOperand(0); |
2176 | auto *CINT = dyn_cast<ConstantSDNode>(Opnd0); |
2177 | auto *CFP = dyn_cast<ConstantFPSDNode>(Opnd0); |
2178 | return (CINT && CINT->isNullValue()) || (CFP && CFP->isZero()); |
2179 | } |
2180 | |
2181 | /// changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64 |
2182 | /// CC |
2183 | static AArch64CC::CondCode changeIntCCToAArch64CC(ISD::CondCode CC) { |
2184 | switch (CC) { |
2185 | default: |
2186 | llvm_unreachable("Unknown condition code!")::llvm::llvm_unreachable_internal("Unknown condition code!", "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp" , 2186); |
2187 | case ISD::SETNE: |
2188 | return AArch64CC::NE; |
2189 | case ISD::SETEQ: |
2190 | return AArch64CC::EQ; |
2191 | case ISD::SETGT: |
2192 | return AArch64CC::GT; |
2193 | case ISD::SETGE: |
2194 | return AArch64CC::GE; |
2195 | case ISD::SETLT: |
2196 | return AArch64CC::LT; |
2197 | case ISD::SETLE: |
2198 | return AArch64CC::LE; |
2199 | case ISD::SETUGT: |
2200 | return AArch64CC::HI; |
2201 | case ISD::SETUGE: |
2202 | return AArch64CC::HS; |
2203 | case ISD::SETULT: |
2204 | return AArch64CC::LO; |
2205 | case ISD::SETULE: |
2206 | return AArch64CC::LS; |
2207 | } |
2208 | } |
2209 | |
2210 | /// changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC. |
2211 | static void changeFPCCToAArch64CC(ISD::CondCode CC, |
2212 | AArch64CC::CondCode &CondCode, |
2213 | AArch64CC::CondCode &CondCode2) { |
2214 | CondCode2 = AArch64CC::AL; |
2215 | switch (CC) { |
2216 | default: |
2217 | llvm_unreachable("Unknown FP condition!")::llvm::llvm_unreachable_internal("Unknown FP condition!", "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp" , 2217); |
2218 | case ISD::SETEQ: |
2219 | case ISD::SETOEQ: |
2220 | CondCode = AArch64CC::EQ; |
2221 | break; |
2222 | case ISD::SETGT: |
2223 | case ISD::SETOGT: |
2224 | CondCode = AArch64CC::GT; |
2225 | break; |
2226 | case ISD::SETGE: |
2227 | case ISD::SETOGE: |
2228 | CondCode = AArch64CC::GE; |
2229 | break; |
2230 | case ISD::SETOLT: |
2231 | CondCode = AArch64CC::MI; |
2232 | break; |
2233 | case ISD::SETOLE: |
2234 | CondCode = AArch64CC::LS; |
2235 | break; |
2236 | case ISD::SETONE: |
2237 | CondCode = AArch64CC::MI; |
2238 | CondCode2 = AArch64CC::GT; |
2239 | break; |
2240 | case ISD::SETO: |
2241 | CondCode = AArch64CC::VC; |
2242 | break; |
2243 | case ISD::SETUO: |
2244 | CondCode = AArch64CC::VS; |
2245 | break; |
2246 | case ISD::SETUEQ: |
2247 | CondCode = AArch64CC::EQ; |
2248 | CondCode2 = AArch64CC::VS; |
2249 | break; |
2250 | case ISD::SETUGT: |
2251 | CondCode = AArch64CC::HI; |
2252 | break; |
2253 | case ISD::SETUGE: |
2254 | CondCode = AArch64CC::PL; |
2255 | break; |
2256 | case ISD::SETLT: |
2257 | case ISD::SETULT: |
2258 | CondCode = AArch64CC::LT; |
2259 | break; |
2260 | case ISD::SETLE: |
2261 | case ISD::SETULE: |
2262 | CondCode = AArch64CC::LE; |
2263 | break; |
2264 | case ISD::SETNE: |
2265 | case ISD::SETUNE: |
2266 | CondCode = AArch64CC::NE; |
2267 | break; |
2268 | } |
2269 | } |
2270 | |
2271 | /// Convert a DAG fp condition code to an AArch64 CC. |
2272 | /// This differs from changeFPCCToAArch64CC in that it returns cond codes that |
2273 | /// should be AND'ed instead of OR'ed. |
2274 | static void changeFPCCToANDAArch64CC(ISD::CondCode CC, |
2275 | AArch64CC::CondCode &CondCode, |
2276 | AArch64CC::CondCode &CondCode2) { |
2277 | CondCode2 = AArch64CC::AL; |
2278 | switch (CC) { |
2279 | default: |
2280 | changeFPCCToAArch64CC(CC, CondCode, CondCode2); |
2281 | assert(CondCode2 == AArch64CC::AL);
2282 | break; |
2283 | case ISD::SETONE: |
2284 | // (a one b) |
2285 | // == ((a olt b) || (a ogt b)) |
2286 | // == ((a ord b) && (a une b)) |
2287 | CondCode = AArch64CC::VC; |
2288 | CondCode2 = AArch64CC::NE; |
2289 | break; |
2290 | case ISD::SETUEQ: |
2291 | // (a ueq b) |
2292 | // == ((a uno b) || (a oeq b)) |
2293 | // == ((a ule b) && (a uge b)) |
2294 | CondCode = AArch64CC::PL; |
2295 | CondCode2 = AArch64CC::LE; |
2296 | break; |
2297 | } |
2298 | } |
2299 | |
2300 | /// changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 |
2301 | /// CC usable with the vector instructions. Fewer operations are available |
2302 | /// without a real NZCV register, so we have to use less efficient combinations |
2303 | /// to get the same effect. |
2304 | static void changeVectorFPCCToAArch64CC(ISD::CondCode CC, |
2305 | AArch64CC::CondCode &CondCode, |
2306 | AArch64CC::CondCode &CondCode2, |
2307 | bool &Invert) { |
2308 | Invert = false; |
2309 | switch (CC) { |
2310 | default: |
2311 | // Mostly the scalar mappings work fine. |
2312 | changeFPCCToAArch64CC(CC, CondCode, CondCode2); |
2313 | break; |
2314 | case ISD::SETUO: |
2315 | Invert = true; |
2316 | LLVM_FALLTHROUGH;
2317 | case ISD::SETO: |
2318 | CondCode = AArch64CC::MI; |
2319 | CondCode2 = AArch64CC::GE; |
2320 | break; |
2321 | case ISD::SETUEQ: |
2322 | case ISD::SETULT: |
2323 | case ISD::SETULE: |
2324 | case ISD::SETUGT: |
2325 | case ISD::SETUGE: |
2326 | // All of the compare-mask comparisons are ordered, but we can switch |
2327 | // between the two by a double inversion. E.g. ULE == !OGT. |
2328 | Invert = true; |
2329 | changeFPCCToAArch64CC(getSetCCInverse(CC, /* FP inverse */ MVT::f32), |
2330 | CondCode, CondCode2); |
2331 | break; |
2332 | } |
2333 | } |
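     | // Illustrative note (editor's sketch, not in the original source): for a
     | // vector SETULT the helper above inverts the condition to SETOGE, maps it to
     | // an ordered compare mask, and sets Invert so the caller flips the resulting
     | // mask. This relies on ULT == !OGE for IEEE floats: NaNs compare unordered,
     | // so inverting the ordered mask reproduces the unordered result exactly.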
2334 | |
2335 | static bool isLegalArithImmed(uint64_t C) { |
2336 | // Matches AArch64DAGToDAGISel::SelectArithImmed(). |
2337 | bool IsLegal = (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0); |
2338 | LLVM_DEBUG(dbgs() << "Is imm " << C
2339 | << " legal: " << (IsLegal ? "yes\n" : "no\n"));
2340 | return IsLegal; |
2341 | } |
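     | // Worked example (editor's note, not in the original source): with the check
     | // above, 0xFFF (4095) is legal as a plain 12-bit immediate, and 0x123000 is
     | // legal because its low 12 bits are zero and it fits in 24 bits (the
     | // "LSL #12" encoding), while 0x1001 matches neither form and would require
     | // materializing the constant in a register first.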
2342 | |
2343 | // Can a (CMP op1, (sub 0, op2)) be turned into a CMN instruction on
2344 | // the grounds that "op1 - (-op2) == op1 + op2"? Not always: the C and V flags
2345 | // can be set differently by this operation. It comes down to whether |
2346 | // "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then |
2347 | // everything is fine. If not then the optimization is wrong. Thus general |
2348 | // comparisons are only valid if op2 != 0. |
2349 | // |
2350 | // So, finally, the only LLVM-native comparisons that don't mention C and V |
2351 | // are SETEQ and SETNE. They're the only ones we can safely use CMN for in |
2352 | // the absence of information about op2. |
2353 | static bool isCMN(SDValue Op, ISD::CondCode CC) { |
2354 | return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0)) && |
2355 | (CC == ISD::SETEQ || CC == ISD::SETNE); |
2356 | } |
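     | // Editor's sketch (not in the original source): isCMN() matches patterns like
     | //   (setcc eq, %x, (sub 0, %y))
     | // which emitComparison() below turns into (ADDS %x, %y), i.e. a CMN, since for
     | // EQ/NE only the Z flag matters and x - (-y) and x + y produce the same result
     | // value, hence the same Z flag.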
2357 | |
2358 | static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &dl, |
2359 | SelectionDAG &DAG, SDValue Chain, |
2360 | bool IsSignaling) { |
2361 | EVT VT = LHS.getValueType(); |
2362 | assert(VT != MVT::f128);
2363 | assert(VT != MVT::f16 && "Lowering of strict fp16 not yet implemented");
2364 | unsigned Opcode = |
2365 | IsSignaling ? AArch64ISD::STRICT_FCMPE : AArch64ISD::STRICT_FCMP; |
2366 | return DAG.getNode(Opcode, dl, {VT, MVT::Other}, {Chain, LHS, RHS}); |
2367 | } |
2368 | |
2369 | static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, |
2370 | const SDLoc &dl, SelectionDAG &DAG) { |
2371 | EVT VT = LHS.getValueType(); |
2372 | const bool FullFP16 = |
2373 | static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16(); |
2374 | |
2375 | if (VT.isFloatingPoint()) { |
2376 | assert(VT != MVT::f128);
2377 | if (VT == MVT::f16 && !FullFP16) { |
2378 | LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS); |
2379 | RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS); |
2380 | VT = MVT::f32; |
2381 | } |
2382 | return DAG.getNode(AArch64ISD::FCMP, dl, VT, LHS, RHS); |
2383 | } |
2384 | |
2385 | // The CMP instruction is just an alias for SUBS, and representing it as |
2386 | // SUBS means that it's possible to get CSE with subtract operations. |
2387 | // A later phase can perform the optimization of setting the destination |
2388 | // register to WZR/XZR if it ends up being unused. |
2389 | unsigned Opcode = AArch64ISD::SUBS; |
2390 | |
2391 | if (isCMN(RHS, CC)) { |
2392 | // Can we combine a (CMP op1, (sub 0, op2)) into a CMN instruction?
2393 | Opcode = AArch64ISD::ADDS; |
2394 | RHS = RHS.getOperand(1); |
2395 | } else if (isCMN(LHS, CC)) { |
2396 | // As we are looking for EQ/NE compares, the operands can be commuted; can
2397 | // we combine a (CMP (sub 0, op1), op2) into a CMN instruction?
2398 | Opcode = AArch64ISD::ADDS; |
2399 | LHS = LHS.getOperand(1); |
2400 | } else if (isNullConstant(RHS) && !isUnsignedIntSetCC(CC)) { |
2401 | if (LHS.getOpcode() == ISD::AND) { |
2402 | // Similarly, (CMP (and X, Y), 0) can be implemented with a TST |
2403 | // (a.k.a. ANDS) except that the flags are only guaranteed to work for one |
2404 | // of the signed comparisons. |
2405 | const SDValue ANDSNode = DAG.getNode(AArch64ISD::ANDS, dl, |
2406 | DAG.getVTList(VT, MVT_CC), |
2407 | LHS.getOperand(0), |
2408 | LHS.getOperand(1)); |
2409 | // Replace all users of (and X, Y) with newly generated (ands X, Y) |
2410 | DAG.ReplaceAllUsesWith(LHS, ANDSNode); |
2411 | return ANDSNode.getValue(1); |
2412 | } else if (LHS.getOpcode() == AArch64ISD::ANDS) { |
2413 | // Use result of ANDS |
2414 | return LHS.getValue(1); |
2415 | } |
2416 | } |
2417 | |
2418 | return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS) |
2419 | .getValue(1); |
2420 | } |
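     | // Editor's note (not in the original source): at the machine level the SUBS
     | // node produced above typically becomes "subs wzr, w0, w1", which the
     | // assembler prints as "cmp w0, w1"; keeping it as SUBS until then is what
     | // allows CSE with an actual subtraction of the same operands.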
2421 | |
2422 | /// \defgroup AArch64CCMP CMP;CCMP matching |
2423 | /// |
2424 | /// These functions deal with the formation of CMP;CCMP;... sequences. |
2425 | /// The CCMP/CCMN/FCCMP/FCCMPE instructions allow the conditional execution of |
2426 | /// a comparison. They set the NZCV flags to a predefined value if their |
2427 | /// predicate is false. This makes it possible to express arbitrary conjunctions, for
2428 | /// example "cmp 0 (and (setCA (cmp A)) (setCB (cmp B)))" |
2429 | /// expressed as: |
2430 | /// cmp A |
2431 | /// ccmp B, inv(CB), CA |
2432 | /// check for CB flags |
2433 | /// |
2434 | /// This naturally lets us implement chains of AND operations with SETCC |
2435 | /// operands. And we can even implement some other situations by transforming |
2436 | /// them: |
2437 | /// - We can implement (NEG SETCC) i.e. negating a single comparison by |
2438 | /// negating the flags used in a CCMP/FCCMP operation.
2439 | /// - We can negate the result of a whole chain of CMP/CCMP/FCCMP operations |
2440 | /// by negating the flags we test for afterwards. i.e. |
2441 | /// NEG (CMP CCMP CCCMP ...) can be implemented. |
2442 | /// - Note that we can only ever negate all previously processed results. |
2443 | /// What we cannot implement by flipping the flags to test is a negation
2444 | /// of two sub-trees (because the negation affects all sub-trees emitted so |
2445 | /// far, so the 2nd sub-tree we emit would also affect the first). |
2446 | /// With those tools we can implement some OR operations: |
2447 | /// - (OR (SETCC A) (SETCC B)) can be implemented via: |
2448 | /// NEG (AND (NEG (SETCC A)) (NEG (SETCC B))) |
2449 | /// - After transforming OR to NEG/AND combinations we may be able to use NEG |
2450 | /// elimination rules from earlier to implement the whole thing as a |
2451 | /// CCMP/FCCMP chain. |
2452 | /// |
2453 | /// As complete example: |
2454 | /// or (or (setCA (cmp A)) (setCB (cmp B))) |
2455 | /// (and (setCC (cmp C)) (setCD (cmp D)))
2456 | /// can be reassociated to: |
2457 | /// or (and (setCC (cmp C)) (setCD (cmp D)))
2458 | /// (or (setCA (cmp A)) (setCB (cmp B)))
2459 | /// can be transformed to: |
2460 | /// not (and (not (and (setCC (cmp C)) (setCD (cmp D)))) |
2461 | /// (and (not (setCA (cmp A))) (not (setCB (cmp B)))))
2462 | /// which can be implemented as: |
2463 | /// cmp C |
2464 | /// ccmp D, inv(CD), CC |
2465 | /// ccmp A, CA, inv(CD) |
2466 | /// ccmp B, CB, inv(CA) |
2467 | /// check for CB flags |
2468 | /// |
2469 | /// A counterexample is "or (and A B) (and C D)" which translates to |
2470 | /// not (and (not (and (not A) (not B))) (not (and (not C) (not D)))); we
2471 | /// can only implement one of the inner (not) operations, not both!
2472 | /// @{ |
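     | // Editor's sketch (not in the original source): as a concrete instance of the
     | // scheme above, a source condition like "a == 0 && b > 5" can be emitted as
     | //   cmp  w0, #0              // sets flags for a == 0
     | //   ccmp w1, #5, #4, eq      // if eq: compare b with 5; else force Z=1 (NZCV=0b0100)
     | //   cset w2, gt              // gt fails whenever the forced-flags path was taken
     | // The exact NZCV immediate the compiler picks may differ; any value that makes
     | // the final "gt" test fail when a != 0 is equivalent.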
2473 | |
2474 | /// Create a conditional comparison; Use CCMP, CCMN or FCCMP as appropriate. |
2475 | static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, |
2476 | ISD::CondCode CC, SDValue CCOp, |
2477 | AArch64CC::CondCode Predicate, |
2478 | AArch64CC::CondCode OutCC, |
2479 | const SDLoc &DL, SelectionDAG &DAG) { |
2480 | unsigned Opcode = 0; |
2481 | const bool FullFP16 = |
2482 | static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16(); |
2483 | |
2484 | if (LHS.getValueType().isFloatingPoint()) { |
2485 | assert(LHS.getValueType() != MVT::f128);
2486 | if (LHS.getValueType() == MVT::f16 && !FullFP16) { |
2487 | LHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, LHS); |
2488 | RHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, RHS); |
2489 | } |
2490 | Opcode = AArch64ISD::FCCMP; |
2491 | } else if (RHS.getOpcode() == ISD::SUB) { |
2492 | SDValue SubOp0 = RHS.getOperand(0); |
2493 | if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) { |
2494 | // See emitComparison() on why we can only do this for SETEQ and SETNE. |
2495 | Opcode = AArch64ISD::CCMN; |
2496 | RHS = RHS.getOperand(1); |
2497 | } |
2498 | } |
2499 | if (Opcode == 0) |
2500 | Opcode = AArch64ISD::CCMP; |
2501 | |
2502 | SDValue Condition = DAG.getConstant(Predicate, DL, MVT_CC); |
2503 | AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC); |
2504 | unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC); |
2505 | SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32); |
2506 | return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp); |
2507 | } |
2508 | |
2509 | /// Returns true if @p Val is a tree of AND/OR/SETCC operations that can be |
2510 | /// expressed as a conjunction. See \ref AArch64CCMP. |
2511 | /// \param CanNegate Set to true if we can negate the whole sub-tree just by |
2512 | /// changing the conditions on the SETCC tests. |
2513 | /// (this means we can call emitConjunctionRec() with |
2514 | /// Negate==true on this sub-tree) |
2515 | /// \param MustBeFirst Set to true if this subtree needs to be negated and we |
2516 | /// cannot do the negation naturally. We are required to |
2517 | /// emit the subtree first in this case. |
2518 | /// \param WillNegate Is true if we are called when the result of this
2519 | /// subexpression must be negated. This happens when the |
2520 | /// outer expression is an OR. We can use this fact to know |
2521 | /// that we have a double negation (or (or ...) ...) that |
2522 | /// can be implemented for free. |
2523 | static bool canEmitConjunction(const SDValue Val, bool &CanNegate, |
2524 | bool &MustBeFirst, bool WillNegate, |
2525 | unsigned Depth = 0) { |
2526 | if (!Val.hasOneUse()) |
2527 | return false; |
2528 | unsigned Opcode = Val->getOpcode(); |
2529 | if (Opcode == ISD::SETCC) { |
2530 | if (Val->getOperand(0).getValueType() == MVT::f128) |
2531 | return false; |
2532 | CanNegate = true; |
2533 | MustBeFirst = false; |
2534 | return true; |
2535 | } |
2536 | // Protect against exponential runtime and stack overflow. |
2537 | if (Depth > 6) |
2538 | return false; |
2539 | if (Opcode == ISD::AND || Opcode == ISD::OR) { |
2540 | bool IsOR = Opcode == ISD::OR; |
2541 | SDValue O0 = Val->getOperand(0); |
2542 | SDValue O1 = Val->getOperand(1); |
2543 | bool CanNegateL; |
2544 | bool MustBeFirstL; |
2545 | if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth+1)) |
2546 | return false; |
2547 | bool CanNegateR; |
2548 | bool MustBeFirstR; |
2549 | if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth+1)) |
2550 | return false; |
2551 | |
2552 | if (MustBeFirstL && MustBeFirstR) |
2553 | return false; |
2554 | |
2555 | if (IsOR) { |
2556 | // For an OR expression we need to be able to naturally negate at least |
2557 | // one side or we cannot do the transformation at all. |
2558 | if (!CanNegateL && !CanNegateR) |
2559 | return false; |
2560 | // If the result of the OR will be negated and we can naturally negate
2561 | // the leaves, then this sub-tree as a whole negates naturally.
2562 | CanNegate = WillNegate && CanNegateL && CanNegateR; |
2563 | // If we cannot naturally negate the whole sub-tree, then this must be |
2564 | // emitted first. |
2565 | MustBeFirst = !CanNegate; |
2566 | } else { |
2567 | assert(Opcode == ISD::AND && "Must be OR or AND");
2568 | // We cannot naturally negate an AND operation. |
2569 | CanNegate = false; |
2570 | MustBeFirst = MustBeFirstL || MustBeFirstR; |
2571 | } |
2572 | return true; |
2573 | } |
2574 | return false; |
2575 | } |
2576 | |
2577 | /// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain |
2578 | /// of CCMP/FCCMP ops. See @ref AArch64CCMP.
2579 | /// Tries to transform the given i1 producing node @p Val to a series of compare
2580 | /// and conditional compare operations. @returns an NZCV flags producing node |
2581 | /// and sets @p OutCC to the flags that should be tested or returns SDValue() if |
2582 | /// transformation was not possible. |
2583 | /// \p Negate is true if we want this sub-tree to be negated just by changing
2584 | /// SETCC conditions. |
2585 | static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val, |
2586 | AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp, |
2587 | AArch64CC::CondCode Predicate) { |
2588 | // We're at a tree leaf, produce a conditional comparison operation. |
2589 | unsigned Opcode = Val->getOpcode(); |
2590 | if (Opcode == ISD::SETCC) { |
2591 | SDValue LHS = Val->getOperand(0); |
2592 | SDValue RHS = Val->getOperand(1); |
2593 | ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get(); |
2594 | bool isInteger = LHS.getValueType().isInteger(); |
2595 | if (Negate) |
2596 | CC = getSetCCInverse(CC, LHS.getValueType()); |
2597 | SDLoc DL(Val); |
2598 | // Determine OutCC and handle FP special case. |
2599 | if (isInteger) { |
2600 | OutCC = changeIntCCToAArch64CC(CC); |
2601 | } else { |
2602 | assert(LHS.getValueType().isFloatingPoint());
2603 | AArch64CC::CondCode ExtraCC; |
2604 | changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC); |
2605 | // Some floating point conditions can't be tested with a single condition |
2606 | // code. Construct an additional comparison in this case. |
2607 | if (ExtraCC != AArch64CC::AL) { |
2608 | SDValue ExtraCmp; |
2609 | if (!CCOp.getNode()) |
2610 | ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG); |
2611 | else |
2612 | ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, |
2613 | ExtraCC, DL, DAG); |
2614 | CCOp = ExtraCmp; |
2615 | Predicate = ExtraCC; |
2616 | } |
2617 | } |
2618 | |
2619 | // Produce a normal comparison if we are first in the chain |
2620 | if (!CCOp) |
2621 | return emitComparison(LHS, RHS, CC, DL, DAG); |
2622 | // Otherwise produce a ccmp. |
2623 | return emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, OutCC, DL, |
2624 | DAG); |
2625 | } |
2626 | assert(Val->hasOneUse() && "Valid conjunction/disjunction tree");
2627 | |
2628 | bool IsOR = Opcode == ISD::OR; |
2629 | |
2630 | SDValue LHS = Val->getOperand(0); |
2631 | bool CanNegateL; |
2632 | bool MustBeFirstL; |
2633 | bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR); |
2634 | assert(ValidL && "Valid conjunction/disjunction tree");
2635 | (void)ValidL; |
2636 | |
2637 | SDValue RHS = Val->getOperand(1); |
2638 | bool CanNegateR; |
2639 | bool MustBeFirstR; |
2640 | bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR); |
2641 | assert(ValidR && "Valid conjunction/disjunction tree");
2642 | (void)ValidR; |
2643 | |
2644 | // Swap sub-tree that must come first to the right side. |
2645 | if (MustBeFirstL) { |
2646 | assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
2647 | std::swap(LHS, RHS); |
2648 | std::swap(CanNegateL, CanNegateR); |
2649 | std::swap(MustBeFirstL, MustBeFirstR); |
2650 | } |
2651 | |
2652 | bool NegateR; |
2653 | bool NegateAfterR; |
2654 | bool NegateL; |
2655 | bool NegateAfterAll; |
2656 | if (Opcode == ISD::OR) { |
2657 | // Swap the sub-tree that we can negate naturally to the left. |
2658 | if (!CanNegateL) { |
2659 | assert(CanNegateR && "at least one side must be negatable");
2660 | assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
2661 | assert(!Negate);
2662 | std::swap(LHS, RHS); |
2663 | NegateR = false; |
2664 | NegateAfterR = true; |
2665 | } else { |
2666 | // Negate the left sub-tree if possible, otherwise negate the result. |
2667 | NegateR = CanNegateR; |
2668 | NegateAfterR = !CanNegateR; |
2669 | } |
2670 | NegateL = true; |
2671 | NegateAfterAll = !Negate; |
2672 | } else { |
2673 | assert(Opcode == ISD::AND && "Valid conjunction/disjunction tree");
2674 | assert(!Negate && "Valid conjunction/disjunction tree");
2675 | |
2676 | NegateL = false; |
2677 | NegateR = false; |
2678 | NegateAfterR = false; |
2679 | NegateAfterAll = false; |
2680 | } |
2681 | |
2682 | // Emit sub-trees. |
2683 | AArch64CC::CondCode RHSCC; |
2684 | SDValue CmpR = emitConjunctionRec(DAG, RHS, RHSCC, NegateR, CCOp, Predicate); |
2685 | if (NegateAfterR) |
2686 | RHSCC = AArch64CC::getInvertedCondCode(RHSCC); |
2687 | SDValue CmpL = emitConjunctionRec(DAG, LHS, OutCC, NegateL, CmpR, RHSCC); |
2688 | if (NegateAfterAll) |
2689 | OutCC = AArch64CC::getInvertedCondCode(OutCC); |
2690 | return CmpL; |
2691 | } |
2692 | |
2693 | /// Emit expression as a conjunction (a series of CCMP/FCCMP ops).
2694 | /// In some cases this is even possible with OR operations in the expression. |
2695 | /// See \ref AArch64CCMP. |
2696 | /// \see emitConjunctionRec(). |
2697 | static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val, |
2698 | AArch64CC::CondCode &OutCC) { |
2699 | bool DummyCanNegate; |
2700 | bool DummyMustBeFirst; |
2701 | if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false)) |
2702 | return SDValue(); |
2703 | |
2704 | return emitConjunctionRec(DAG, Val, OutCC, false, SDValue(), AArch64CC::AL); |
2705 | } |
2706 | |
2707 | /// @} |
2708 | |
2709 | /// Returns how profitable it is to fold a comparison's operand's shift and/or |
2710 | /// extension operations. |
2711 | static unsigned getCmpOperandFoldingProfit(SDValue Op) { |
2712 | auto isSupportedExtend = [&](SDValue V) { |
2713 | if (V.getOpcode() == ISD::SIGN_EXTEND_INREG) |
2714 | return true; |
2715 | |
2716 | if (V.getOpcode() == ISD::AND) |
2717 | if (ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(V.getOperand(1))) { |
2718 | uint64_t Mask = MaskCst->getZExtValue(); |
2719 | return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF); |
2720 | } |
2721 | |
2722 | return false; |
2723 | }; |
2724 | |
2725 | if (!Op.hasOneUse()) |
2726 | return 0; |
2727 | |
2728 | if (isSupportedExtend(Op)) |
2729 | return 1; |
2730 | |
2731 | unsigned Opc = Op.getOpcode(); |
2732 | if (Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA) |
2733 | if (ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { |
2734 | uint64_t Shift = ShiftCst->getZExtValue(); |
2735 | if (isSupportedExtend(Op.getOperand(0))) |
2736 | return (Shift <= 4) ? 2 : 1; |
2737 | EVT VT = Op.getValueType(); |
2738 | if ((VT == MVT::i32 && Shift <= 31) || (VT == MVT::i64 && Shift <= 63)) |
2739 | return 1; |
2740 | } |
2741 | |
2742 | return 0; |
2743 | } |
2744 | |
2745 | static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, |
2746 | SDValue &AArch64cc, SelectionDAG &DAG, |
2747 | const SDLoc &dl) { |
2748 | if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) { |
2749 | EVT VT = RHS.getValueType(); |
2750 | uint64_t C = RHSC->getZExtValue(); |
2751 | if (!isLegalArithImmed(C)) { |
2752 | // Constant does not fit, try adjusting it by one? |
2753 | switch (CC) { |
2754 | default: |
2755 | break; |
2756 | case ISD::SETLT: |
2757 | case ISD::SETGE: |
2758 | if ((VT == MVT::i32 && C != 0x80000000 && |
2759 | isLegalArithImmed((uint32_t)(C - 1))) || |
2760 | (VT == MVT::i64 && C != 0x80000000ULL && |
2761 | isLegalArithImmed(C - 1ULL))) { |
2762 | CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT; |
2763 | C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1; |
2764 | RHS = DAG.getConstant(C, dl, VT); |
2765 | } |
2766 | break; |
2767 | case ISD::SETULT: |
2768 | case ISD::SETUGE: |
2769 | if ((VT == MVT::i32 && C != 0 && |
2770 | isLegalArithImmed((uint32_t)(C - 1))) || |
2771 | (VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) { |
2772 | CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT; |
2773 | C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1; |
2774 | RHS = DAG.getConstant(C, dl, VT); |
2775 | } |
2776 | break; |
2777 | case ISD::SETLE: |
2778 | case ISD::SETGT: |
2779 | if ((VT == MVT::i32 && C != INT32_MAX &&
2780 | isLegalArithImmed((uint32_t)(C + 1))) || |
2781 | (VT == MVT::i64 && C != INT64_MAX &&
2782 | isLegalArithImmed(C + 1ULL))) { |
2783 | CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE; |
2784 | C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1; |
2785 | RHS = DAG.getConstant(C, dl, VT); |
2786 | } |
2787 | break; |
2788 | case ISD::SETULE: |
2789 | case ISD::SETUGT: |
2790 | if ((VT == MVT::i32 && C != UINT32_MAX &&
2791 | isLegalArithImmed((uint32_t)(C + 1))) || |
2792 | (VT == MVT::i64 && C != UINT64_MAX &&
2793 | isLegalArithImmed(C + 1ULL))) { |
2794 | CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE; |
2795 | C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1; |
2796 | RHS = DAG.getConstant(C, dl, VT); |
2797 | } |
2798 | break; |
2799 | } |
2800 | } |
2801 | } |
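     | // Editor's worked example (not in the original source): "x s< 4097" has an
     | // unencodable immediate (0x1001 fails isLegalArithImmed), so the switch above
     | // rewrites it as "x s<= 4096"; 4096 (0x1000) is encodable via the shifted
     | // form, avoiding an extra mov to materialize the constant in a register.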
2802 | |
2803 | // Comparisons are canonicalized so that the RHS operand is simpler than the |
2804 | // LHS one, the extreme case being when RHS is an immediate. However, AArch64 |
2805 | // can fold some shift+extend operations on the RHS operand, so swap the |
2806 | // operands if that can be done. |
2807 | // |
2808 | // For example: |
2809 | // lsl w13, w11, #1 |
2810 | // cmp w13, w12 |
2811 | // can be turned into: |
2812 | // cmp w12, w11, lsl #1 |
2813 | if (!isa<ConstantSDNode>(RHS) || |
2814 | !isLegalArithImmed(cast<ConstantSDNode>(RHS)->getZExtValue())) { |
2815 | SDValue TheLHS = isCMN(LHS, CC) ? LHS.getOperand(1) : LHS; |
2816 | |
2817 | if (getCmpOperandFoldingProfit(TheLHS) > getCmpOperandFoldingProfit(RHS)) { |
2818 | std::swap(LHS, RHS); |
2819 | CC = ISD::getSetCCSwappedOperands(CC); |
2820 | } |
2821 | } |
2822 | |
2823 | SDValue Cmp; |
2824 | AArch64CC::CondCode AArch64CC; |
2825 | if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) { |
2826 | const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS); |
2827 | |
2828 | // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095. |
2829 | // For the i8 operand, the largest immediate is 255, so this can be easily |
2830 | // encoded in the compare instruction. For the i16 operand, however, the |
2831 | // largest immediate cannot be encoded in the compare. |
2832 | // Therefore, use a sign extending load and cmn to avoid materializing the |
2833 | // -1 constant. For example, |
2834 | // movz w1, #65535 |
2835 | // ldrh w0, [x0, #0] |
2836 | // cmp w0, w1 |
2837 | // > |
2838 | // ldrsh w0, [x0, #0] |
2839 | // cmn w0, #1 |
2840 | // Fundamentally, we're relying on the property that (zext LHS) == (zext RHS)
2841 | // if and only if (sext LHS) == (sext RHS). The checks are in place to |
2842 | // ensure both the LHS and RHS are truly zero extended and to make sure the |
2843 | // transformation is profitable. |
2844 | if ((RHSC->getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) && |
2845 | cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD && |
2846 | cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 && |
2847 | LHS.getNode()->hasNUsesOfValue(1, 0)) { |
2848 | int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue(); |
2849 | if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) { |
2850 | SDValue SExt = |
2851 | DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS, |
2852 | DAG.getValueType(MVT::i16)); |
2853 | Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl, |
2854 | RHS.getValueType()), |
2855 | CC, dl, DAG); |
2856 | AArch64CC = changeIntCCToAArch64CC(CC); |
2857 | } |
2858 | } |
2859 | |
2860 | if (!Cmp && (RHSC->isNullValue() || RHSC->isOne())) { |
2861 | if ((Cmp = emitConjunction(DAG, LHS, AArch64CC))) { |
2862 | if ((CC == ISD::SETNE) ^ RHSC->isNullValue()) |
2863 | AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC); |
2864 | } |
2865 | } |
2866 | } |
2867 | |
2868 | if (!Cmp) { |
2869 | Cmp = emitComparison(LHS, RHS, CC, dl, DAG); |
2870 | AArch64CC = changeIntCCToAArch64CC(CC); |
2871 | } |
2872 | AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC); |
2873 | return Cmp; |
2874 | } |
2875 | |
2876 | static std::pair<SDValue, SDValue> |
2877 | getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) { |
2878 | assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) &&
2879 | "Unsupported value type");
2880 | SDValue Value, Overflow; |
2881 | SDLoc DL(Op); |
2882 | SDValue LHS = Op.getOperand(0); |
2883 | SDValue RHS = Op.getOperand(1); |
2884 | unsigned Opc = 0; |
2885 | switch (Op.getOpcode()) { |
2886 | default: |
2887 | llvm_unreachable("Unknown overflow instruction!")::llvm::llvm_unreachable_internal("Unknown overflow instruction!" , "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp" , 2887); |
2888 | case ISD::SADDO: |
2889 | Opc = AArch64ISD::ADDS; |
2890 | CC = AArch64CC::VS; |
2891 | break; |
2892 | case ISD::UADDO: |
2893 | Opc = AArch64ISD::ADDS; |
2894 | CC = AArch64CC::HS; |
2895 | break; |
2896 | case ISD::SSUBO: |
2897 | Opc = AArch64ISD::SUBS; |
2898 | CC = AArch64CC::VS; |
2899 | break; |
2900 | case ISD::USUBO: |
2901 | Opc = AArch64ISD::SUBS; |
2902 | CC = AArch64CC::LO; |
2903 | break; |
2904 | // Multiply needs a little bit of extra work.
2905 | case ISD::SMULO: |
2906 | case ISD::UMULO: { |
2907 | CC = AArch64CC::NE; |
2908 | bool IsSigned = Op.getOpcode() == ISD::SMULO; |
2909 | if (Op.getValueType() == MVT::i32) { |
2910 | unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; |
2911 | // For a 32 bit multiply with overflow check we want the instruction |
2912 | // selector to generate a widening multiply (SMADDL/UMADDL). For that we |
2913 | // need to generate the following pattern: |
2914 | // (i64 add 0, (i64 mul (i64 sext|zext i32 %a), (i64 sext|zext i32 %b)))
2915 | LHS = DAG.getNode(ExtendOpc, DL, MVT::i64, LHS); |
2916 | RHS = DAG.getNode(ExtendOpc, DL, MVT::i64, RHS); |
2917 | SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS); |
2918 | SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Mul, |
2919 | DAG.getConstant(0, DL, MVT::i64)); |
2920 | // On AArch64 the upper 32 bits are always zero extended for a 32 bit |
2921 | // operation. We need to clear out the upper 32 bits, because we used a |
2922 | // widening multiply that wrote all 64 bits. In the end this should be a |
2923 | // noop. |
2924 | Value = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Add); |
2925 | if (IsSigned) { |
2926 | // The signed overflow check requires more than just a simple check for |
2927 | // any bit set in the upper 32 bits of the result. These bits could be |
2928 | // just the sign bits of a negative number. To perform the overflow |
2929 | // check we arithmetically shift the lower 32 bits of the result right by
2930 | // 31 bits (replicating the sign bit). Then we compare that to the upper 32 bits.
2931 | SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Add, |
2932 | DAG.getConstant(32, DL, MVT::i64)); |
2933 | UpperBits = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, UpperBits); |
2934 | SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i32, Value, |
2935 | DAG.getConstant(31, DL, MVT::i64)); |
2936 | // It is important that LowerBits is last, otherwise the arithmetic |
2937 | // shift will not be folded into the compare (SUBS). |
2938 | SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32); |
2939 | Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits) |
2940 | .getValue(1); |
2941 | } else { |
2942 | // The overflow check for unsigned multiply is easy. We only need to |
2943 | // check if any of the upper 32 bits are set. This can be done with a |
2944 | // CMP (shifted register). For that we need to generate the following |
2945 | // pattern: |
2946 | // (i64 AArch64ISD::SUBS i64 0, (i64 srl i64 %Mul, i64 32))
2947 | SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul, |
2948 | DAG.getConstant(32, DL, MVT::i64)); |
2949 | SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32); |
2950 | Overflow = |
2951 | DAG.getNode(AArch64ISD::SUBS, DL, VTs, |
2952 | DAG.getConstant(0, DL, MVT::i64), |
2953 | UpperBits).getValue(1); |
2954 | } |
2955 | break; |
2956 | } |
2957 | assert(Op.getValueType() == MVT::i64 && "Expected an i64 value type");
2958 | // For the 64 bit multiply |
2959 | Value = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS); |
2960 | if (IsSigned) { |
2961 | SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS); |
2962 | SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value, |
2963 | DAG.getConstant(63, DL, MVT::i64)); |
2964 | // It is important that LowerBits is last, otherwise the arithmetic |
2965 | // shift will not be folded into the compare (SUBS). |
2966 | SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32); |
2967 | Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits) |
2968 | .getValue(1); |
2969 | } else { |
2970 | SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS); |
2971 | SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32); |
2972 | Overflow = |
2973 | DAG.getNode(AArch64ISD::SUBS, DL, VTs, |
2974 | DAG.getConstant(0, DL, MVT::i64), |
2975 | UpperBits).getValue(1); |
2976 | } |
2977 | break; |
2978 | } |
2979 | } // switch (...) |
2980 | |
2981 | if (Opc) { |
2982 | SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32); |
2983 | |
2984 | // Emit the AArch64 operation with overflow check. |
2985 | Value = DAG.getNode(Opc, DL, VTs, LHS, RHS); |
2986 | Overflow = Value.getValue(1); |
2987 | } |
2988 | return std::make_pair(Value, Overflow); |
2989 | } |
2990 | |
2991 | SDValue AArch64TargetLowering::LowerXOR(SDValue Op, SelectionDAG &DAG) const { |
2992 | if (useSVEForFixedLengthVectorVT(Op.getValueType())) |
2993 | return LowerToScalableOp(Op, DAG); |
2994 | |
2995 | SDValue Sel = Op.getOperand(0); |
2996 | SDValue Other = Op.getOperand(1); |
2997 | SDLoc dl(Sel); |
2998 | |
2999 | // If the operand is an overflow checking operation, invert the condition |
3000 | // code and kill the Not operation. I.e., transform: |
3001 | // (xor (overflow_op_bool, 1)) |
3002 | // --> |
3003 | // (csel 1, 0, invert(cc), overflow_op_bool) |
3004 | // ... which later gets transformed to just a cset instruction with an |
3005 | // inverted condition code, rather than a cset + eor sequence. |
3006 | if (isOneConstant(Other) && ISD::isOverflowIntrOpRes(Sel)) { |
3007 | // Only lower legal XALUO ops. |
3008 | if (!DAG.getTargetLoweringInfo().isTypeLegal(Sel->getValueType(0))) |
3009 | return SDValue(); |
3010 | |
3011 | SDValue TVal = DAG.getConstant(1, dl, MVT::i32); |
3012 | SDValue FVal = DAG.getConstant(0, dl, MVT::i32); |
3013 | AArch64CC::CondCode CC; |
3014 | SDValue Value, Overflow; |
3015 | std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Sel.getValue(0), DAG); |
3016 | SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32); |
3017 | return DAG.getNode(AArch64ISD::CSEL, dl, Op.getValueType(), TVal, FVal, |
3018 | CCVal, Overflow); |
3019 | } |
3020 | // If neither operand is a SELECT_CC, give up. |
3021 | if (Sel.getOpcode() != ISD::SELECT_CC) |
3022 | std::swap(Sel, Other); |
3023 | if (Sel.getOpcode() != ISD::SELECT_CC) |
3024 | return Op; |
3025 | |
3026 | // The folding we want to perform is: |
3027 | // (xor x, (select_cc a, b, cc, 0, -1) ) |
3028 | // --> |
3029 | // (csel x, (xor x, -1), cc ...) |
3030 | // |
3031 | // The latter will get matched to a CSINV instruction. |
3032 | |
3033 | ISD::CondCode CC = cast<CondCodeSDNode>(Sel.getOperand(4))->get(); |
3034 | SDValue LHS = Sel.getOperand(0); |
3035 | SDValue RHS = Sel.getOperand(1); |
3036 | SDValue TVal = Sel.getOperand(2); |
3037 | SDValue FVal = Sel.getOperand(3); |
3038 | |
3039 | // FIXME: This could be generalized to non-integer comparisons. |
3040 | if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64) |
3041 | return Op; |
3042 | |
3043 | ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal); |
3044 | ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal); |
3045 | |
3046 | // The values aren't constants, this isn't the pattern we're looking for. |
3047 | if (!CFVal || !CTVal) |
3048 | return Op; |
3049 | |
3050 | // We can commute the SELECT_CC by inverting the condition. This |
3051 | // might be needed to make this fit into a CSINV pattern. |
3052 | if (CTVal->isAllOnesValue() && CFVal->isNullValue()) { |
3053 | std::swap(TVal, FVal); |
3054 | std::swap(CTVal, CFVal); |
3055 | CC = ISD::getSetCCInverse(CC, LHS.getValueType()); |
3056 | } |
3057 | |
3058 | // If the constants line up, perform the transform! |
3059 | if (CTVal->isNullValue() && CFVal->isAllOnesValue()) { |
3060 | SDValue CCVal; |
3061 | SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl); |
3062 | |
3063 | FVal = Other; |
3064 | TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other, |
3065 | DAG.getConstant(-1ULL, dl, Other.getValueType())); |
3066 | |
3067 | return DAG.getNode(AArch64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal, |
3068 | CCVal, Cmp); |
3069 | } |
3070 | |
3071 | return Op; |
3072 | } |
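     | // Editor's sketch (not in the original source): after the transform above,
     | //   %m = select_cc a, b, 0, -1, cc
     | //   %r = xor x, %m
     | // lowers to a compare plus a single CSINV, roughly:
     | //   cmp   a, b
     | //   csinv x, x, x, cc      // cc ? x : ~x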
3073 | |
3074 | static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { |
3075 | EVT VT = Op.getValueType(); |
3076 | |
3077 | // Let legalize expand this if it isn't a legal type yet. |
3078 | if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) |
3079 | return SDValue(); |
3080 | |
3081 | SDVTList VTs = DAG.getVTList(VT, MVT::i32); |
3082 | |
3083 | unsigned Opc; |
3084 | bool ExtraOp = false; |
3085 | switch (Op.getOpcode()) { |
3086 | default: |
3087 | llvm_unreachable("Invalid code")::llvm::llvm_unreachable_internal("Invalid code", "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp" , 3087); |
3088 | case ISD::ADDC: |
3089 | Opc = AArch64ISD::ADDS; |
3090 | break; |
3091 | case ISD::SUBC: |
3092 | Opc = AArch64ISD::SUBS; |
3093 | break; |
3094 | case ISD::ADDE: |
3095 | Opc = AArch64ISD::ADCS; |
3096 | ExtraOp = true; |
3097 | break; |
3098 | case ISD::SUBE: |
3099 | Opc = AArch64ISD::SBCS; |
3100 | ExtraOp = true; |
3101 | break; |
3102 | } |
3103 | |
3104 | if (!ExtraOp) |
3105 | return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1)); |
3106 | return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1), |
3107 | Op.getOperand(2)); |
3108 | } |
3109 | |
3110 | static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) { |
3111 | // Let legalize expand this if it isn't a legal type yet. |
3112 | if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType())) |
3113 | return SDValue(); |
3114 | |
3115 | SDLoc dl(Op); |
3116 | AArch64CC::CondCode CC; |
3117 | // The actual operation that sets the overflow or carry flag. |
3118 | SDValue Value, Overflow; |
3119 | std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Op, DAG); |
3120 | |
3121 | // We use 0 and 1 as false and true values. |
3122 | SDValue TVal = DAG.getConstant(1, dl, MVT::i32); |
3123 | SDValue FVal = DAG.getConstant(0, dl, MVT::i32); |
3124 | |
3125 | // We use an inverted condition, because the conditional select is inverted |
3126 | // too. This will allow it to be selected to a single instruction: |
3127 | // CSINC Wd, WZR, WZR, invert(cond). |
3128 | SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32); |
3129 | Overflow = DAG.getNode(AArch64ISD::CSEL, dl, MVT::i32, FVal, TVal, |
3130 | CCVal, Overflow); |
3131 | |
3132 | SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); |
3133 | return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow); |
3134 | } |
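     | // Editor's note (not in the original source): for a scalar
     | // llvm.uadd.with.overflow this typically ends up as something like
     | //   adds w0, w0, w1        // value plus flags
     | //   cset w8, hs            // overflow bit, via the inverted CSEL above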
3135 | |
3136 | // Prefetch operands are: |
3137 | // 1: Address to prefetch |
3138 | // 2: bool isWrite |
3139 | // 3: int locality (0 = no locality ... 3 = extreme locality) |
3140 | // 4: bool isDataCache |
3141 | static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) { |
3142 | SDLoc DL(Op); |
3143 | unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue(); |
3144 | unsigned Locality = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); |
3145 | unsigned IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue(); |
3146 | |
3147 | bool IsStream = !Locality; |
3148 | // When the locality number is set |
3149 | if (Locality) { |
3150 | // The front-end should have filtered out the out-of-range values |
3151 | assert(Locality <= 3 && "Prefetch locality out-of-range");
3152 | // The locality degree is the opposite of the cache speed. |
3153 | // Put the number the other way around. |
3154 | // The encoding starts at 0 for level 1 |
3155 | Locality = 3 - Locality; |
3156 | } |
3157 | |
3158 | // Build the mask value encoding the expected behavior.
3159 | unsigned PrfOp = (IsWrite << 4) | // Load/Store bit |
3160 | (!IsData << 3) | // IsDataCache bit |
3161 | (Locality << 1) | // Cache level bits |
3162 | (unsigned)IsStream; // Stream bit |
3163 | return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0), |
3164 | DAG.getConstant(PrfOp, DL, MVT::i32), Op.getOperand(1)); |
3165 | } |
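     | // Editor's worked example (not in the original source): a plain
     | // __builtin_prefetch(p) arrives here as isWrite=0, locality=3, isData=1;
     | // locality is flipped to 0 (L1), IsStream stays 0, so PrfOp encodes 0b00000,
     | // i.e. the PLDL1KEEP hint. A streaming write prefetch (locality 0, write)
     | // would instead set both the IsWrite and Stream bits.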
3166 | |
3167 | SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op, |
3168 | SelectionDAG &DAG) const { |
3169 | if (Op.getValueType().isScalableVector()) |
3170 | return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_EXTEND_MERGE_PASSTHRU); |
3171 | |
3172 | assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
3173 | return SDValue(); |
3174 | } |
3175 | |
3176 | SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op, |
3177 | SelectionDAG &DAG) const { |
3178 | if (Op.getValueType().isScalableVector()) |
3179 | return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_ROUND_MERGE_PASSTHRU); |
3180 | |
3181 | bool IsStrict = Op->isStrictFPOpcode(); |
3182 | SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0); |
3183 | EVT SrcVT = SrcVal.getValueType(); |
3184 | |
3185 | if (SrcVT != MVT::f128) { |
3186 | // Expand cases where the input is a vector bigger than NEON. |
3187 | if (useSVEForFixedLengthVectorVT(SrcVT)) |
3188 | return SDValue(); |
3189 | |
3190 | // It's legal except when f128 is involved |
3191 | return Op; |
3192 | } |
3193 | |
3194 | return SDValue(); |
3195 | } |
3196 | |
3197 | SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op, |
3198 | SelectionDAG &DAG) const { |
3199 | // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp. |
3200 | // Any additional optimization in this function should be recorded |
3201 | // in the cost tables. |
3202 | EVT InVT = Op.getOperand(0).getValueType(); |
3203 | EVT VT = Op.getValueType(); |
3204 | |
3205 | if (VT.isScalableVector()) { |
3206 | unsigned Opcode = Op.getOpcode() == ISD::FP_TO_UINT |
3207 | ? AArch64ISD::FCVTZU_MERGE_PASSTHRU |
3208 | : AArch64ISD::FCVTZS_MERGE_PASSTHRU; |
3209 | return LowerToPredicatedOp(Op, DAG, Opcode); |
3210 | } |
3211 | |
3212 | unsigned NumElts = InVT.getVectorNumElements(); |
3213 | |
3214 | // f16 conversions are promoted to f32 when full fp16 is not supported. |
3215 | if (InVT.getVectorElementType() == MVT::f16 && |
3216 | !Subtarget->hasFullFP16()) { |
3217 | MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts); |
3218 | SDLoc dl(Op); |
3219 | return DAG.getNode( |
3220 | Op.getOpcode(), dl, Op.getValueType(), |
3221 | DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0))); |
3222 | } |
3223 | |
3224 | uint64_t VTSize = VT.getFixedSizeInBits(); |
3225 | uint64_t InVTSize = InVT.getFixedSizeInBits(); |
3226 | if (VTSize < InVTSize) { |
3227 | SDLoc dl(Op); |
3228 | SDValue Cv = |
3229 | DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(), |
3230 | Op.getOperand(0)); |
3231 | return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv); |
3232 | } |
3233 | |
3234 | if (VTSize > InVTSize) { |
3235 | SDLoc dl(Op); |
3236 | MVT ExtVT = |
3237 | MVT::getVectorVT(MVT::getFloatingPointVT(VT.getScalarSizeInBits()), |
3238 | VT.getVectorNumElements()); |
3239 | SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, ExtVT, Op.getOperand(0)); |
3240 | return DAG.getNode(Op.getOpcode(), dl, VT, Ext); |
3241 | } |
3242 | |
3243 | // Type changing conversions are illegal. |
3244 | return Op; |
3245 | } |
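     | // Editor's sketch (not in the original source): for example a v4f32 -> v4i16
     | // fp_to_sint takes the VTSize < InVTSize path above: first a v4f32 -> v4i32
     | // fcvtzs, then a truncate the backend selects as xtn, since NEON has no
     | // direct f32 -> i16 conversion.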
3246 | |
3247 | SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, |
3248 | SelectionDAG &DAG) const { |
3249 | bool IsStrict = Op->isStrictFPOpcode(); |
3250 | SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0); |
3251 | |
3252 | if (SrcVal.getValueType().isVector()) |
3253 | return LowerVectorFP_TO_INT(Op, DAG); |
3254 | |
3255 | // f16 conversions are promoted to f32 when full fp16 is not supported. |
3256 | if (SrcVal.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) { |
3257 | assert(!IsStrict && "Lowering of strict fp16 not yet implemented");
3258 | SDLoc dl(Op); |
3259 | return DAG.getNode( |
3260 | Op.getOpcode(), dl, Op.getValueType(), |
3261 | DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, SrcVal)); |
3262 | } |
3263 | |
3264 | if (SrcVal.getValueType() != MVT::f128) { |
3265 | // It's legal except when f128 is involved |
3266 | return Op; |
3267 | } |
3268 | |
3269 | return SDValue(); |
3270 | } |
3271 | |
3272 | SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op, |
3273 | SelectionDAG &DAG) const { |
3274 | // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp. |
3275 | // Any additional optimization in this function should be recorded |
3276 | // in the cost tables. |
3277 | EVT VT = Op.getValueType(); |
3278 | SDLoc dl(Op); |
3279 | SDValue In = Op.getOperand(0); |
3280 | EVT InVT = In.getValueType(); |
3281 | unsigned Opc = Op.getOpcode(); |
3282 | bool IsSigned = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP; |
3283 | |
3284 | if (VT.isScalableVector()) { |
3285 | if (InVT.getVectorElementType() == MVT::i1) { |
3286 | // We can't directly extend an SVE predicate; extend it first. |
3287 | unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; |
3288 | EVT CastVT = getPromotedVTForPredicate(InVT); |
3289 | In = DAG.getNode(CastOpc, dl, CastVT, In); |
3290 | return DAG.getNode(Opc, dl, VT, In); |
3291 | } |
3292 | |
3293 | unsigned Opcode = IsSigned ? AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU |
3294 | : AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU; |
3295 | return LowerToPredicatedOp(Op, DAG, Opcode); |
3296 | } |
3297 | |
3298 | uint64_t VTSize = VT.getFixedSizeInBits(); |
3299 | uint64_t InVTSize = InVT.getFixedSizeInBits(); |
3300 | if (VTSize < InVTSize) { |
3301 | MVT CastVT = |
3302 | MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()), |
3303 | InVT.getVectorNumElements()); |
3304 | In = DAG.getNode(Opc, dl, CastVT, In); |
3305 | return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0, dl)); |
3306 | } |
3307 | |
3308 | if (VTSize > InVTSize) { |
3309 | unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; |
3310 | EVT CastVT = VT.changeVectorElementTypeToInteger(); |
3311 | In = DAG.getNode(CastOpc, dl, CastVT, In); |
3312 | return DAG.getNode(Opc, dl, VT, In); |
3313 | } |
3314 | |
3315 | return Op; |
3316 | } |
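     | // Editor's sketch (not in the original source): symmetrically, a v4i16 -> v4f32
     | // sint_to_fp takes the VTSize > InVTSize path: the input is sign-extended to
     | // v4i32 (roughly an sshll) and then converted with scvtf, as there is no
     | // direct i16 -> f32 vector conversion.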
3317 | |
3318 | SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op, |
3319 | SelectionDAG &DAG) const { |
3320 | if (Op.getValueType().isVector()) |
3321 | return LowerVectorINT_TO_FP(Op, DAG); |
3322 | |
3323 | bool IsStrict = Op->isStrictFPOpcode(); |
3324 | SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0); |
3325 | |
3326 | // f16 conversions are promoted to f32 when full fp16 is not supported. |
3327 | if (Op.getValueType() == MVT::f16 && |
3328 | !Subtarget->hasFullFP16()) { |
3329 | assert(!IsStrict && "Lowering of strict fp16 not yet implemented");
3330 | SDLoc dl(Op); |
3331 | return DAG.getNode( |
3332 | ISD::FP_ROUND, dl, MVT::f16, |
3333 | DAG.getNode(Op.getOpcode(), dl, MVT::f32, SrcVal), |
3334 | DAG.getIntPtrConstant(0, dl)); |
3335 | } |
3336 | |
3337 | // i128 conversions are libcalls. |
3338 | if (SrcVal.getValueType() == MVT::i128) |
3339 | return SDValue(); |
3340 | |
3341 | // Other conversions are legal, unless it's to the completely software-based |
3342 | // fp128. |
3343 | if (Op.getValueType() != MVT::f128) |
3344 | return Op; |
3345 | return SDValue(); |
3346 | } |
3347 | |
3348 | SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op, |
3349 | SelectionDAG &DAG) const { |
3350 | // For iOS, we want to call an alternative entry point: __sincos_stret, |
3351 | // which returns the values in two S / D registers. |
3352 | SDLoc dl(Op); |
3353 | SDValue Arg = Op.getOperand(0); |
3354 | EVT ArgVT = Arg.getValueType(); |
3355 | Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); |
3356 | |
3357 | ArgListTy Args; |
3358 | ArgListEntry Entry; |
3359 | |
3360 | Entry.Node = Arg; |
3361 | Entry.Ty = ArgTy; |
3362 | Entry.IsSExt = false; |
3363 | Entry.IsZExt = false; |
3364 | Args.push_back(Entry); |
3365 | |
3366 | RTLIB::Libcall LC = ArgVT == MVT::f64 ? RTLIB::SINCOS_STRET_F64 |
3367 | : RTLIB::SINCOS_STRET_F32; |
3368 | const char *LibcallName = getLibcallName(LC); |
3369 | SDValue Callee = |
3370 | DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout())); |
3371 | |
3372 | StructType *RetTy = StructType::get(ArgTy, ArgTy); |
3373 | TargetLowering::CallLoweringInfo CLI(DAG); |
3374 | CLI.setDebugLoc(dl) |
3375 | .setChain(DAG.getEntryNode()) |
3376 | .setLibCallee(CallingConv::Fast, RetTy, Callee, std::move(Args)); |
3377 | |
3378 | std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); |
3379 | return CallResult.first; |
3380 | } |
3381 | |
3382 | static SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) { |
3383 | EVT OpVT = Op.getValueType(); |
3384 | if (OpVT != MVT::f16 && OpVT != MVT::bf16) |
3385 | return SDValue(); |
3386 | |
3387 | assert(Op.getOperand(0).getValueType() == MVT::i16);
3388 | SDLoc DL(Op); |
3389 | |
3390 | Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op.getOperand(0)); |
3391 | Op = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op); |
3392 | return SDValue( |
3393 | DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, OpVT, Op, |
3394 | DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)), |
3395 | 0); |
3396 | } |
3397 | |
3398 | static EVT getExtensionTo64Bits(const EVT &OrigVT) { |
3399 | if (OrigVT.getSizeInBits() >= 64) |
3400 | return OrigVT; |
3401 | |
3402 | assert(OrigVT.isSimple() && "Expecting a simple value type");
3403 | |
3404 | MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy; |
3405 | switch (OrigSimpleTy) { |
3406 | default: llvm_unreachable("Unexpected Vector Type");
3407 | case MVT::v2i8: |
3408 | case MVT::v2i16: |
3409 | return MVT::v2i32; |
3410 | case MVT::v4i8: |
3411 | return MVT::v4i16; |
3412 | } |
3413 | } |
3414 | |
3415 | static SDValue addRequiredExtensionForVectorMULL(SDValue N, SelectionDAG &DAG, |
3416 | const EVT &OrigTy, |
3417 | const EVT &ExtTy, |
3418 | unsigned ExtOpcode) { |
3419 | // The vector originally had a size of OrigTy. It was then extended to ExtTy. |
3420 | // We expect the ExtTy to be 128-bits total. If the OrigTy is less than |
3421 | // 64-bits we need to insert a new extension so that it will be 64-bits. |
3422 | assert(ExtTy.is128BitVector() && "Unexpected extension size");
3423 | if (OrigTy.getSizeInBits() >= 64) |
3424 | return N; |
3425 | |
3426 | // Must extend size to at least 64 bits to be used as an operand for VMULL. |
3427 | EVT NewVT = getExtensionTo64Bits(OrigTy); |
3428 | |
3429 | return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N); |
3430 | } |
3431 | |
3432 | static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG, |
3433 | bool isSigned) { |
3434 | EVT VT = N->getValueType(0); |
3435 | |
3436 | if (N->getOpcode() != ISD::BUILD_VECTOR) |
3437 | return false; |
3438 | |
3439 | for (const SDValue &Elt : N->op_values()) { |
3440 | if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) { |
3441 | unsigned EltSize = VT.getScalarSizeInBits(); |
3442 | unsigned HalfSize = EltSize / 2; |
3443 | if (isSigned) { |
3444 | if (!isIntN(HalfSize, C->getSExtValue())) |
3445 | return false; |
3446 | } else { |
3447 | if (!isUIntN(HalfSize, C->getZExtValue())) |
3448 | return false; |
3449 | } |
3450 | continue; |
3451 | } |
3452 | return false; |
3453 | } |
3454 | |
3455 | return true; |
3456 | } |
3457 | |
3458 | static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) { |
3459 | if (N->getOpcode() == ISD::SIGN_EXTEND || |
3460 | N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND) |
3461 | return addRequiredExtensionForVectorMULL(N->getOperand(0), DAG, |
3462 | N->getOperand(0)->getValueType(0), |
3463 | N->getValueType(0), |
3464 | N->getOpcode()); |
3465 | |
3466 | assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
3467 | EVT VT = N->getValueType(0); |
3468 | SDLoc dl(N); |
3469 | unsigned EltSize = VT.getScalarSizeInBits() / 2; |
3470 | unsigned NumElts = VT.getVectorNumElements(); |
3471 | MVT TruncVT = MVT::getIntegerVT(EltSize); |
3472 | SmallVector<SDValue, 8> Ops; |
3473 | for (unsigned i = 0; i != NumElts; ++i) { |
3474 | ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i)); |
3475 | const APInt &CInt = C->getAPIntValue(); |
3476 | // Element types smaller than 32 bits are not legal, so use i32 elements. |
3477 | // The values are implicitly truncated so sext vs. zext doesn't matter. |
3478 | Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32)); |
3479 | } |
3480 | return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops); |
3481 | } |
3482 | |
3483 | static bool isSignExtended(SDNode *N, SelectionDAG &DAG) { |
3484 | return N->getOpcode() == ISD::SIGN_EXTEND || |
3485 | N->getOpcode() == ISD::ANY_EXTEND || |
3486 | isExtendedBUILD_VECTOR(N, DAG, true); |
3487 | } |
3488 | |
3489 | static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) { |
3490 | return N->getOpcode() == ISD::ZERO_EXTEND || |
3491 | N->getOpcode() == ISD::ANY_EXTEND || |
3492 | isExtendedBUILD_VECTOR(N, DAG, false); |
3493 | } |
3494 | |
3495 | static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) { |
3496 | unsigned Opcode = N->getOpcode(); |
3497 | if (Opcode == ISD::ADD || Opcode == ISD::SUB) { |
3498 | SDNode *N0 = N->getOperand(0).getNode(); |
3499 | SDNode *N1 = N->getOperand(1).getNode(); |
3500 | return N0->hasOneUse() && N1->hasOneUse() && |
3501 | isSignExtended(N0, DAG) && isSignExtended(N1, DAG); |
3502 | } |
3503 | return false; |
3504 | } |
3505 | |
3506 | static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) { |
3507 | unsigned Opcode = N->getOpcode(); |
3508 | if (Opcode == ISD::ADD || Opcode == ISD::SUB) { |
3509 | SDNode *N0 = N->getOperand(0).getNode(); |
3510 | SDNode *N1 = N->getOperand(1).getNode(); |
3511 | return N0->hasOneUse() && N1->hasOneUse() && |
3512 | isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG); |
3513 | } |
3514 | return false; |
3515 | } |
3516 | |
3517 | SDValue AArch64TargetLowering::LowerFLT_ROUNDS_(SDValue Op, |
3518 | SelectionDAG &DAG) const { |
3519 | // The rounding mode is in bits 23:22 of the FPCR. |
3520 | // The AArch64 rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0. |
3521 | // The formula we use to implement this is ((FPCR + (1 << 22)) >> 22) & 3 |
3522 | // so that the shift + and get folded into a bitfield extract. |
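     | // Worked example of the formula (assuming the usual FPCR encoding RN=0b00, |
     | // RP=0b01, RM=0b10, RZ=0b11 in bits 23:22): after the shift the computation |
     | // reduces to (rm + 1) & 3, so RN (0) -> 1 "to nearest", RP (1) -> 2, |
     | // RM (2) -> 3 and RZ (3) -> 0 "toward zero", matching the mapping above. |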
3523 | SDLoc dl(Op); |
3524 | |
3525 | SDValue Chain = Op.getOperand(0); |
3526 | SDValue FPCR_64 = DAG.getNode( |
3527 | ISD::INTRINSIC_W_CHAIN, dl, {MVT::i64, MVT::Other}, |
3528 | {Chain, DAG.getConstant(Intrinsic::aarch64_get_fpcr, dl, MVT::i64)}); |
3529 | Chain = FPCR_64.getValue(1); |
3530 | SDValue FPCR_32 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, FPCR_64); |
3531 | SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPCR_32, |
3532 | DAG.getConstant(1U << 22, dl, MVT::i32)); |
3533 | SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds, |
3534 | DAG.getConstant(22, dl, MVT::i32)); |
3535 | SDValue AND = DAG.getNode(ISD::AND, dl, MVT::i32, RMODE, |
3536 | DAG.getConstant(3, dl, MVT::i32)); |
3537 | return DAG.getMergeValues({AND, Chain}, dl); |
3538 | } |
3539 | |
3540 | SDValue AArch64TargetLowering::LowerSET_ROUNDING(SDValue Op, |
3541 | SelectionDAG &DAG) const { |
3542 | SDLoc DL(Op); |
3543 | SDValue Chain = Op->getOperand(0); |
3544 | SDValue RMValue = Op->getOperand(1); |
3545 | |
3546 | // The rounding mode is in bits 23:22 of the FPCR. |
3547 | // The llvm.set.rounding argument value to the rounding mode in FPCR mapping |
3548 | // is 0->3, 1->0, 2->1, 3->2. The formula we use to implement this is |
3549 | // (((arg - 1) & 3) << 22). |
3550 | // |
3551 | // The argument of llvm.set.rounding must be within the interval [0, 3], so |
3552 | // NearestTiesToAway (4) is not handled here. It is the responsibility of the |
3553 | // code that generates llvm.set.rounding to ensure this condition. |
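     | // Worked example (assuming the usual FPCR rounding-mode encoding): |
     | // argument 0 (towardzero) -> ((0 - 1) & 3) = 3 = RZ, argument 1 (tonearest) |
     | // -> 0 = RN, argument 2 (upward) -> 1 = RP, argument 3 (downward) -> 2 = RM. |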
3554 | |
3555 | // Calculate new value of FPCR[23:22]. |
3556 | RMValue = DAG.getNode(ISD::SUB, DL, MVT::i32, RMValue, |
3557 | DAG.getConstant(1, DL, MVT::i32)); |
3558 | RMValue = DAG.getNode(ISD::AND, DL, MVT::i32, RMValue, |
3559 | DAG.getConstant(0x3, DL, MVT::i32)); |
3560 | RMValue = |
3561 | DAG.getNode(ISD::SHL, DL, MVT::i32, RMValue, |
3562 | DAG.getConstant(AArch64::RoundingBitsPos, DL, MVT::i32)); |
3563 | RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, RMValue); |
3564 | |
3565 | // Get current value of FPCR. |
3566 | SDValue Ops[] = { |
3567 | Chain, DAG.getTargetConstant(Intrinsic::aarch64_get_fpcr, DL, MVT::i64)}; |
3568 | SDValue FPCR = |
3569 | DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i64, MVT::Other}, Ops); |
3570 | Chain = FPCR.getValue(1); |
3571 | FPCR = FPCR.getValue(0); |
3572 | |
3573 | // Put the new rounding mode into FPCR[23:22]. |
3574 | const int RMMask = ~(AArch64::Rounding::rmMask << AArch64::RoundingBitsPos); |
3575 | FPCR = DAG.getNode(ISD::AND, DL, MVT::i64, FPCR, |
3576 | DAG.getConstant(RMMask, DL, MVT::i64)); |
3577 | FPCR = DAG.getNode(ISD::OR, DL, MVT::i64, FPCR, RMValue); |
3578 | SDValue Ops2[] = { |
3579 | Chain, DAG.getTargetConstant(Intrinsic::aarch64_set_fpcr, DL, MVT::i64), |
3580 | FPCR}; |
3581 | return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2); |
3582 | } |
3583 | |
3584 | SDValue AArch64TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { |
3585 | EVT VT = Op.getValueType(); |
3586 | |
3587 | // If SVE is available then i64 vector multiplications can also be made legal. |
3588 | bool OverrideNEON = VT == MVT::v2i64 || VT == MVT::v1i64; |
3589 | |
3590 | if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT, OverrideNEON)) |
3591 | return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED, OverrideNEON); |
3592 | |
3593 | // Multiplications are only custom-lowered for 128-bit vectors so that |
3594 | // VMULL can be detected. Otherwise v2i64 multiplications are not legal. |
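     | // For illustration: a v8i16 multiply whose operands are both sign-extended |
     | // from v8i8 is recognised here and emitted as a single SMULL taking the two |
     | // v8i8 operands directly (a sketch of the common case, not the only pattern |
     | // handled below). |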
3595 | assert(VT.is128BitVector() && VT.isInteger() && |
3596 | "unexpected type for custom-lowering ISD::MUL"); |
3597 | SDNode *N0 = Op.getOperand(0).getNode(); |
3598 | SDNode *N1 = Op.getOperand(1).getNode(); |
3599 | unsigned NewOpc = 0; |
3600 | bool isMLA = false; |
3601 | bool isN0SExt = isSignExtended(N0, DAG); |
3602 | bool isN1SExt = isSignExtended(N1, DAG); |
3603 | if (isN0SExt && isN1SExt) |
3604 | NewOpc = AArch64ISD::SMULL; |
3605 | else { |
3606 | bool isN0ZExt = isZeroExtended(N0, DAG); |
3607 | bool isN1ZExt = isZeroExtended(N1, DAG); |
3608 | if (isN0ZExt && isN1ZExt) |
3609 | NewOpc = AArch64ISD::UMULL; |
3610 | else if (isN1SExt || isN1ZExt) { |
3611 | // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these |
3612 | // into (s/zext A * s/zext C) + (s/zext B * s/zext C) |
3613 | if (isN1SExt && isAddSubSExt(N0, DAG)) { |
3614 | NewOpc = AArch64ISD::SMULL; |
3615 | isMLA = true; |
3616 | } else if (isN1ZExt && isAddSubZExt(N0, DAG)) { |
3617 | NewOpc = AArch64ISD::UMULL; |
3618 | isMLA = true; |
3619 | } else if (isN0ZExt && isAddSubZExt(N1, DAG)) { |
3620 | std::swap(N0, N1); |
3621 | NewOpc = AArch64ISD::UMULL; |
3622 | isMLA = true; |
3623 | } |
3624 | } |
3625 | |
3626 | if (!NewOpc) { |
3627 | if (VT == MVT::v2i64) |
3628 | // Fall through to expand this. It is not legal. |
3629 | return SDValue(); |
3630 | else |
3631 | // Other vector multiplications are legal. |
3632 | return Op; |
3633 | } |
3634 | } |
3635 | |
3636 | // Legalize to a S/UMULL instruction |
3637 | SDLoc DL(Op); |
3638 | SDValue Op0; |
3639 | SDValue Op1 = skipExtensionForVectorMULL(N1, DAG); |
3640 | if (!isMLA) { |
3641 | Op0 = skipExtensionForVectorMULL(N0, DAG); |
3642 | assert(Op0.getValueType().is64BitVector() && |
3643 | Op1.getValueType().is64BitVector() && |
3644 | "unexpected types for extended operands to VMULL"); |
3645 | return DAG.getNode(NewOpc, DL, VT, Op0, Op1); |
3646 | } |
3647 | // Optimize (zext A + zext B) * C to (S/UMULL A, C) + (S/UMULL B, C) during |
3648 | // isel lowering to take advantage of no-stall back-to-back s/umul + s/umla |
3649 | // on CPUs with accumulate forwarding such as Cortex-A53/A57. |
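     | // Illustrative shape (with A, B and C all zero-extended from v8i8 to v8i16): |
     | // mul(add(zext A, zext B), zext C) becomes add(UMULL(A, C), UMULL(B, C)), |
     | // which the selector can then pair as umull + umlal. |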
3650 | SDValue N00 = skipExtensionForVectorMULL(N0->getOperand(0).getNode(), DAG); |
3651 | SDValue N01 = skipExtensionForVectorMULL(N0->getOperand(1).getNode(), DAG); |
3652 | EVT Op1VT = Op1.getValueType(); |
3653 | return DAG.getNode(N0->getOpcode(), DL, VT, |
3654 | DAG.getNode(NewOpc, DL, VT, |
3655 | DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1), |
3656 | DAG.getNode(NewOpc, DL, VT, |
3657 | DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1)); |
3658 | } |
3659 | |
3660 | static inline SDValue getPTrue(SelectionDAG &DAG, SDLoc DL, EVT VT, |
3661 | int Pattern) { |
3662 | return DAG.getNode(AArch64ISD::PTRUE, DL, VT, |
3663 | DAG.getTargetConstant(Pattern, DL, MVT::i32)); |
3664 | } |
3665 | |
3666 | SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, |
3667 | SelectionDAG &DAG) const { |
3668 | unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); |
3669 | SDLoc dl(Op); |
3670 | switch (IntNo) { |
3671 | default: return SDValue(); // Don't custom lower most intrinsics. |
3672 | case Intrinsic::thread_pointer: { |
3673 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); |
3674 | return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT); |
3675 | } |
3676 | case Intrinsic::aarch64_neon_abs: { |
3677 | EVT Ty = Op.getValueType(); |
3678 | if (Ty == MVT::i64) { |
3679 | SDValue Result = DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, |
3680 | Op.getOperand(1)); |
3681 | Result = DAG.getNode(ISD::ABS, dl, MVT::v1i64, Result); |
3682 | return DAG.getNode(ISD::BITCAST, dl, MVT::i64, Result); |
3683 | } else if (Ty.isVector() && Ty.isInteger() && isTypeLegal(Ty)) { |
3684 | return DAG.getNode(ISD::ABS, dl, Ty, Op.getOperand(1)); |
3685 | } else { |
3686 | report_fatal_error("Unexpected type for AArch64 NEON intrinsic"); |
3687 | } |
3688 | } |
3689 | case Intrinsic::aarch64_neon_smax: |
3690 | return DAG.getNode(ISD::SMAX, dl, Op.getValueType(), |
3691 | Op.getOperand(1), Op.getOperand(2)); |
3692 | case Intrinsic::aarch64_neon_umax: |
3693 | return DAG.getNode(ISD::UMAX, dl, Op.getValueType(), |
3694 | Op.getOperand(1), Op.getOperand(2)); |
3695 | case Intrinsic::aarch64_neon_smin: |
3696 | return DAG.getNode(ISD::SMIN, dl, Op.getValueType(), |
3697 | Op.getOperand(1), Op.getOperand(2)); |
3698 | case Intrinsic::aarch64_neon_umin: |
3699 | return DAG.getNode(ISD::UMIN, dl, Op.getValueType(), |
3700 | Op.getOperand(1), Op.getOperand(2)); |
3701 | |
3702 | case Intrinsic::aarch64_sve_sunpkhi: |
3703 | return DAG.getNode(AArch64ISD::SUNPKHI, dl, Op.getValueType(), |
3704 | Op.getOperand(1)); |
3705 | case Intrinsic::aarch64_sve_sunpklo: |
3706 | return DAG.getNode(AArch64ISD::SUNPKLO, dl, Op.getValueType(), |
3707 | Op.getOperand(1)); |
3708 | case Intrinsic::aarch64_sve_uunpkhi: |
3709 | return DAG.getNode(AArch64ISD::UUNPKHI, dl, Op.getValueType(), |
3710 | Op.getOperand(1)); |
3711 | case Intrinsic::aarch64_sve_uunpklo: |
3712 | return DAG.getNode(AArch64ISD::UUNPKLO, dl, Op.getValueType(), |
3713 | Op.getOperand(1)); |
3714 | case Intrinsic::aarch64_sve_clasta_n: |
3715 | return DAG.getNode(AArch64ISD::CLASTA_N, dl, Op.getValueType(), |
3716 | Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); |
3717 | case Intrinsic::aarch64_sve_clastb_n: |
3718 | return DAG.getNode(AArch64ISD::CLASTB_N, dl, Op.getValueType(), |
3719 | Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); |
3720 | case Intrinsic::aarch64_sve_lasta: |
3721 | return DAG.getNode(AArch64ISD::LASTA, dl, Op.getValueType(), |
3722 | Op.getOperand(1), Op.getOperand(2)); |
3723 | case Intrinsic::aarch64_sve_lastb: |
3724 | return DAG.getNode(AArch64ISD::LASTB, dl, Op.getValueType(), |
3725 | Op.getOperand(1), Op.getOperand(2)); |
3726 | case Intrinsic::aarch64_sve_rev: |
3727 | return DAG.getNode(ISD::VECTOR_REVERSE, dl, Op.getValueType(), |
3728 | Op.getOperand(1)); |
3729 | case Intrinsic::aarch64_sve_tbl: |
3730 | return DAG.getNode(AArch64ISD::TBL, dl, Op.getValueType(), |
3731 | Op.getOperand(1), Op.getOperand(2)); |
3732 | case Intrinsic::aarch64_sve_trn1: |
3733 | return DAG.getNode(AArch64ISD::TRN1, dl, Op.getValueType(), |
3734 | Op.getOperand(1), Op.getOperand(2)); |
3735 | case Intrinsic::aarch64_sve_trn2: |
3736 | return DAG.getNode(AArch64ISD::TRN2, dl, Op.getValueType(), |
3737 | Op.getOperand(1), Op.getOperand(2)); |
3738 | case Intrinsic::aarch64_sve_uzp1: |
3739 | return DAG.getNode(AArch64ISD::UZP1, dl, Op.getValueType(), |
3740 | Op.getOperand(1), Op.getOperand(2)); |
3741 | case Intrinsic::aarch64_sve_uzp2: |
3742 | return DAG.getNode(AArch64ISD::UZP2, dl, Op.getValueType(), |
3743 | Op.getOperand(1), Op.getOperand(2)); |
3744 | case Intrinsic::aarch64_sve_zip1: |
3745 | return DAG.getNode(AArch64ISD::ZIP1, dl, Op.getValueType(), |
3746 | Op.getOperand(1), Op.getOperand(2)); |
3747 | case Intrinsic::aarch64_sve_zip2: |
3748 | return DAG.getNode(AArch64ISD::ZIP2, dl, Op.getValueType(), |
3749 | Op.getOperand(1), Op.getOperand(2)); |
3750 | case Intrinsic::aarch64_sve_ptrue: |
3751 | return DAG.getNode(AArch64ISD::PTRUE, dl, Op.getValueType(), |
3752 | Op.getOperand(1)); |
3753 | case Intrinsic::aarch64_sve_clz: |
3754 | return DAG.getNode(AArch64ISD::CTLZ_MERGE_PASSTHRU, dl, Op.getValueType(), |
3755 | Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); |
3756 | case Intrinsic::aarch64_sve_cnt: { |
3757 | SDValue Data = Op.getOperand(3); |
3758 | // CTPOP only supports integer operands. |
3759 | if (Data.getValueType().isFloatingPoint()) |
3760 | Data = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Data); |
3761 | return DAG.getNode(AArch64ISD::CTPOP_MERGE_PASSTHRU, dl, Op.getValueType(), |
3762 | Op.getOperand(2), Data, Op.getOperand(1)); |
3763 | } |
3764 | case Intrinsic::aarch64_sve_dupq_lane: |
3765 | return LowerDUPQLane(Op, DAG); |
3766 | case Intrinsic::aarch64_sve_convert_from_svbool: |
3767 | return DAG.getNode(AArch64ISD::REINTERPRET_CAST, dl, Op.getValueType(), |
3768 | Op.getOperand(1)); |
3769 | case Intrinsic::aarch64_sve_fneg: |
3770 | return DAG.getNode(AArch64ISD::FNEG_MERGE_PASSTHRU, dl, Op.getValueType(), |
3771 | Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); |
3772 | case Intrinsic::aarch64_sve_frintp: |
3773 | return DAG.getNode(AArch64ISD::FCEIL_MERGE_PASSTHRU, dl, Op.getValueType(), |
3774 | Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); |
3775 | case Intrinsic::aarch64_sve_frintm: |
3776 | return DAG.getNode(AArch64ISD::FFLOOR_MERGE_PASSTHRU, dl, Op.getValueType(), |
3777 | Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); |
3778 | case Intrinsic::aarch64_sve_frinti: |
3779 | return DAG.getNode(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU, dl, Op.getValueType(), |
3780 | Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); |
3781 | case Intrinsic::aarch64_sve_frintx: |
3782 | return DAG.getNode(AArch64ISD::FRINT_MERGE_PASSTHRU, dl, Op.getValueType(), |
3783 | Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); |
3784 | case Intrinsic::aarch64_sve_frinta: |
3785 | return DAG.getNode(AArch64ISD::FROUND_MERGE_PASSTHRU, dl, Op.getValueType(), |
3786 | Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); |
3787 | case Intrinsic::aarch64_sve_frintn: |
3788 | return DAG.getNode(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU, dl, Op.getValueType(), |
3789 | Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); |
3790 | case Intrinsic::aarch64_sve_frintz: |
3791 | return DAG.getNode(AArch64ISD::FTRUNC_MERGE_PASSTHRU, dl, Op.getValueType(), |
3792 | Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); |
3793 | case Intrinsic::aarch64_sve_ucvtf: |
3794 | return DAG.getNode(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU, dl, |
3795 | Op.getValueType(), Op.getOperand(2), Op.getOperand(3), |
3796 | Op.getOperand(1)); |
3797 | case Intrinsic::aarch64_sve_scvtf: |
3798 | return DAG.getNode(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU, dl, |
3799 | Op.getValueType(), Op.getOperand(2), Op.getOperand(3), |
3800 | Op.getOperand(1)); |
3801 | case Intrinsic::aarch64_sve_fcvtzu: |
3802 | return DAG.getNode(AArch64ISD::FCVTZU_MERGE_PASSTHRU, dl, |
3803 | Op.getValueType(), Op.getOperand(2), Op.getOperand(3), |
3804 | Op.getOperand(1)); |
3805 | case Intrinsic::aarch64_sve_fcvtzs: |
3806 | return DAG.getNode(AArch64ISD::FCVTZS_MERGE_PASSTHRU, dl, |
3807 | Op.getValueType(), Op.getOperand(2), Op.getOperand(3), |
3808 | Op.getOperand(1)); |
3809 | case Intrinsic::aarch64_sve_fsqrt: |
3810 | return DAG.getNode(AArch64ISD::FSQRT_MERGE_PASSTHRU, dl, Op.getValueType(), |
3811 | Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); |
3812 | case Intrinsic::aarch64_sve_frecpx: |
3813 | return DAG.getNode(AArch64ISD::FRECPX_MERGE_PASSTHRU, dl, Op.getValueType(), |
3814 | Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); |
3815 | case Intrinsic::aarch64_sve_fabs: |
3816 | return DAG.getNode(AArch64ISD::FABS_MERGE_PASSTHRU, dl, Op.getValueType(), |
3817 | Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); |
3818 | case Intrinsic::aarch64_sve_abs: |
3819 | return DAG.getNode(AArch64ISD::ABS_MERGE_PASSTHRU, dl, Op.getValueType(), |
3820 | Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); |
3821 | case Intrinsic::aarch64_sve_neg: |
3822 | return DAG.getNode(AArch64ISD::NEG_MERGE_PASSTHRU, dl, Op.getValueType(), |
3823 | Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); |
3824 | case Intrinsic::aarch64_sve_convert_to_svbool: { |
3825 | EVT OutVT = Op.getValueType(); |
3826 | EVT InVT = Op.getOperand(1).getValueType(); |
3827 | // Return the operand if the cast isn't changing type, |
3828 | // i.e. <n x 16 x i1> -> <n x 16 x i1> |
3829 | if (InVT == OutVT) |
3830 | return Op.getOperand(1); |
3831 | // Otherwise, zero the newly introduced lanes. |
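     | // For example, converting <vscale x 4 x i1> to <vscale x 16 x i1>: the input |
     | // is reinterpreted as the wider predicate and then ANDed with a reinterpreted |
     | // ptrue of the source type, so lanes the source type says nothing about read |
     | // as zero (a sketch of the intent, not an exact instruction sequence). |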
3832 | SDValue Reinterpret = |
3833 | DAG.getNode(AArch64ISD::REINTERPRET_CAST, dl, OutVT, Op.getOperand(1)); |
3834 | SDValue Mask = getPTrue(DAG, dl, InVT, AArch64SVEPredPattern::all); |
3835 | SDValue MaskReinterpret = |
3836 | DAG.getNode(AArch64ISD::REINTERPRET_CAST, dl, OutVT, Mask); |
3837 | return DAG.getNode(ISD::AND, dl, OutVT, Reinterpret, MaskReinterpret); |
3838 | } |
3839 | |
3840 | case Intrinsic::aarch64_sve_insr: { |
3841 | SDValue Scalar = Op.getOperand(2); |
3842 | EVT ScalarTy = Scalar.getValueType(); |
3843 | if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16)) |
3844 | Scalar = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Scalar); |
3845 | |
3846 | return DAG.getNode(AArch64ISD::INSR, dl, Op.getValueType(), |
3847 | Op.getOperand(1), Scalar); |
3848 | } |
3849 | case Intrinsic::aarch64_sve_rbit: |
3850 | return DAG.getNode(AArch64ISD::BITREVERSE_MERGE_PASSTHRU, dl, |
3851 | Op.getValueType(), Op.getOperand(2), Op.getOperand(3), |
3852 | Op.getOperand(1)); |
3853 | case Intrinsic::aarch64_sve_revb: |
3854 | return DAG.getNode(AArch64ISD::BSWAP_MERGE_PASSTHRU, dl, Op.getValueType(), |
3855 | Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); |
3856 | case Intrinsic::aarch64_sve_sxtb: |
3857 | return DAG.getNode( |
3858 | AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(), |
3859 | Op.getOperand(2), Op.getOperand(3), |
3860 | DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)), |
3861 | Op.getOperand(1)); |
3862 | case Intrinsic::aarch64_sve_sxth: |
3863 | return DAG.getNode( |
3864 | AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(), |
3865 | Op.getOperand(2), Op.getOperand(3), |
3866 | DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)), |
3867 | Op.getOperand(1)); |
3868 | case Intrinsic::aarch64_sve_sxtw: |
3869 | return DAG.getNode( |
3870 | AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(), |
3871 | Op.getOperand(2), Op.getOperand(3), |
3872 | DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)), |
3873 | Op.getOperand(1)); |
3874 | case Intrinsic::aarch64_sve_uxtb: |
3875 | return DAG.getNode( |
3876 | AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(), |
3877 | Op.getOperand(2), Op.getOperand(3), |
3878 | DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)), |
3879 | Op.getOperand(1)); |
3880 | case Intrinsic::aarch64_sve_uxth: |
3881 | return DAG.getNode( |
3882 | AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(), |
3883 | Op.getOperand(2), Op.getOperand(3), |
3884 | DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)), |
3885 | Op.getOperand(1)); |
3886 | case Intrinsic::aarch64_sve_uxtw: |
3887 | return DAG.getNode( |
3888 | AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(), |
3889 | Op.getOperand(2), Op.getOperand(3), |
3890 | DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)), |
3891 | Op.getOperand(1)); |
3892 | |
3893 | case Intrinsic::localaddress: { |
3894 | const auto &MF = DAG.getMachineFunction(); |
3895 | const auto *RegInfo = Subtarget->getRegisterInfo(); |
3896 | unsigned Reg = RegInfo->getLocalAddressRegister(MF); |
3897 | return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, |
3898 | Op.getSimpleValueType()); |
3899 | } |
3900 | |
3901 | case Intrinsic::eh_recoverfp: { |
3902 | // FIXME: This needs to be implemented to correctly handle highly aligned |
3903 | // stack objects. For now we simply return the incoming FP. Refer D53541 |
3904 | // for more details. |
3905 | SDValue FnOp = Op.getOperand(1); |
3906 | SDValue IncomingFPOp = Op.getOperand(2); |
3907 | GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp); |
3908 | auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr); |
3909 | if (!Fn) |
3910 | report_fatal_error( |
3911 | "llvm.eh.recoverfp must take a function as the first argument"); |
3912 | return IncomingFPOp; |
3913 | } |
3914 | |
3915 | case Intrinsic::aarch64_neon_vsri: |
3916 | case Intrinsic::aarch64_neon_vsli: { |
3917 | EVT Ty = Op.getValueType(); |
3918 | |
3919 | if (!Ty.isVector()) |
3920 | report_fatal_error("Unexpected type for aarch64_neon_vsli"); |
3921 | |
3922 | assert(Op.getConstantOperandVal(3) <= Ty.getScalarSizeInBits()); |
3923 | |
3924 | bool IsShiftRight = IntNo == Intrinsic::aarch64_neon_vsri; |
3925 | unsigned Opcode = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI; |
3926 | return DAG.getNode(Opcode, dl, Ty, Op.getOperand(1), Op.getOperand(2), |
3927 | Op.getOperand(3)); |
3928 | } |
3929 | |
3930 | case Intrinsic::aarch64_neon_srhadd: |
3931 | case Intrinsic::aarch64_neon_urhadd: |
3932 | case Intrinsic::aarch64_neon_shadd: |
3933 | case Intrinsic::aarch64_neon_uhadd: { |
3934 | bool IsSignedAdd = (IntNo == Intrinsic::aarch64_neon_srhadd || |
3935 | IntNo == Intrinsic::aarch64_neon_shadd); |
3936 | bool IsRoundingAdd = (IntNo == Intrinsic::aarch64_neon_srhadd || |
3937 | IntNo == Intrinsic::aarch64_neon_urhadd); |
3938 | unsigned Opcode = |
3939 | IsSignedAdd ? (IsRoundingAdd ? AArch64ISD::SRHADD : AArch64ISD::SHADD) |
3940 | : (IsRoundingAdd ? AArch64ISD::URHADD : AArch64ISD::UHADD); |
3941 | return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1), |
3942 | Op.getOperand(2)); |
3943 | } |
3944 | case Intrinsic::aarch64_neon_sabd: |
3945 | case Intrinsic::aarch64_neon_uabd: { |
3946 | unsigned Opcode = IntNo == Intrinsic::aarch64_neon_uabd ? AArch64ISD::UABD |
3947 | : AArch64ISD::SABD; |
3948 | return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1), |
3949 | Op.getOperand(2)); |
3950 | } |
3951 | case Intrinsic::aarch64_neon_sdot: |
3952 | case Intrinsic::aarch64_neon_udot: |
3953 | case Intrinsic::aarch64_sve_sdot: |
3954 | case Intrinsic::aarch64_sve_udot: { |
3955 | unsigned Opcode = (IntNo == Intrinsic::aarch64_neon_udot || |
3956 | IntNo == Intrinsic::aarch64_sve_udot) |
3957 | ? AArch64ISD::UDOT |
3958 | : AArch64ISD::SDOT; |
3959 | return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1), |
3960 | Op.getOperand(2), Op.getOperand(3)); |
3961 | } |
3962 | } |
3963 | } |
3964 | |
3965 | bool AArch64TargetLowering::shouldExtendGSIndex(EVT VT, EVT &EltTy) const { |
3966 | if (VT.getVectorElementType() == MVT::i8 || |
3967 | VT.getVectorElementType() == MVT::i16) { |
3968 | EltTy = MVT::i32; |
3969 | return true; |
3970 | } |
3971 | return false; |
3972 | } |
3973 | |
3974 | bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const { |
3975 | if (VT.getVectorElementType() == MVT::i32 && |
3976 | VT.getVectorElementCount().getKnownMinValue() >= 4) |
3977 | return true; |
3978 | |
3979 | return false; |
3980 | } |
3981 | |
3982 | bool AArch64TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const { |
3983 | return ExtVal.getValueType().isScalableVector(); |
3984 | } |
3985 | |
3986 | unsigned getGatherVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) { |
3987 | std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = { |
3988 | {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ false), |
3989 | AArch64ISD::GLD1_MERGE_ZERO}, |
3990 | {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ true), |
3991 | AArch64ISD::GLD1_UXTW_MERGE_ZERO}, |
3992 | {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ false), |
3993 | AArch64ISD::GLD1_MERGE_ZERO}, |
3994 | {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ true), |
3995 | AArch64ISD::GLD1_SXTW_MERGE_ZERO}, |
3996 | {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ false), |
3997 | AArch64ISD::GLD1_SCALED_MERGE_ZERO}, |
3998 | {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ true), |
3999 | AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO}, |
4000 | {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ false), |
4001 | AArch64ISD::GLD1_SCALED_MERGE_ZERO}, |
4002 | {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ true), |
4003 | AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO}, |
4004 | }; |
4005 | auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend); |
4006 | return AddrModes.find(Key)->second; |
4007 | } |
4008 | |
4009 | unsigned getScatterVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) { |
4010 | std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = { |
4011 | {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ false), |
4012 | AArch64ISD::SST1_PRED}, |
4013 | {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ true), |
4014 | AArch64ISD::SST1_UXTW_PRED}, |
4015 | {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ false), |
4016 | AArch64ISD::SST1_PRED}, |
4017 | {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ true), |
4018 | AArch64ISD::SST1_SXTW_PRED}, |
4019 | {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ false), |
4020 | AArch64ISD::SST1_SCALED_PRED}, |
4021 | {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ true), |
4022 | AArch64ISD::SST1_UXTW_SCALED_PRED}, |
4023 | {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ false), |
4024 | AArch64ISD::SST1_SCALED_PRED}, |
4025 | {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ true), |
4026 | AArch64ISD::SST1_SXTW_SCALED_PRED}, |
4027 | }; |
4028 | auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend); |
4029 | return AddrModes.find(Key)->second; |
4030 | } |
4031 | |
4032 | unsigned getSignExtendedGatherOpcode(unsigned Opcode) { |
4033 | switch (Opcode) { |
4034 | default: |
4035 | llvm_unreachable("unimplemented opcode"); |
4036 | return Opcode; |
4037 | case AArch64ISD::GLD1_MERGE_ZERO: |
4038 | return AArch64ISD::GLD1S_MERGE_ZERO; |
4039 | case AArch64ISD::GLD1_IMM_MERGE_ZERO: |
4040 | return AArch64ISD::GLD1S_IMM_MERGE_ZERO; |
4041 | case AArch64ISD::GLD1_UXTW_MERGE_ZERO: |
4042 | return AArch64ISD::GLD1S_UXTW_MERGE_ZERO; |
4043 | case AArch64ISD::GLD1_SXTW_MERGE_ZERO: |
4044 | return AArch64ISD::GLD1S_SXTW_MERGE_ZERO; |
4045 | case AArch64ISD::GLD1_SCALED_MERGE_ZERO: |
4046 | return AArch64ISD::GLD1S_SCALED_MERGE_ZERO; |
4047 | case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO: |
4048 | return AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO; |
4049 | case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO: |
4050 | return AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO; |
4051 | } |
4052 | } |
4053 | |
4054 | bool getGatherScatterIndexIsExtended(SDValue Index) { |
4055 | unsigned Opcode = Index.getOpcode(); |
4056 | if (Opcode == ISD::SIGN_EXTEND_INREG) |
4057 | return true; |
4058 | |
4059 | if (Opcode == ISD::AND) { |
4060 | SDValue Splat = Index.getOperand(1); |
4061 | if (Splat.getOpcode() != ISD::SPLAT_VECTOR) |
4062 | return false; |
4063 | ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Splat.getOperand(0)); |
4064 | if (!Mask || Mask->getZExtValue() != 0xFFFFFFFF) |
4065 | return false; |
4066 | return true; |
4067 | } |
4068 | |
4069 | return false; |
4070 | } |
4071 | |
4072 | // If the base pointer of a masked gather or scatter is null, we |
4073 | // may be able to swap BasePtr & Index and use the vector + register |
4074 | // or vector + immediate addressing mode, e.g. |
4075 | // VECTOR + REGISTER: |
4076 | // getelementptr nullptr, <vscale x N x T> (splat(%offset) + %indices) |
4077 | // -> getelementptr %offset, <vscale x N x T> %indices |
4078 | // VECTOR + IMMEDIATE: |
4079 | // getelementptr nullptr, <vscale x N x T> (splat(#x) + %indices) |
4080 | // -> getelementptr #x, <vscale x N x T> %indices |
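     | // For example (a sketch under the constraints checked below): for nxv4i32 |
     | // data, BasePtr == null and Index == add(%addrs, splat(16)) can use the |
     | // vector + immediate form, since 16 is a multiple of the 4-byte element size |
     | // and 16 / 4 = 4 <= 31; %addrs then carries the addresses and 16 becomes the |
     | // immediate offset (%addrs is a placeholder name). |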
4081 | void selectGatherScatterAddrMode(SDValue &BasePtr, SDValue &Index, EVT MemVT, |
4082 | unsigned &Opcode, bool IsGather, |
4083 | SelectionDAG &DAG) { |
4084 | if (!isNullConstant(BasePtr)) |
4085 | return; |
4086 | |
4087 | ConstantSDNode *Offset = nullptr; |
4088 | if (Index.getOpcode() == ISD::ADD) |
4089 | if (auto SplatVal = DAG.getSplatValue(Index.getOperand(1))) { |
4090 | if (isa<ConstantSDNode>(SplatVal)) |
4091 | Offset = cast<ConstantSDNode>(SplatVal); |
4092 | else { |
4093 | BasePtr = SplatVal; |
4094 | Index = Index->getOperand(0); |
4095 | return; |
4096 | } |
4097 | } |
4098 | |
4099 | unsigned NewOp = |
4100 | IsGather ? AArch64ISD::GLD1_IMM_MERGE_ZERO : AArch64ISD::SST1_IMM_PRED; |
4101 | |
4102 | if (!Offset) { |
4103 | std::swap(BasePtr, Index); |
4104 | Opcode = NewOp; |
4105 | return; |
4106 | } |
4107 | |
4108 | uint64_t OffsetVal = Offset->getZExtValue(); |
4109 | unsigned ScalarSizeInBytes = MemVT.getScalarSizeInBits() / 8; |
4110 | auto ConstOffset = DAG.getConstant(OffsetVal, SDLoc(Index), MVT::i64); |
4111 | |
4112 | if (OffsetVal % ScalarSizeInBytes || OffsetVal / ScalarSizeInBytes > 31) { |
4113 | // Index is out of range for the immediate addressing mode |
4114 | BasePtr = ConstOffset; |
4115 | Index = Index->getOperand(0); |
4116 | return; |
4117 | } |
4118 | |
4119 | // Immediate is in range |
4120 | Opcode = NewOp; |
4121 | BasePtr = Index->getOperand(0); |
4122 | Index = ConstOffset; |
4123 | } |
4124 | |
4125 | SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op, |
4126 | SelectionDAG &DAG) const { |
4127 | SDLoc DL(Op); |
4128 | MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(Op); |
4129 | assert(MGT && "Can only custom lower gather load nodes"); |
4130 | |
4131 | SDValue Index = MGT->getIndex(); |
4132 | SDValue Chain = MGT->getChain(); |
4133 | SDValue PassThru = MGT->getPassThru(); |
4134 | SDValue Mask = MGT->getMask(); |
4135 | SDValue BasePtr = MGT->getBasePtr(); |
4136 | ISD::LoadExtType ExtTy = MGT->getExtensionType(); |
4137 | |
4138 | ISD::MemIndexType IndexType = MGT->getIndexType(); |
4139 | bool IsScaled = |
4140 | IndexType == ISD::SIGNED_SCALED || IndexType == ISD::UNSIGNED_SCALED; |
4141 | bool IsSigned = |
4142 | IndexType == ISD::SIGNED_SCALED || IndexType == ISD::SIGNED_UNSCALED; |
4143 | bool IdxNeedsExtend = |
4144 | getGatherScatterIndexIsExtended(Index) || |
4145 | Index.getSimpleValueType().getVectorElementType() == MVT::i32; |
4146 | bool ResNeedsSignExtend = ExtTy == ISD::EXTLOAD || ExtTy == ISD::SEXTLOAD; |
4147 | |
4148 | EVT VT = PassThru.getSimpleValueType(); |
4149 | EVT MemVT = MGT->getMemoryVT(); |
4150 | SDValue InputVT = DAG.getValueType(MemVT); |
4151 | |
4152 | if (VT.getVectorElementType() == MVT::bf16 && |
4153 | !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16()) |
4154 | return SDValue(); |
4155 | |
4156 | // Handle FP data by using an integer gather and casting the result. |
4157 | if (VT.isFloatingPoint()) { |
4158 | EVT PassThruVT = getPackedSVEVectorVT(VT.getVectorElementCount()); |
4159 | PassThru = getSVESafeBitCast(PassThruVT, PassThru, DAG); |
4160 | InputVT = DAG.getValueType(MemVT.changeVectorElementTypeToInteger()); |
4161 | } |
4162 | |
4163 | SDVTList VTs = DAG.getVTList(PassThru.getSimpleValueType(), MVT::Other); |
4164 | |
4165 | if (getGatherScatterIndexIsExtended(Index)) |
4166 | Index = Index.getOperand(0); |
4167 | |
4168 | unsigned Opcode = getGatherVecOpcode(IsScaled, IsSigned, IdxNeedsExtend); |
4169 | selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode, |
4170 | /*isGather=*/true, DAG); |
4171 | |
4172 | if (ResNeedsSignExtend) |
4173 | Opcode = getSignExtendedGatherOpcode(Opcode); |
4174 | |
4175 | SDValue Ops[] = {Chain, Mask, BasePtr, Index, InputVT, PassThru}; |
4176 | SDValue Gather = DAG.getNode(Opcode, DL, VTs, Ops); |
4177 | |
4178 | if (VT.isFloatingPoint()) { |
4179 | SDValue Cast = getSVESafeBitCast(VT, Gather, DAG); |
4180 | return DAG.getMergeValues({Cast, Gather.getValue(1)}, DL); |
4181 | } |
4182 | |
4183 | return Gather; |
4184 | } |
4185 | |
4186 | SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op, |
4187 | SelectionDAG &DAG) const { |
4188 | SDLoc DL(Op); |
4189 | MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(Op); |
4190 | assert(MSC && "Can only custom lower scatter store nodes"); |
4191 | |
4192 | SDValue Index = MSC->getIndex(); |
4193 | SDValue Chain = MSC->getChain(); |
4194 | SDValue StoreVal = MSC->getValue(); |
4195 | SDValue Mask = MSC->getMask(); |
4196 | SDValue BasePtr = MSC->getBasePtr(); |
4197 | |
4198 | ISD::MemIndexType IndexType = MSC->getIndexType(); |
4199 | bool IsScaled = |
4200 | IndexType == ISD::SIGNED_SCALED || IndexType == ISD::UNSIGNED_SCALED; |
4201 | bool IsSigned = |
4202 | IndexType == ISD::SIGNED_SCALED || IndexType == ISD::SIGNED_UNSCALED; |
4203 | bool NeedsExtend = |
4204 | getGatherScatterIndexIsExtended(Index) || |
4205 | Index.getSimpleValueType().getVectorElementType() == MVT::i32; |
4206 | |
4207 | EVT VT = StoreVal.getSimpleValueType(); |
4208 | SDVTList VTs = DAG.getVTList(MVT::Other); |
4209 | EVT MemVT = MSC->getMemoryVT(); |
4210 | SDValue InputVT = DAG.getValueType(MemVT); |
4211 | |
4212 | if (VT.getVectorElementType() == MVT::bf16 && |
4213 | !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16()) |
4214 | return SDValue(); |
4215 | |
4216 | // Handle FP data by casting the data so an integer scatter can be used. |
4217 | if (VT.isFloatingPoint()) { |
4218 | EVT StoreValVT = getPackedSVEVectorVT(VT.getVectorElementCount()); |
4219 | StoreVal = getSVESafeBitCast(StoreValVT, StoreVal, DAG); |
4220 | InputVT = DAG.getValueType(MemVT.changeVectorElementTypeToInteger()); |
4221 | } |
4222 | |
4223 | if (getGatherScatterIndexIsExtended(Index)) |
4224 | Index = Index.getOperand(0); |
4225 | |
4226 | unsigned Opcode = getScatterVecOpcode(IsScaled, IsSigned, NeedsExtend); |
4227 | selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode, |
4228 | /*isGather=*/false, DAG); |
4229 | |
4230 | SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, InputVT}; |
4231 | return DAG.getNode(Opcode, DL, VTs, Ops); |
4232 | } |
4233 | |
4234 | // Custom lower trunc store for v4i8 vectors, since it is promoted to v4i16. |
4235 | static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST, |
4236 | EVT VT, EVT MemVT, |
4237 | SelectionDAG &DAG) { |
4238 | assert(VT.isVector() && "VT should be a vector type"); |
4239 | assert(MemVT == MVT::v4i8 && VT == MVT::v4i16); |
4240 | |
4241 | SDValue Value = ST->getValue(); |
4242 | |
4243 | // It first extends the promoted v4i16 to v8i16, truncates it to v8i8, and |
4244 | // extracts the word lane that represents the v4i8 subvector. It optimizes |
4245 | // the store to: |
4246 | // |
4247 | // xtn v0.8b, v0.8h |
4248 | // str s0, [x0] |
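     | // This typically arises from IR like "store <4 x i8> (trunc <4 x i16> %v), |
     | // <4 x i8>* %p", where the trunc is folded into a truncating store and the |
     | // v4i8 value type is promoted to v4i16 (%v and %p are placeholder names). |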
4249 | |
4250 | SDValue Undef = DAG.getUNDEF(MVT::i16); |
4251 | SDValue UndefVec = DAG.getBuildVector(MVT::v4i16, DL, |
4252 | {Undef, Undef, Undef, Undef}); |
4253 | |
4254 | SDValue TruncExt = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16, |
4255 | Value, UndefVec); |
4256 | SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i8, TruncExt); |
4257 | |
4258 | Trunc = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Trunc); |
4259 | SDValue ExtractTrunc = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, |
4260 | Trunc, DAG.getConstant(0, DL, MVT::i64)); |
4261 | |
4262 | return DAG.getStore(ST->getChain(), DL, ExtractTrunc, |
4263 | ST->getBasePtr(), ST->getMemOperand()); |
4264 | } |
4265 | |
4266 | // Custom lowering for any store, vector or scalar, normal or truncating. |
4267 | // Currently we only custom-lower truncating stores from v4i16 to v4i8 and |
4268 | // volatile stores of i128. |
4269 | SDValue AArch64TargetLowering::LowerSTORE(SDValue Op, |
4270 | SelectionDAG &DAG) const { |
4271 | SDLoc Dl(Op); |
4272 | StoreSDNode *StoreNode = cast<StoreSDNode>(Op); |
4273 | assert(StoreNode && "Can only custom lower store nodes"); |
4274 | |
4275 | SDValue Value = StoreNode->getValue(); |
4276 | |
4277 | EVT VT = Value.getValueType(); |
4278 | EVT MemVT = StoreNode->getMemoryVT(); |
4279 | |
4280 | if (VT.isVector()) { |
4281 | if (useSVEForFixedLengthVectorVT(VT)) |
4282 | return LowerFixedLengthVectorStoreToSVE(Op, DAG); |
4283 | |
4284 | unsigned AS = StoreNode->getAddressSpace(); |
4285 | Align Alignment = StoreNode->getAlign(); |
4286 | if (Alignment < MemVT.getStoreSize() && |
4287 | !allowsMisalignedMemoryAccesses(MemVT, AS, Alignment, |
4288 | StoreNode->getMemOperand()->getFlags(), |
4289 | nullptr)) { |
4290 | return scalarizeVectorStore(StoreNode, DAG); |
4291 | } |
4292 | |
4293 | if (StoreNode->isTruncatingStore()) { |
4294 | return LowerTruncateVectorStore(Dl, StoreNode, VT, MemVT, DAG); |
4295 | } |
4296 | // 256 bit non-temporal stores can be lowered to STNP. Do this as part of |
4297 | // the custom lowering, as there are no un-paired non-temporal stores and |
4298 | // legalization will break up 256 bit inputs. |
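     | // For illustration: a non-temporal store of a v8i32 value is split below |
     | // into two v4i32 halves and emitted as a single STNP of the pair. |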
4299 | ElementCount EC = MemVT.getVectorElementCount(); |
4300 | if (StoreNode->isNonTemporal() && MemVT.getSizeInBits() == 256u && |
4301 | EC.isKnownEven() && |
4302 | ((MemVT.getScalarSizeInBits() == 8u || |
4303 | MemVT.getScalarSizeInBits() == 16u || |
4304 | MemVT.getScalarSizeInBits() == 32u || |
4305 | MemVT.getScalarSizeInBits() == 64u))) { |
4306 | SDValue Lo = |
4307 | DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl, |
4308 | MemVT.getHalfNumVectorElementsVT(*DAG.getContext()), |
4309 | StoreNode->getValue(), DAG.getConstant(0, Dl, MVT::i64)); |
4310 | SDValue Hi = |
4311 | DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl, |
4312 | MemVT.getHalfNumVectorElementsVT(*DAG.getContext()), |
4313 | StoreNode->getValue(), |
4314 | DAG.getConstant(EC.getKnownMinValue() / 2, Dl, MVT::i64)); |
4315 | SDValue Result = DAG.getMemIntrinsicNode( |
4316 | AArch64ISD::STNP, Dl, DAG.getVTList(MVT::Other), |
4317 | {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()}, |
4318 | StoreNode->getMemoryVT(), StoreNode->getMemOperand()); |
4319 | return Result; |
4320 | } |
4321 | } else if (MemVT == MVT::i128 && StoreNode->isVolatile()) { |
4322 | assert(StoreNode->getValue()->getValueType(0) == MVT::i128); |
4323 | SDValue Lo = |
4324 | DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i64, StoreNode->getValue(), |
4325 | DAG.getConstant(0, Dl, MVT::i64)); |
4326 | SDValue Hi = |
4327 | DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i64, StoreNode->getValue(), |
4328 | DAG.getConstant(1, Dl, MVT::i64)); |
4329 | SDValue Result = DAG.getMemIntrinsicNode( |
4330 | AArch64ISD::STP, Dl, DAG.getVTList(MVT::Other), |
4331 | {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()}, |
4332 | StoreNode->getMemoryVT(), StoreNode->getMemOperand()); |
4333 | return Result; |
4334 | } |
4335 | |
4336 | return SDValue(); |
4337 | } |
4338 | |
4339 | // Generate SUBS and CSEL for integer abs. |
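     | // For a scalar input this selects to roughly "cmp x0, #0" followed by a |
     | // conditional negate (e.g. "cneg x0, x0, mi"); the CSEL below keeps the |
     | // original value on PL (non-negative) and takes the negated value otherwise |
     | // (illustrative assembly, not a guaranteed sequence). |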
4340 | SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const { |
4341 | MVT VT = Op.getSimpleValueType(); |
4342 | |
4343 | if (VT.isVector()) |
4344 | return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABS_MERGE_PASSTHRU); |
4345 | |
4346 | SDLoc DL(Op); |
4347 | SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), |
4348 | Op.getOperand(0)); |
4349 | // Generate SUBS & CSEL. |
4350 | SDValue Cmp = |
4351 | DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32), |
4352 | Op.getOperand(0), DAG.getConstant(0, DL, VT)); |
4353 | return DAG.getNode(AArch64ISD::CSEL, DL, VT, Op.getOperand(0), Neg, |
4354 | DAG.getConstant(AArch64CC::PL, DL, MVT::i32), |
4355 | Cmp.getValue(1)); |
4356 | } |
4357 | |
4358 | SDValue AArch64TargetLowering::LowerOperation(SDValue Op, |
4359 | SelectionDAG &DAG) const { |
4360 | LLVM_DEBUG(dbgs() << "Custom lowering: "); |
4361 | LLVM_DEBUG(Op.dump()); |
4362 | |
4363 | switch (Op.getOpcode()) { |
4364 | default: |
4365 | llvm_unreachable("unimplemented operand"); |
4366 | return SDValue(); |
4367 | case ISD::BITCAST: |
4368 | return LowerBITCAST(Op, DAG); |
4369 | case ISD::GlobalAddress: |
4370 | return LowerGlobalAddress(Op, DAG); |
4371 | case ISD::GlobalTLSAddress: |
4372 | return LowerGlobalTLSAddress(Op, DAG); |
4373 | case ISD::SETCC: |
4374 | case ISD::STRICT_FSETCC: |
4375 | case ISD::STRICT_FSETCCS: |
4376 | return LowerSETCC(Op, DAG); |
4377 | case ISD::BR_CC: |
4378 | return LowerBR_CC(Op, DAG); |
4379 | case ISD::SELECT: |
4380 | return LowerSELECT(Op, DAG); |
4381 | case ISD::SELECT_CC: |
4382 | return LowerSELECT_CC(Op, DAG); |
4383 | case ISD::JumpTable: |
4384 | return LowerJumpTable(Op, DAG); |
4385 | case ISD::BR_JT: |
4386 | return LowerBR_JT(Op, DAG); |
4387 | case ISD::ConstantPool: |
4388 | return LowerConstantPool(Op, DAG); |
4389 | case ISD::BlockAddress: |
4390 | return LowerBlockAddress(Op, DAG); |
4391 | case ISD::VASTART: |
4392 | return LowerVASTART(Op, DAG); |
4393 | case ISD::VACOPY: |
4394 | return LowerVACOPY(Op, DAG); |
4395 | case ISD::VAARG: |
4396 | return LowerVAARG(Op, DAG); |
4397 | case ISD::ADDC: |
4398 | case ISD::ADDE: |
4399 | case ISD::SUBC: |
4400 | case ISD::SUBE: |
4401 | return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); |
4402 | case ISD::SADDO: |
4403 | case ISD::UADDO: |
4404 | case ISD::SSUBO: |
4405 | case ISD::USUBO: |
4406 | case ISD::SMULO: |
4407 | case ISD::UMULO: |
4408 | return LowerXALUO(Op, DAG); |
4409 | case ISD::FADD: |
4410 | return LowerToPredicatedOp(Op, DAG, AArch64ISD::FADD_PRED); |
4411 | case ISD::FSUB: |
4412 | return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSUB_PRED); |
4413 | case ISD::FMUL: |
4414 | return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMUL_PRED); |
4415 | case ISD::FMA: |
4416 | return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMA_PRED); |
4417 | case ISD::FDIV: |
4418 | return LowerToPredicatedOp(Op, DAG, AArch64ISD::FDIV_PRED); |
4419 | case ISD::FNEG: |
4420 | return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEG_MERGE_PASSTHRU); |
4421 | case ISD::FCEIL: |
4422 | return LowerToPredicatedOp(Op, DAG, AArch64ISD::FCEIL_MERGE_PASSTHRU); |
4423 | case ISD::FFLOOR: |
4424 | return LowerToPredicatedOp(Op, DAG, AArch64ISD::FFLOOR_MERGE_PASSTHRU); |
4425 | case ISD::FNEARBYINT: |
4426 | return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEARBYINT_MERGE_PASSTHRU); |
4427 | case ISD::FRINT: |
4428 | return LowerToPredicatedOp(Op, DAG, AArch64ISD::FRINT_MERGE_PASSTHRU); |
4429 | case ISD::FROUND: |
4430 | return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUND_MERGE_PASSTHRU); |
4431 | case ISD::FROUNDEVEN: |
4432 | return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU); |
4433 | case ISD::FTRUNC: |
4434 | return LowerToPredicatedOp(Op, DAG, AArch64ISD::FTRUNC_MERGE_PASSTHRU); |
4435 | case ISD::FSQRT: |
4436 | return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSQRT_MERGE_PASSTHRU); |
4437 | case ISD::FABS: |
4438 | return LowerToPredicatedOp(Op, DAG, AArch64ISD::FABS_MERGE_PASSTHRU); |
4439 | case ISD::FP_ROUND: |
4440 | case ISD::STRICT_FP_ROUND: |
4441 | return LowerFP_ROUND(Op, DAG); |
4442 | case ISD::FP_EXTEND: |
4443 | return LowerFP_EXTEND(Op, DAG); |
4444 | case ISD::FRAMEADDR: |
4445 | return LowerFRAMEADDR(Op, DAG); |
4446 | case ISD::SPONENTRY: |
4447 | return LowerSPONENTRY(Op, DAG); |
4448 | case ISD::RETURNADDR: |
4449 | return LowerRETURNADDR(Op, DAG); |
4450 | case ISD::ADDROFRETURNADDR: |
4451 | return LowerADDROFRETURNADDR(Op, DAG); |
4452 | case ISD::CONCAT_VECTORS: |
4453 | return LowerCONCAT_VECTORS(Op, DAG); |
4454 | case ISD::INSERT_VECTOR_ELT: |
4455 | return LowerINSERT_VECTOR_ELT(Op, DAG); |
4456 | case ISD::EXTRACT_VECTOR_ELT: |
4457 | return LowerEXTRACT_VECTOR_ELT(Op, DAG); |
4458 | case ISD::BUILD_VECTOR: |
4459 | return LowerBUILD_VECTOR(Op, DAG); |
4460 | case ISD::VECTOR_SHUFFLE: |
4461 | return LowerVECTOR_SHUFFLE(Op, DAG); |
4462 | case ISD::SPLAT_VECTOR: |
4463 | return LowerSPLAT_VECTOR(Op, DAG); |
4464 | case ISD::STEP_VECTOR: |
4465 | return LowerSTEP_VECTOR(Op, DAG); |
4466 | case ISD::EXTRACT_SUBVECTOR: |
4467 | return LowerEXTRACT_SUBVECTOR(Op, DAG); |
4468 | case ISD::INSERT_SUBVECTOR: |
4469 | return LowerINSERT_SUBVECTOR(Op, DAG); |
4470 | case ISD::SDIV: |
4471 | case ISD::UDIV: |
4472 | return LowerDIV(Op, DAG); |
4473 | case ISD::SMIN: |
4474 | return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED, |
4475 | /*OverrideNEON=*/true); |
4476 | case ISD::UMIN: |
4477 | return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED, |
4478 | /*OverrideNEON=*/true); |
4479 | case ISD::SMAX: |
4480 | return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED, |
4481 | /*OverrideNEON=*/true); |
4482 | case ISD::UMAX: |
4483 | return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_PRED, |
4484 | /*OverrideNEON=*/true); |
4485 | case ISD::SRA: |
4486 | case ISD::SRL: |
4487 | case ISD::SHL: |
4488 | return LowerVectorSRA_SRL_SHL(Op, DAG); |
4489 | case ISD::SHL_PARTS: |
4490 | return LowerShiftLeftParts(Op, DAG); |
4491 | case ISD::SRL_PARTS: |
4492 | case ISD::SRA_PARTS: |
4493 | return LowerShiftRightParts(Op, DAG); |
4494 | case ISD::CTPOP: |
4495 | return LowerCTPOP(Op, DAG); |
4496 | case ISD::FCOPYSIGN: |
4497 | return LowerFCOPYSIGN(Op, DAG); |
4498 | case ISD::OR: |
4499 | return LowerVectorOR(Op, DAG); |
4500 | case ISD::XOR: |
4501 | return LowerXOR(Op, DAG); |
4502 | case ISD::PREFETCH: |
4503 | return LowerPREFETCH(Op, DAG); |
4504 | case ISD::SINT_TO_FP: |
4505 | case ISD::UINT_TO_FP: |
4506 | case ISD::STRICT_SINT_TO_FP: |
4507 | case ISD::STRICT_UINT_TO_FP: |
4508 | return LowerINT_TO_FP(Op, DAG); |
4509 | case ISD::FP_TO_SINT: |
4510 | case ISD::FP_TO_UINT: |
4511 | case ISD::STRICT_FP_TO_SINT: |
4512 | case ISD::STRICT_FP_TO_UINT: |
4513 | return LowerFP_TO_INT(Op, DAG); |
4514 | case ISD::FSINCOS: |
4515 | return LowerFSINCOS(Op, DAG); |
4516 | case ISD::FLT_ROUNDS_: |
4517 | return LowerFLT_ROUNDS_(Op, DAG); |
4518 | case ISD::SET_ROUNDING: |
4519 | return LowerSET_ROUNDING(Op, DAG); |
4520 | case ISD::MUL: |
4521 | return LowerMUL(Op, DAG); |
4522 | case ISD::INTRINSIC_WO_CHAIN: |
4523 | return LowerINTRINSIC_WO_CHAIN(Op, DAG); |
4524 | case ISD::STORE: |
4525 | return LowerSTORE(Op, DAG); |
4526 | case ISD::MGATHER: |
4527 | return LowerMGATHER(Op, DAG); |
4528 | case ISD::MSCATTER: |
4529 | return LowerMSCATTER(Op, DAG); |
4530 | case ISD::VECREDUCE_SEQ_FADD: |
4531 | return LowerVECREDUCE_SEQ_FADD(Op, DAG); |
4532 | case ISD::VECREDUCE_ADD: |
4533 | case ISD::VECREDUCE_AND: |
4534 | case ISD::VECREDUCE_OR: |
4535 | case ISD::VECREDUCE_XOR: |
4536 | case ISD::VECREDUCE_SMAX: |
4537 | case ISD::VECREDUCE_SMIN: |
4538 | case ISD::VECREDUCE_UMAX: |
4539 | case ISD::VECREDUCE_UMIN: |
4540 | case ISD::VECREDUCE_FADD: |
4541 | case ISD::VECREDUCE_FMAX: |
4542 | case ISD::VECREDUCE_FMIN: |
4543 | return LowerVECREDUCE(Op, DAG); |
4544 | case ISD::ATOMIC_LOAD_SUB: |
4545 | return LowerATOMIC_LOAD_SUB(Op, DAG); |
4546 | case ISD::ATOMIC_LOAD_AND: |
4547 | return LowerATOMIC_LOAD_AND(Op, DAG); |
4548 | case ISD::DYNAMIC_STACKALLOC: |
4549 | return LowerDYNAMIC_STACKALLOC(Op, DAG); |
4550 | case ISD::VSCALE: |
4551 | return LowerVSCALE(Op, DAG); |
4552 | case ISD::ANY_EXTEND: |
4553 | case ISD::SIGN_EXTEND: |
4554 | case ISD::ZERO_EXTEND: |
4555 | return LowerFixedLengthVectorIntExtendToSVE(Op, DAG); |
4556 | case ISD::SIGN_EXTEND_INREG: { |
4557 | // Only custom lower when ExtraVT has a legal byte based element type. |
4558 | EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); |
4559 | EVT ExtraEltVT = ExtraVT.getVectorElementType(); |
4560 | if ((ExtraEltVT != MVT::i8) && (ExtraEltVT != MVT::i16) && |
4561 | (ExtraEltVT != MVT::i32) && (ExtraEltVT != MVT::i64)) |
4562 | return SDValue(); |
4563 | |
4564 | return LowerToPredicatedOp(Op, DAG, |
4565 | AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU); |
4566 | } |
4567 | case ISD::TRUNCATE: |
4568 | return LowerTRUNCATE(Op, DAG); |
4569 | case ISD::LOAD: |
4570 | if (useSVEForFixedLengthVectorVT(Op.getValueType())) |
4571 | return LowerFixedLengthVectorLoadToSVE(Op, DAG); |
4572 | llvm_unreachable("Unexpected request to lower ISD::LOAD"); |
4573 | case ISD::ADD: |
4574 | return LowerToPredicatedOp(Op, DAG, AArch64ISD::ADD_PRED); |
4575 | case ISD::AND: |
4576 | return LowerToScalableOp(Op, DAG); |
4577 | case ISD::SUB: |
4578 | return LowerToPredicatedOp(Op, DAG, AArch64ISD::SUB_PRED); |
4579 | case ISD::FMAXIMUM: |
4580 | return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMAX_PRED); |
4581 | case ISD::FMAXNUM: |
4582 | return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMAXNM_PRED); |
4583 | case ISD::FMINIMUM: |
4584 | return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMIN_PRED); |
4585 | case ISD::FMINNUM: |
4586 | return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMINNM_PRED); |
4587 | case ISD::VSELECT: |
4588 | return LowerFixedLengthVectorSelectToSVE(Op, DAG); |
4589 | case ISD::ABS: |
4590 | return LowerABS(Op, DAG); |
4591 | case ISD::BITREVERSE: |
4592 | return LowerToPredicatedOp(Op, DAG, AArch64ISD::BITREVERSE_MERGE_PASSTHRU, |
4593 | /*OverrideNEON=*/true); |
4594 | case ISD::BSWAP: |
4595 | return LowerToPredicatedOp(Op, DAG, AArch64ISD::BSWAP_MERGE_PASSTHRU); |
4596 | case ISD::CTLZ: |
4597 | return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTLZ_MERGE_PASSTHRU, |
4598 | /*OverrideNEON=*/true); |
4599 | case ISD::CTTZ: |
4600 | return LowerCTTZ(Op, DAG); |
4601 | } |
4602 | } |
4603 | |
4604 | bool AArch64TargetLowering::mergeStoresAfterLegalization(EVT VT) const { |
4605 | return !Subtarget->useSVEForFixedLengthVectors(); |
4606 | } |
4607 | |
4608 | bool AArch64TargetLowering::useSVEForFixedLengthVectorVT( |
4609 | EVT VT, bool OverrideNEON) const { |
4610 | if (!Subtarget->useSVEForFixedLengthVectors()) |
4611 | return false; |
4612 | |
4613 | if (!VT.isFixedLengthVector()) |
4614 | return false; |
4615 | |
4616 | // Don't use SVE for vectors we cannot scalarize if required. |
4617 | switch (VT.getVectorElementType().getSimpleVT().SimpleTy) { |
4618 | // Fixed length predicates should be promoted to i8. |
4619 | // NOTE: This is consistent with how NEON (and thus 64/128bit vectors) work. |
4620 | case MVT::i1: |
4621 | default: |
4622 | return false; |
4623 | case MVT::i8: |
4624 | case MVT::i16: |
4625 | case MVT::i32: |
4626 | case MVT::i64: |
4627 | case MVT::f16: |
4628 | case MVT::f32: |
4629 | case MVT::f64: |
4630 | break; |
4631 | } |
4632 | |
4633 | // All SVE implementations support NEON sized vectors. |
4634 | if (OverrideNEON && (VT.is128BitVector() || VT.is64BitVector())) |
4635 | return true; |
4636 | |
4637 | // Ensure NEON MVTs only belong to a single register class. |
4638 | if (VT.getFixedSizeInBits() <= 128) |
4639 | return false; |
4640 | |
4641 | // Don't use SVE for types that don't fit. |
4642 | if (VT.getFixedSizeInBits() > Subtarget->getMinSVEVectorSizeInBits()) |
4643 | return false; |
4644 | |
4645 | // TODO: Perhaps an artificial restriction, but worth having whilst getting |
4646 | // the base fixed length SVE support in place. |
4647 | if (!VT.isPow2VectorType()) |
4648 | return false; |
4649 | |
4650 | return true; |
4651 | } |
4652 | |
4653 | //===----------------------------------------------------------------------===// |
4654 | // Calling Convention Implementation |
4655 | //===----------------------------------------------------------------------===// |
4656 | |
4657 | /// Selects the correct CCAssignFn for a given CallingConvention value. |
4658 | CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC, |
4659 | bool IsVarArg) const { |
4660 | switch (CC) { |
4661 | default: |
4662 | report_fatal_error("Unsupported calling convention."); |
4663 | case CallingConv::WebKit_JS: |
4664 | return CC_AArch64_WebKit_JS; |
4665 | case CallingConv::GHC: |
4666 | return CC_AArch64_GHC; |
4667 | case CallingConv::C: |
4668 | case CallingConv::Fast: |
4669 | case CallingConv::PreserveMost: |
4670 | case CallingConv::CXX_FAST_TLS: |
4671 | case CallingConv::Swift: |
4672 | if (Subtarget->isTargetWindows() && IsVarArg) |
4673 | return CC_AArch64_Win64_VarArg; |
4674 | if (!Subtarget->isTargetDarwin()) |
4675 | return CC_AArch64_AAPCS; |
4676 | if (!IsVarArg) |
4677 | return CC_AArch64_DarwinPCS; |
4678 | return Subtarget->isTargetILP32() ? CC_AArch64_DarwinPCS_ILP32_VarArg |
4679 | : CC_AArch64_DarwinPCS_VarArg; |
4680 | case CallingConv::Win64: |
4681 | return IsVarArg ? CC_AArch64_Win64_VarArg : CC_AArch64_AAPCS; |
4682 | case CallingConv::CFGuard_Check: |
4683 | return CC_AArch64_Win64_CFGuard_Check; |
4684 | case CallingConv::AArch64_VectorCall: |
4685 | case CallingConv::AArch64_SVE_VectorCall: |
4686 | return CC_AArch64_AAPCS; |
4687 | } |
4688 | } |
4689 | |
4690 | CCAssignFn * |
4691 | AArch64TargetLowering::CCAssignFnForReturn(CallingConv::ID CC) const { |
4692 | return CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS |
4693 | : RetCC_AArch64_AAPCS; |
4694 | } |
4695 | |
4696 | SDValue AArch64TargetLowering::LowerFormalArguments( |
4697 | SDValue Chain, CallingConv::ID CallConv, bool isVarArg, |
4698 | const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL, |
4699 | SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { |
4700 | MachineFunction &MF = DAG.getMachineFunction(); |
4701 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
4702 | bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv()); |
4703 | |
4704 | // Assign locations to all of the incoming arguments. |
4705 | SmallVector<CCValAssign, 16> ArgLocs; |
4706 | DenseMap<unsigned, SDValue> CopiedRegs; |
4707 | CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, |
4708 | *DAG.getContext()); |
4709 | |
4710 | // At this point, Ins[].VT may already be promoted to i32. To correctly |
4711 | // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and |
4712 | // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT. |
4713 | // Since AnalyzeFormalArguments uses Ins[].VT for both ValVT and LocVT, here |
4714 | // we use a special version of AnalyzeFormalArguments to pass in ValVT and |
4715 | // LocVT. |
4716 | unsigned NumArgs = Ins.size(); |
4717 | Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin(); |
4718 | unsigned CurArgIdx = 0; |
4719 | for (unsigned i = 0; i != NumArgs; ++i) { |
4720 | MVT ValVT = Ins[i].VT; |
4721 | if (Ins[i].isOrigArg()) { |
4722 | std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx); |
4723 | CurArgIdx = Ins[i].getOrigArgIndex(); |
4724 | |
4725 | // Get type of the original argument. |
4726 | EVT ActualVT = getValueType(DAG.getDataLayout(), CurOrigArg->getType(), |
4727 | /*AllowUnknown*/ true); |
4728 | MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other; |
4729 | // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16. |
4730 | if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8) |
4731 | ValVT = MVT::i8; |
4732 | else if (ActualMVT == MVT::i16) |
4733 | ValVT = MVT::i16; |
4734 | } |
4735 | CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false); |
4736 | bool Res = |
4737 | AssignFn(i, ValVT, ValVT, CCValAssign::Full, Ins[i].Flags, CCInfo); |
4738 | assert(!Res && "Call operand has unhandled type");
4739 | (void)Res; |
4740 | } |
4741 | SmallVector<SDValue, 16> ArgValues; |
4742 | unsigned ExtraArgLocs = 0; |
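     | // An indirectly passed SVE tuple consumes several Ins[] entries but only one
     | // CCValAssign; ExtraArgLocs keeps the ArgLocs indexing in step with Ins.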
4743 | for (unsigned i = 0, e = Ins.size(); i != e; ++i) { |
4744 | CCValAssign &VA = ArgLocs[i - ExtraArgLocs]; |
4745 | |
4746 | if (Ins[i].Flags.isByVal()) { |
4747 | // Byval is used for HFAs in the PCS, but the system should work in a |
4748 | // non-compliant manner for larger structs. |
4749 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); |
4750 | int Size = Ins[i].Flags.getByValSize(); |
4751 | unsigned NumRegs = (Size + 7) / 8; |
4752 | |
4753 | // FIXME: This works on big-endian for composite byvals, which are the common |
4754 | // case. It should also work for fundamental types too. |
4755 | unsigned FrameIdx = |
4756 | MFI.CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false); |
4757 | SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrVT); |
4758 | InVals.push_back(FrameIdxN); |
4759 | |
4760 | continue; |
4761 | } |
4762 | |
4763 | SDValue ArgValue; |
4764 | if (VA.isRegLoc()) { |
4765 | // Arguments stored in registers. |
4766 | EVT RegVT = VA.getLocVT(); |
4767 | const TargetRegisterClass *RC; |
4768 | |
4769 | if (RegVT == MVT::i32) |
4770 | RC = &AArch64::GPR32RegClass; |
4771 | else if (RegVT == MVT::i64) |
4772 | RC = &AArch64::GPR64RegClass; |
4773 | else if (RegVT == MVT::f16 || RegVT == MVT::bf16) |
4774 | RC = &AArch64::FPR16RegClass; |
4775 | else if (RegVT == MVT::f32) |
4776 | RC = &AArch64::FPR32RegClass; |
4777 | else if (RegVT == MVT::f64 || RegVT.is64BitVector()) |
4778 | RC = &AArch64::FPR64RegClass; |
4779 | else if (RegVT == MVT::f128 || RegVT.is128BitVector()) |
4780 | RC = &AArch64::FPR128RegClass; |
4781 | else if (RegVT.isScalableVector() && |
4782 | RegVT.getVectorElementType() == MVT::i1) |
4783 | RC = &AArch64::PPRRegClass; |
4784 | else if (RegVT.isScalableVector()) |
4785 | RC = &AArch64::ZPRRegClass; |
4786 | else |
4787 | llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering")::llvm::llvm_unreachable_internal("RegVT not supported by FORMAL_ARGUMENTS Lowering" , "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp" , 4787); |
4788 | |
4789 | // Transform the arguments in physical registers into virtual ones. |
4790 | unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); |
4791 | ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT); |
4792 | |
4793 | // If this is an 8, 16 or 32-bit value, it is really passed promoted |
4794 | // to 64 bits. Insert an assert[sz]ext to capture this, then |
4795 | // truncate to the right size. |
4796 | switch (VA.getLocInfo()) { |
4797 | default: |
4798 | llvm_unreachable("Unknown loc info!")::llvm::llvm_unreachable_internal("Unknown loc info!", "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp" , 4798); |
4799 | case CCValAssign::Full: |
4800 | break; |
4801 | case CCValAssign::Indirect: |
4802 | assert(VA.getValVT().isScalableVector() &&
4803 | "Only scalable vectors can be passed indirectly");
4804 | break; |
4805 | case CCValAssign::BCvt: |
4806 | ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue); |
4807 | break; |
4808 | case CCValAssign::AExt: |
4809 | case CCValAssign::SExt: |
4810 | case CCValAssign::ZExt: |
4811 | break; |
4812 | case CCValAssign::AExtUpper: |
4813 | ArgValue = DAG.getNode(ISD::SRL, DL, RegVT, ArgValue, |
4814 | DAG.getConstant(32, DL, RegVT)); |
4815 | ArgValue = DAG.getZExtOrTrunc(ArgValue, DL, VA.getValVT()); |
4816 | break; |
4817 | } |
4818 | } else { // VA.isRegLoc() |
4819 | assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem");
4820 | unsigned ArgOffset = VA.getLocMemOffset(); |
4821 | unsigned ArgSize = (VA.getLocInfo() == CCValAssign::Indirect |
4822 | ? VA.getLocVT().getSizeInBits() |
4823 | : VA.getValVT().getSizeInBits()) / 8; |
4824 | |
4825 | uint32_t BEAlign = 0; |
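     | // On big-endian targets a sub-8-byte value occupies the trailing bytes of its
     | // 8-byte slot, so skip the leading padding when computing the load offset.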
4826 | if (!Subtarget->isLittleEndian() && ArgSize < 8 && |
4827 | !Ins[i].Flags.isInConsecutiveRegs()) |
4828 | BEAlign = 8 - ArgSize; |
4829 | |
4830 | int FI = MFI.CreateFixedObject(ArgSize, ArgOffset + BEAlign, true); |
4831 | |
4832 | // Create load nodes to retrieve arguments from the stack. |
4833 | SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); |
4834 | |
4835 | // For NON_EXTLOAD, generic code in getLoad asserts that ValVT == MemVT.
4836 | ISD::LoadExtType ExtType = ISD::NON_EXTLOAD; |
4837 | MVT MemVT = VA.getValVT(); |
4838 | |
4839 | switch (VA.getLocInfo()) { |
4840 | default: |
4841 | break; |
4842 | case CCValAssign::Trunc: |
4843 | case CCValAssign::BCvt: |
4844 | MemVT = VA.getLocVT(); |
4845 | break; |
4846 | case CCValAssign::Indirect: |
4847 | assert(VA.getValVT().isScalableVector() &&
4848 | "Only scalable vectors can be passed indirectly");
4849 | MemVT = VA.getLocVT(); |
4850 | break; |
4851 | case CCValAssign::SExt: |
4852 | ExtType = ISD::SEXTLOAD; |
4853 | break; |
4854 | case CCValAssign::ZExt: |
4855 | ExtType = ISD::ZEXTLOAD; |
4856 | break; |
4857 | case CCValAssign::AExt: |
4858 | ExtType = ISD::EXTLOAD; |
4859 | break; |
4860 | } |
4861 | |
4862 | ArgValue = DAG.getExtLoad( |
4863 | ExtType, DL, VA.getLocVT(), Chain, FIN, |
4864 | MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), |
4865 | MemVT); |
4866 | |
4867 | } |
4868 | |
4869 | if (VA.getLocInfo() == CCValAssign::Indirect) { |
4870 | assert(VA.getValVT().isScalableVector() &&
4871 | "Only scalable vectors can be passed indirectly");
4872 | |
4873 | uint64_t PartSize = VA.getValVT().getStoreSize().getKnownMinSize(); |
4874 | unsigned NumParts = 1; |
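     | // Tuple parts are flagged as consecutive registers; count members up to the
     | // one marked as the last part.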
4875 | if (Ins[i].Flags.isInConsecutiveRegs()) { |
4876 | assert(!Ins[i].Flags.isInConsecutiveRegsLast());
4877 | while (!Ins[i + NumParts - 1].Flags.isInConsecutiveRegsLast()) |
4878 | ++NumParts; |
4879 | } |
4880 | |
4881 | MVT PartLoad = VA.getValVT(); |
4882 | SDValue Ptr = ArgValue; |
4883 | |
4884 | // Ensure we generate all loads for each tuple part, whilst updating the |
4885 | // pointer after each load correctly using vscale. |
4886 | while (NumParts > 0) { |
4887 | ArgValue = DAG.getLoad(PartLoad, DL, Chain, Ptr, MachinePointerInfo()); |
4888 | InVals.push_back(ArgValue); |
4889 | NumParts--; |
4890 | if (NumParts > 0) { |
4891 | SDValue BytesIncrement = DAG.getVScale( |
4892 | DL, Ptr.getValueType(), |
4893 | APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize)); |
4894 | SDNodeFlags Flags; |
4895 | Flags.setNoUnsignedWrap(true); |
4896 | Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, |
4897 | BytesIncrement, Flags); |
4898 | ExtraArgLocs++; |
4899 | i++; |
4900 | } |
4901 | } |
4902 | } else { |
4903 | if (Subtarget->isTargetILP32() && Ins[i].Flags.isPointer()) |
4904 | ArgValue = DAG.getNode(ISD::AssertZext, DL, ArgValue.getValueType(), |
4905 | ArgValue, DAG.getValueType(MVT::i32)); |
4906 | InVals.push_back(ArgValue); |
4907 | } |
4908 | } |
4909 | assert((ArgLocs.size() + ExtraArgLocs) == Ins.size());
4910 | |
4911 | // varargs |
4912 | AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); |
4913 | if (isVarArg) { |
4914 | if (!Subtarget->isTargetDarwin() || IsWin64) { |
4915 | // The AAPCS variadic function ABI is identical to the non-variadic |
4916 | // one. As a result there may be more arguments in registers and we should |
4917 | // save them for future reference. |
4918 | // Win64 variadic functions also pass arguments in registers, but all float |
4919 | // arguments are passed in integer registers. |
4920 | saveVarArgRegisters(CCInfo, DAG, DL, Chain); |
4921 | } |
4922 | |
4923 | // This will point to the next argument passed via stack. |
4924 | unsigned StackOffset = CCInfo.getNextStackOffset(); |
4925 | // We currently pass all varargs at 8-byte alignment, or 4 for ILP32 |
4926 | StackOffset = alignTo(StackOffset, Subtarget->isTargetILP32() ? 4 : 8); |
4927 | FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true)); |
4928 | |
4929 | if (MFI.hasMustTailInVarArgFunc()) { |
4930 | SmallVector<MVT, 2> RegParmTypes; |
4931 | RegParmTypes.push_back(MVT::i64); |
4932 | RegParmTypes.push_back(MVT::f128); |
4933 | // Compute the set of forwarded registers. The rest are scratch. |
4934 | SmallVectorImpl<ForwardedRegister> &Forwards = |
4935 | FuncInfo->getForwardedMustTailRegParms(); |
4936 | CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, |
4937 | CC_AArch64_AAPCS); |
4938 | |
4939 | // Conservatively forward X8, since it might be used for aggregate return. |
4940 | if (!CCInfo.isAllocated(AArch64::X8)) { |
4941 | unsigned X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass); |
4942 | Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64)); |
4943 | } |
4944 | } |
4945 | } |
4946 | |
4947 | // On Windows, InReg pointers must be returned, so record the pointer in a |
4948 | // virtual register at the start of the function so it can be returned in the |
4949 | // epilogue. |
4950 | if (IsWin64) { |
4951 | for (unsigned I = 0, E = Ins.size(); I != E; ++I) { |
4952 | if (Ins[I].Flags.isInReg()) { |
4953 | assert(!FuncInfo->getSRetReturnReg());
4954 | |
4955 | MVT PtrTy = getPointerTy(DAG.getDataLayout()); |
4956 | Register Reg = |
4957 | MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy)); |
4958 | FuncInfo->setSRetReturnReg(Reg); |
4959 | |
4960 | SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[I]); |
4961 | Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Copy, Chain); |
4962 | break; |
4963 | } |
4964 | } |
4965 | } |
4966 | |
4967 | unsigned StackArgSize = CCInfo.getNextStackOffset(); |
4968 | bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt; |
4969 | if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) { |
4970 | // This is a non-standard ABI so by fiat I say we're allowed to make full |
4971 | // use of the stack area to be popped, which must be aligned to 16 bytes in |
4972 | // any case: |
4973 | StackArgSize = alignTo(StackArgSize, 16); |
4974 | |
4975 | // If we're expected to restore the stack (e.g. fastcc) then we'll be adding |
4976 | // a multiple of 16. |
4977 | FuncInfo->setArgumentStackToRestore(StackArgSize); |
4978 | |
4979 | // This realignment carries over to the available bytes below. Our own |
4980 | // callers will guarantee the space is free by giving an aligned value to |
4981 | // CALLSEQ_START. |
4982 | } |
4983 | // Even if we're not expected to free up the space, it's useful to know how |
4984 | // much is there while considering tail calls (because we can reuse it). |
4985 | FuncInfo->setBytesInStackArgArea(StackArgSize); |
4986 | |
4987 | if (Subtarget->hasCustomCallingConv()) |
4988 | Subtarget->getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF); |
4989 | |
4990 | return Chain; |
4991 | } |
4992 | |
4993 | void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo, |
4994 | SelectionDAG &DAG, |
4995 | const SDLoc &DL, |
4996 | SDValue &Chain) const { |
4997 | MachineFunction &MF = DAG.getMachineFunction(); |
4998 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
4999 | AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); |
5000 | auto PtrVT = getPointerTy(DAG.getDataLayout()); |
5001 | bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv()); |
5002 | |
5003 | SmallVector<SDValue, 8> MemOps; |
5004 | |
5005 | static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2, |
5006 | AArch64::X3, AArch64::X4, AArch64::X5, |
5007 | AArch64::X6, AArch64::X7 }; |
5008 | static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs); |
5009 | unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs); |
5010 | |
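     | // Only the GPRs not already claimed by named arguments need to be spilled so
     | // va_arg can find them.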
5011 | unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR); |
5012 | int GPRIdx = 0; |
5013 | if (GPRSaveSize != 0) { |
5014 | if (IsWin64) { |
5015 | GPRIdx = MFI.CreateFixedObject(GPRSaveSize, -(int)GPRSaveSize, false); |
5016 | if (GPRSaveSize & 15) |
5017 | // The extra size here, if triggered, will always be 8. |
5018 | MFI.CreateFixedObject(16 - (GPRSaveSize & 15), -(int)alignTo(GPRSaveSize, 16), false); |
5019 | } else |
5020 | GPRIdx = MFI.CreateStackObject(GPRSaveSize, Align(8), false); |
5021 | |
5022 | SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT); |
5023 | |
5024 | for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) { |
5025 | unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass); |
5026 | SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64); |
5027 | SDValue Store = DAG.getStore( |
5028 | Val.getValue(1), DL, Val, FIN, |
5029 | IsWin64 |
5030 | ? MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), |
5031 | GPRIdx, |
5032 | (i - FirstVariadicGPR) * 8) |
5033 | : MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 8)); |
5034 | MemOps.push_back(Store); |
5035 | FIN = |
5036 | DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT)); |
5037 | } |
5038 | } |
5039 | FuncInfo->setVarArgsGPRIndex(GPRIdx); |
5040 | FuncInfo->setVarArgsGPRSize(GPRSaveSize); |
5041 | |
5042 | if (Subtarget->hasFPARMv8() && !IsWin64) { |
5043 | static const MCPhysReg FPRArgRegs[] = { |
5044 | AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, |
5045 | AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7}; |
5046 | static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs); |
5047 | unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs); |
5048 | |
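     | // Each remaining FPR is saved as a full 16-byte Q register.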
5049 | unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR); |
5050 | int FPRIdx = 0; |
5051 | if (FPRSaveSize != 0) { |
5052 | FPRIdx = MFI.CreateStackObject(FPRSaveSize, Align(16), false); |
5053 | |
5054 | SDValue FIN = DAG.getFrameIndex(FPRIdx, PtrVT); |
5055 | |
5056 | for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) { |
5057 | unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass); |
5058 | SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128); |
5059 | |
5060 | SDValue Store = DAG.getStore( |
5061 | Val.getValue(1), DL, Val, FIN, |
5062 | MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 16)); |
5063 | MemOps.push_back(Store); |
5064 | FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, |
5065 | DAG.getConstant(16, DL, PtrVT)); |
5066 | } |
5067 | } |
5068 | FuncInfo->setVarArgsFPRIndex(FPRIdx); |
5069 | FuncInfo->setVarArgsFPRSize(FPRSaveSize); |
5070 | } |
5071 | |
5072 | if (!MemOps.empty()) { |
5073 | Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps); |
5074 | } |
5075 | } |
5076 | |
5077 | /// LowerCallResult - Lower the result values of a call into the |
5078 | /// appropriate copies out of appropriate physical registers. |
5079 | SDValue AArch64TargetLowering::LowerCallResult( |
5080 | SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, |
5081 | const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL, |
5082 | SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn, |
5083 | SDValue ThisVal) const { |
5084 | CCAssignFn *RetCC = CCAssignFnForReturn(CallConv); |
5085 | // Assign locations to each value returned by this call. |
5086 | SmallVector<CCValAssign, 16> RVLocs; |
5087 | DenseMap<unsigned, SDValue> CopiedRegs; |
5088 | CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, |
5089 | *DAG.getContext()); |
5090 | CCInfo.AnalyzeCallResult(Ins, RetCC); |
5091 | |
5092 | // Copy all of the result registers out of their specified physreg. |
5093 | for (unsigned i = 0; i != RVLocs.size(); ++i) { |
5094 | CCValAssign VA = RVLocs[i]; |
5095 | |
5096 | // Pass 'this' value directly from the argument to return value, to avoid |
5097 | // reg unit interference |
5098 | if (i == 0 && isThisReturn) { |
5099 | assert(!VA.needsCustom() && VA.getLocVT() == MVT::i64 &&
5100 | "unexpected return calling convention register assignment");
5101 | InVals.push_back(ThisVal); |
5102 | continue; |
5103 | } |
5104 | |
5105 | // Avoid copying a physreg twice since RegAllocFast is incompetent and only |
5106 | // allows one use of a physreg per block. |
5107 | SDValue Val = CopiedRegs.lookup(VA.getLocReg()); |
5108 | if (!Val) { |
5109 | Val = |
5110 | DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag); |
5111 | Chain = Val.getValue(1); |
5112 | InFlag = Val.getValue(2); |
5113 | CopiedRegs[VA.getLocReg()] = Val; |
5114 | } |
5115 | |
5116 | switch (VA.getLocInfo()) { |
5117 | default: |
5118 | llvm_unreachable("Unknown loc info!")::llvm::llvm_unreachable_internal("Unknown loc info!", "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp" , 5118); |
5119 | case CCValAssign::Full: |
5120 | break; |
5121 | case CCValAssign::BCvt: |
5122 | Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val); |
5123 | break; |
5124 | case CCValAssign::AExtUpper: |
5125 | Val = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Val, |
5126 | DAG.getConstant(32, DL, VA.getLocVT())); |
5127 | LLVM_FALLTHROUGH;
5128 | case CCValAssign::AExt: |
5129 | LLVM_FALLTHROUGH;
5130 | case CCValAssign::ZExt: |
5131 | Val = DAG.getZExtOrTrunc(Val, DL, VA.getValVT()); |
5132 | break; |
5133 | } |
5134 | |
5135 | InVals.push_back(Val); |
5136 | } |
5137 | |
5138 | return Chain; |
5139 | } |
5140 | |
5141 | /// Return true if the calling convention is one that we can guarantee TCO for. |
5142 | static bool canGuaranteeTCO(CallingConv::ID CC) { |
5143 | return CC == CallingConv::Fast; |
5144 | } |
5145 | |
5146 | /// Return true if we might ever do TCO for calls with this calling convention. |
5147 | static bool mayTailCallThisCC(CallingConv::ID CC) { |
5148 | switch (CC) { |
5149 | case CallingConv::C: |
5150 | case CallingConv::AArch64_SVE_VectorCall: |
5151 | case CallingConv::PreserveMost: |
5152 | case CallingConv::Swift: |
5153 | return true; |
5154 | default: |
5155 | return canGuaranteeTCO(CC); |
5156 | } |
5157 | } |
5158 | |
5159 | bool AArch64TargetLowering::isEligibleForTailCallOptimization( |
5160 | SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg, |
5161 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
5162 | const SmallVectorImpl<SDValue> &OutVals, |
5163 | const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const { |
5164 | if (!mayTailCallThisCC(CalleeCC)) |
5165 | return false; |
5166 | |
5167 | MachineFunction &MF = DAG.getMachineFunction(); |
5168 | const Function &CallerF = MF.getFunction(); |
5169 | CallingConv::ID CallerCC = CallerF.getCallingConv(); |
5170 | |
5171 | // Functions using the C or Fast calling convention that have an SVE signature |
5172 | // preserve more registers and should assume the SVE_VectorCall CC. |
5173 | // The check for matching callee-saved regs will determine whether it is |
5174 | // eligible for TCO. |
5175 | if ((CallerCC == CallingConv::C || CallerCC == CallingConv::Fast) && |
5176 | AArch64RegisterInfo::hasSVEArgsOrReturn(&MF)) |
5177 | CallerCC = CallingConv::AArch64_SVE_VectorCall; |
5178 | |
5179 | bool CCMatch = CallerCC == CalleeCC; |
5180 | |
5181 | // When using the Windows calling convention on a non-windows OS, we want |
5182 | // to back up and restore X18 in such functions; we can't do a tail call |
5183 | // from those functions. |
5184 | if (CallerCC == CallingConv::Win64 && !Subtarget->isTargetWindows() && |
5185 | CalleeCC != CallingConv::Win64) |
5186 | return false; |
5187 | |
5188 | // Byval parameters hand the function a pointer directly into the stack area |
5189 | // we want to reuse during a tail call. Working around this *is* possible (see |
5190 | // X86) but less efficient and uglier in LowerCall. |
5191 | for (Function::const_arg_iterator i = CallerF.arg_begin(), |
5192 | e = CallerF.arg_end(); |
5193 | i != e; ++i) { |
5194 | if (i->hasByValAttr()) |
5195 | return false; |
5196 | |
5197 | // On Windows, "inreg" attributes signify non-aggregate indirect returns. |
5198 | // In this case, it is necessary to save/restore X0 in the callee. Tail |
5199 | // call opt interferes with this. So we disable tail call opt when the |
5200 | // caller has an argument with "inreg" attribute. |
5201 | |
5202 | // FIXME: Check whether the callee also has an "inreg" argument. |
5203 | if (i->hasInRegAttr()) |
5204 | return false; |
5205 | } |
5206 | |
5207 | if (getTargetMachine().Options.GuaranteedTailCallOpt) |
5208 | return canGuaranteeTCO(CalleeCC) && CCMatch; |
5209 | |
5210 | // Externally-defined functions with weak linkage should not be |
5211 | // tail-called on AArch64 when the OS does not support dynamic |
5212 | // pre-emption of symbols, as the AAELF spec requires normal calls |
5213 | // to undefined weak functions to be replaced with a NOP or jump to the |
5214 | // next instruction. The behaviour of branch instructions in this |
5215 | // situation (as used for tail calls) is implementation-defined, so we |
5216 | // cannot rely on the linker replacing the tail call with a return. |
5217 | if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { |
5218 | const GlobalValue *GV = G->getGlobal(); |
5219 | const Triple &TT = getTargetMachine().getTargetTriple(); |
5220 | if (GV->hasExternalWeakLinkage() && |
5221 | (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO())) |
5222 | return false; |
5223 | } |
5224 | |
5225 | // Now we search for cases where we can use a tail call without changing the |
5226 | // ABI. Sibcall is used in some places (particularly gcc) to refer to this |
5227 | // concept. |
5228 | |
5229 | // I want anyone implementing a new calling convention to think long and hard |
5230 | // about this assert. |
5231 | assert((!isVarArg || CalleeCC == CallingConv::C) &&
5232 | "Unexpected variadic calling convention");
5233 | |
5234 | LLVMContext &C = *DAG.getContext(); |
5235 | if (isVarArg && !Outs.empty()) { |
5236 | // At least two cases here: if caller is fastcc then we can't have any |
5237 | // memory arguments (we'd be expected to clean up the stack afterwards). If |
5238 | // caller is C then we could potentially use its argument area. |
5239 | |
5240 | // FIXME: for now we take the most conservative of these in both cases: |
5241 | // disallow all variadic memory operands. |
5242 | SmallVector<CCValAssign, 16> ArgLocs; |
5243 | CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C); |
5244 | |
5245 | CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, true)); |
5246 | for (const CCValAssign &ArgLoc : ArgLocs) |
5247 | if (!ArgLoc.isRegLoc()) |
5248 | return false; |
5249 | } |
5250 | |
5251 | // Check that the call results are passed in the same way. |
5252 | if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins, |
5253 | CCAssignFnForCall(CalleeCC, isVarArg), |
5254 | CCAssignFnForCall(CallerCC, isVarArg))) |
5255 | return false; |
5256 | // The callee has to preserve all registers the caller needs to preserve. |
5257 | const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo(); |
5258 | const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); |
5259 | if (!CCMatch) { |
5260 | const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); |
5261 | if (Subtarget->hasCustomCallingConv()) { |
5262 | TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved); |
5263 | TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved); |
5264 | } |
5265 | if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) |
5266 | return false; |
5267 | } |
5268 | |
5269 | // Nothing more to check if the callee is taking no arguments |
5270 | if (Outs.empty()) |
5271 | return true; |
5272 | |
5273 | SmallVector<CCValAssign, 16> ArgLocs; |
5274 | CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C); |
5275 | |
5276 | CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg)); |
5277 | |
5278 | const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); |
5279 | |
5280 | // If any of the arguments is passed indirectly, it must be SVE, so the |
5281 | // 'getBytesInStackArgArea' is not sufficient to determine whether we need to |
5282 | // allocate space on the stack. That is why we determine explicitly here
5283 | // that the call cannot be a tail call.
5284 | if (llvm::any_of(ArgLocs, [](CCValAssign &A) { |
5285 | assert((A.getLocInfo() != CCValAssign::Indirect ||
5286 | A.getValVT().isScalableVector()) &&
5287 | "Expected value to be scalable");
5288 | return A.getLocInfo() == CCValAssign::Indirect; |
5289 | })) |
5290 | return false; |
5291 | |
5292 | // If the stack arguments for this call do not fit into our own save area then |
5293 | // the call cannot be made tail. |
5294 | if (CCInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea()) |
5295 | return false; |
5296 | |
5297 | const MachineRegisterInfo &MRI = MF.getRegInfo(); |
5298 | if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals)) |
5299 | return false; |
5300 | |
5301 | return true; |
5302 | } |
5303 | |
5304 | SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain, |
5305 | SelectionDAG &DAG, |
5306 | MachineFrameInfo &MFI, |
5307 | int ClobberedFI) const { |
5308 | SmallVector<SDValue, 8> ArgChains; |
5309 | int64_t FirstByte = MFI.getObjectOffset(ClobberedFI); |
5310 | int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1; |
5311 | |
5312 | // Include the original chain at the beginning of the list. When this is |
5313 | // used by target LowerCall hooks, this helps legalize find the |
5314 | // CALLSEQ_BEGIN node. |
5315 | ArgChains.push_back(Chain); |
5316 | |
5317 | // Add a chain value for each stack argument load that overlaps the clobbered area.
5318 | for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(), |
5319 | UE = DAG.getEntryNode().getNode()->use_end(); |
5320 | U != UE; ++U) |
5321 | if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U)) |
5322 | if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr())) |
5323 | if (FI->getIndex() < 0) { |
5324 | int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex()); |
5325 | int64_t InLastByte = InFirstByte; |
5326 | InLastByte += MFI.getObjectSize(FI->getIndex()) - 1; |
5327 | |
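     | // Two byte ranges overlap iff either one's first byte falls inside the other.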
5328 | if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) || |
5329 | (FirstByte <= InFirstByte && InFirstByte <= LastByte)) |
5330 | ArgChains.push_back(SDValue(L, 1)); |
5331 | } |
5332 | |
5333 | // Build a tokenfactor for all the chains. |
5334 | return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains); |
5335 | } |
5336 | |
5337 | bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC, |
5338 | bool TailCallOpt) const { |
5339 | return CallCC == CallingConv::Fast && TailCallOpt; |
5340 | } |
5341 | |
5342 | /// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain, |
5343 | /// and add input and output parameter nodes. |
5344 | SDValue |
5345 | AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, |
5346 | SmallVectorImpl<SDValue> &InVals) const { |
5347 | SelectionDAG &DAG = CLI.DAG; |
5348 | SDLoc &DL = CLI.DL; |
5349 | SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs; |
5350 | SmallVector<SDValue, 32> &OutVals = CLI.OutVals; |
5351 | SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins; |
5352 | SDValue Chain = CLI.Chain; |
5353 | SDValue Callee = CLI.Callee; |
5354 | bool &IsTailCall = CLI.IsTailCall; |
5355 | CallingConv::ID CallConv = CLI.CallConv; |
5356 | bool IsVarArg = CLI.IsVarArg; |
5357 | |
5358 | MachineFunction &MF = DAG.getMachineFunction(); |
5359 | MachineFunction::CallSiteInfo CSInfo; |
5360 | bool IsThisReturn = false; |
5361 | |
5362 | AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); |
5363 | bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt; |
5364 | bool IsSibCall = false; |
5365 | |
5366 | // Check callee args/returns for SVE registers and set calling convention |
5367 | // accordingly. |
5368 | if (CallConv == CallingConv::C || CallConv == CallingConv::Fast) { |
5369 | bool CalleeOutSVE = any_of(Outs, [](ISD::OutputArg &Out){ |
5370 | return Out.VT.isScalableVector(); |
5371 | }); |
5372 | bool CalleeInSVE = any_of(Ins, [](ISD::InputArg &In){ |
5373 | return In.VT.isScalableVector(); |
5374 | }); |
5375 | |
5376 | if (CalleeInSVE || CalleeOutSVE) |
5377 | CallConv = CallingConv::AArch64_SVE_VectorCall; |
5378 | } |
5379 | |
5380 | if (IsTailCall) { |
5381 | // Check if it's really possible to do a tail call. |
5382 | IsTailCall = isEligibleForTailCallOptimization( |
5383 | Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG); |
5384 | if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall()) |
5385 | report_fatal_error("failed to perform tail call elimination on a call " |
5386 | "site marked musttail"); |
5387 | |
5388 | // A sibling call is one where we're under the usual C ABI and not planning |
5389 | // to change that but can still do a tail call: |
5390 | if (!TailCallOpt && IsTailCall) |
5391 | IsSibCall = true; |
5392 | |
5393 | if (IsTailCall) |
5394 | ++NumTailCalls; |
5395 | } |
5396 | |
5397 | // Analyze operands of the call, assigning locations to each operand. |
5398 | SmallVector<CCValAssign, 16> ArgLocs; |
5399 | CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs, |
5400 | *DAG.getContext()); |
5401 | |
5402 | if (IsVarArg) { |
5403 | // Handle fixed and variable vector arguments differently. |
5404 | // Variable vector arguments always go into memory. |
5405 | unsigned NumArgs = Outs.size(); |
5406 | |
5407 | for (unsigned i = 0; i != NumArgs; ++i) { |
5408 | MVT ArgVT = Outs[i].VT; |
5409 | if (!Outs[i].IsFixed && ArgVT.isScalableVector()) |
5410 | report_fatal_error("Passing SVE types to variadic functions is " |
5411 | "currently not supported"); |
5412 | |
5413 | ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; |
5414 | CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, |
5415 | /*IsVarArg=*/ !Outs[i].IsFixed); |
5416 | bool Res = AssignFn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo); |
5417 | assert(!Res && "Call operand has unhandled type");
5418 | (void)Res; |
5419 | } |
5420 | } else { |
5421 | // At this point, Outs[].VT may already be promoted to i32. To correctly |
5422 | // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and |
5423 | // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT. |
5424 | // Since AnalyzeCallOperands uses Ins[].VT for both ValVT and LocVT, here |
5425 | // we use a special version of AnalyzeCallOperands to pass in ValVT and |
5426 | // LocVT. |
5427 | unsigned NumArgs = Outs.size(); |
5428 | for (unsigned i = 0; i != NumArgs; ++i) { |
5429 | MVT ValVT = Outs[i].VT; |
5430 | // Get type of the original argument. |
5431 | EVT ActualVT = getValueType(DAG.getDataLayout(), |
5432 | CLI.getArgs()[Outs[i].OrigArgIndex].Ty, |
5433 | /*AllowUnknown*/ true); |
5434 | MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ValVT; |
5435 | ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; |
5436 | // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16. |
5437 | if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8) |
5438 | ValVT = MVT::i8; |
5439 | else if (ActualMVT == MVT::i16) |
5440 | ValVT = MVT::i16; |
5441 | |
5442 | CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false); |
5443 | bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full, ArgFlags, CCInfo); |
5444 | assert(!Res && "Call operand has unhandled type");
5445 | (void)Res; |
5446 | } |
5447 | } |
5448 | |
5449 | // Get a count of how many bytes are to be pushed on the stack. |
5450 | unsigned NumBytes = CCInfo.getNextStackOffset(); |
5451 | |
5452 | if (IsSibCall) { |
5453 | // Since we're not changing the ABI to make this a tail call, the memory |
5454 | // operands are already available in the caller's incoming argument space. |
5455 | NumBytes = 0; |
5456 | } |
5457 | |
5458 | // FPDiff is the byte offset of the call's argument area from the callee's. |
5459 | // Stores to callee stack arguments will be placed in FixedStackSlots offset |
5460 | // by this amount for a tail call. In a sibling call it must be 0 because the |
5461 | // caller will deallocate the entire stack and the callee still expects its |
5462 | // arguments to begin at SP+0. Completely unused for non-tail calls. |
5463 | int FPDiff = 0; |
5464 | |
5465 | if (IsTailCall && !IsSibCall) { |
5466 | unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea(); |
5467 | |
5468 | // Since callee will pop argument stack as a tail call, we must keep the |
5469 | // popped size 16-byte aligned. |
5470 | NumBytes = alignTo(NumBytes, 16); |
5471 | |
5472 | // FPDiff will be negative if this tail call requires more space than we |
5473 | // would automatically have in our incoming argument space. Positive if we |
5474 | // can actually shrink the stack. |
5475 | FPDiff = NumReusableBytes - NumBytes; |
5476 | |
5477 | // The stack pointer must be 16-byte aligned at all times it's used for a |
5478 | // memory operation, which in practice means at *all* times and in |
5479 | // particular across call boundaries. Therefore our own arguments started at |
5480 | // a 16-byte aligned SP and the delta applied for the tail call should |
5481 | // satisfy the same constraint. |
5482 | assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
5483 | } |
5484 | |
5485 | // Adjust the stack pointer for the new arguments... |
5486 | // These operations are automatically eliminated by the prolog/epilog pass |
5487 | if (!IsSibCall) |
5488 | Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL); |
5489 | |
5490 | SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP, |
5491 | getPointerTy(DAG.getDataLayout())); |
5492 | |
5493 | SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; |
5494 | SmallSet<unsigned, 8> RegsUsed; |
5495 | SmallVector<SDValue, 8> MemOpChains; |
5496 | auto PtrVT = getPointerTy(DAG.getDataLayout()); |
5497 | |
5498 | if (IsVarArg && CLI.CB && CLI.CB->isMustTailCall()) { |
5499 | const auto &Forwards = FuncInfo->getForwardedMustTailRegParms(); |
5500 | for (const auto &F : Forwards) { |
5501 | SDValue Val = DAG.getCopyFromReg(Chain, DL, F.VReg, F.VT); |
5502 | RegsToPass.emplace_back(F.PReg, Val); |
5503 | } |
5504 | } |
5505 | |
5506 | // Walk the register/memloc assignments, inserting copies/loads. |
5507 | unsigned ExtraArgLocs = 0; |
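     | // As with formal arguments, indirect SVE tuples take several Outs[] entries
     | // per CCValAssign; ExtraArgLocs keeps the two lists aligned.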
5508 | for (unsigned i = 0, e = Outs.size(); i != e; ++i) { |
5509 | CCValAssign &VA = ArgLocs[i - ExtraArgLocs]; |
5510 | SDValue Arg = OutVals[i]; |
5511 | ISD::ArgFlagsTy Flags = Outs[i].Flags; |
5512 | |
5513 | // Promote the value if needed. |
5514 | switch (VA.getLocInfo()) { |
5515 | default: |
5516 | llvm_unreachable("Unknown loc info!")::llvm::llvm_unreachable_internal("Unknown loc info!", "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp" , 5516); |
5517 | case CCValAssign::Full: |
5518 | break; |
5519 | case CCValAssign::SExt: |
5520 | Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg); |
5521 | break; |
5522 | case CCValAssign::ZExt: |
5523 | Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg); |
5524 | break; |
5525 | case CCValAssign::AExt: |
5526 | if (Outs[i].ArgVT == MVT::i1) { |
5527 | // AAPCS requires i1 to be zero-extended to 8-bits by the caller. |
5528 | Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg); |
5529 | Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i8, Arg); |
5530 | } |
5531 | Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg); |
5532 | break; |
5533 | case CCValAssign::AExtUpper: |
5534 | assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
5535 | Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg); |
5536 | Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg, |
5537 | DAG.getConstant(32, DL, VA.getLocVT())); |
5538 | break; |
5539 | case CCValAssign::BCvt: |
5540 | Arg = DAG.getBitcast(VA.getLocVT(), Arg); |
5541 | break; |
5542 | case CCValAssign::Trunc: |
5543 | Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT()); |
5544 | break; |
5545 | case CCValAssign::FPExt: |
5546 | Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg); |
5547 | break; |
5548 | case CCValAssign::Indirect: |
5549 | assert(VA.getValVT().isScalableVector() &&
5550 | "Only scalable vectors can be passed indirectly");
5551 | |
5552 | uint64_t StoreSize = VA.getValVT().getStoreSize().getKnownMinSize(); |
5553 | uint64_t PartSize = StoreSize; |
5554 | unsigned NumParts = 1; |
5555 | if (Outs[i].Flags.isInConsecutiveRegs()) { |
5556 | assert(!Outs[i].Flags.isInConsecutiveRegsLast());
5557 | while (!Outs[i + NumParts - 1].Flags.isInConsecutiveRegsLast()) |
5558 | ++NumParts; |
5559 | StoreSize *= NumParts; |
5560 | } |
5561 | |
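     | // Spill the (possibly multi-part) scalable value to a single stack slot and
     | // pass the slot's address instead.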
5562 | MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); |
5563 | Type *Ty = EVT(VA.getValVT()).getTypeForEVT(*DAG.getContext()); |
5564 | Align Alignment = DAG.getDataLayout().getPrefTypeAlign(Ty); |
5565 | int FI = MFI.CreateStackObject(StoreSize, Alignment, false); |
5566 | MFI.setStackID(FI, TargetStackID::ScalableVector); |
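     | // The slot holds scalable data, so frame lowering must size and address it
     | // using vscale.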
5567 | |
5568 | MachinePointerInfo MPI = |
5569 | MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI); |
5570 | SDValue Ptr = DAG.getFrameIndex( |
5571 | FI, DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout())); |
5572 | SDValue SpillSlot = Ptr; |
5573 | |
5574 | // Ensure we generate all stores for each tuple part, whilst updating the |
5575 | // pointer after each store correctly using vscale. |
5576 | while (NumParts) { |
5577 | Chain = DAG.getStore(Chain, DL, OutVals[i], Ptr, MPI); |
5578 | NumParts--; |
5579 | if (NumParts > 0) { |
5580 | SDValue BytesIncrement = DAG.getVScale( |
5581 | DL, Ptr.getValueType(), |
5582 | APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize)); |
5583 | SDNodeFlags Flags; |
5584 | Flags.setNoUnsignedWrap(true); |
5585 | |
5586 | MPI = MachinePointerInfo(MPI.getAddrSpace()); |
5587 | Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, |
5588 | BytesIncrement, Flags); |
5589 | ExtraArgLocs++; |
5590 | i++; |
5591 | } |
5592 | } |
5593 | |
5594 | Arg = SpillSlot; |
5595 | break; |
5596 | } |
5597 | |
5598 | if (VA.isRegLoc()) { |
5599 | if (i == 0 && Flags.isReturned() && !Flags.isSwiftSelf() && |
5600 | Outs[0].VT == MVT::i64) { |
5601 | assert(VA.getLocVT() == MVT::i64 &&
5602 | "unexpected calling convention register assignment");
5603 | assert(!Ins.empty() && Ins[0].VT == MVT::i64 &&
5604 | "unexpected use of 'returned'");
5605 | IsThisReturn = true; |
5606 | } |
5607 | if (RegsUsed.count(VA.getLocReg())) { |
5608 | // If this register has already been used then we're trying to pack |
5609 | // parts of an [N x i32] into an X-register. The extension type will |
5610 | // take care of putting the two halves in the right place but we have to |
5611 | // combine them. |
5612 | SDValue &Bits = |
5613 | llvm::find_if(RegsToPass, |
5614 | [=](const std::pair<unsigned, SDValue> &Elt) { |
5615 | return Elt.first == VA.getLocReg(); |
5616 | }) |
5617 | ->second; |
5618 | Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg); |
5619 | // Call site info is used for function's parameter entry value |
5620 | // tracking. For now we track only simple cases when parameter |
5621 | // is transferred through whole register. |
5622 | llvm::erase_if(CSInfo, [&VA](MachineFunction::ArgRegPair ArgReg) { |
5623 | return ArgReg.Reg == VA.getLocReg(); |
5624 | }); |
5625 | } else { |
5626 | RegsToPass.emplace_back(VA.getLocReg(), Arg); |
5627 | RegsUsed.insert(VA.getLocReg()); |
5628 | const TargetOptions &Options = DAG.getTarget().Options; |
5629 | if (Options.EmitCallSiteInfo) |
5630 | CSInfo.emplace_back(VA.getLocReg(), i); |
5631 | } |
5632 | } else { |
5633 | assert(VA.isMemLoc());
5634 | |
5635 | SDValue DstAddr; |
5636 | MachinePointerInfo DstInfo; |
5637 | |
5638 | // FIXME: This works on big-endian for composite byvals, which are the |
5639 | // common case. It should also work for fundamental types too. |
5640 | uint32_t BEAlign = 0; |
5641 | unsigned OpSize; |
5642 | if (VA.getLocInfo() == CCValAssign::Indirect) |
5643 | OpSize = VA.getLocVT().getFixedSizeInBits(); |
5644 | else |
5645 | OpSize = Flags.isByVal() ? Flags.getByValSize() * 8 |
5646 | : VA.getValVT().getSizeInBits(); |
5647 | OpSize = (OpSize + 7) / 8; |
5648 | if (!Subtarget->isLittleEndian() && !Flags.isByVal() && |
5649 | !Flags.isInConsecutiveRegs()) { |
5650 | if (OpSize < 8) |
5651 | BEAlign = 8 - OpSize; |
5652 | } |
5653 | unsigned LocMemOffset = VA.getLocMemOffset(); |
5654 | int32_t Offset = LocMemOffset + BEAlign; |
5655 | SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL); |
5656 | PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff); |
5657 | |
5658 | if (IsTailCall) { |
5659 | Offset = Offset + FPDiff; |
5660 | int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true); |
5661 | |
5662 | DstAddr = DAG.getFrameIndex(FI, PtrVT); |
5663 | DstInfo = |
5664 | MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI); |
5665 | |
5666 | // Make sure any stack arguments overlapping with where we're storing |
5667 | // are loaded before this eventual operation. Otherwise they'll be |
5668 | // clobbered. |
5669 | Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI); |
5670 | } else { |
5671 | SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL); |
5672 | |
5673 | DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff); |
5674 | DstInfo = MachinePointerInfo::getStack(DAG.getMachineFunction(), |
5675 | LocMemOffset); |
5676 | } |
5677 | |
5678 | if (Outs[i].Flags.isByVal()) { |
5679 | SDValue SizeNode = |
5680 | DAG.getConstant(Outs[i].Flags.getByValSize(), DL, MVT::i64); |
5681 | SDValue Cpy = DAG.getMemcpy( |
5682 | Chain, DL, DstAddr, Arg, SizeNode, |
5683 | Outs[i].Flags.getNonZeroByValAlign(), |
5684 | /*isVol = */ false, /*AlwaysInline = */ false, |
5685 | /*isTailCall = */ false, DstInfo, MachinePointerInfo()); |
5686 | |
5687 | MemOpChains.push_back(Cpy); |
5688 | } else { |
5689 | // Since we pass i1/i8/i16 as i1/i8/i16 on stack and Arg is already |
5690 | // promoted to a legal register type i32, we should truncate Arg back to |
5691 | // i1/i8/i16. |
5692 | if (VA.getValVT() == MVT::i1 || VA.getValVT() == MVT::i8 || |
5693 | VA.getValVT() == MVT::i16) |
5694 | Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg); |
5695 | |
5696 | SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo); |
5697 | MemOpChains.push_back(Store); |
5698 | } |
5699 | } |
5700 | } |
5701 | |
5702 | if (!MemOpChains.empty()) |
5703 | Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); |
5704 | |
5705 | // Build a sequence of copy-to-reg nodes chained together with token chain |
5706 | // and flag operands which copy the outgoing args into the appropriate regs. |
5707 | SDValue InFlag; |
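| // The glue value (InFlag) ties each CopyToReg to the next and finally to |
| // the call node itself, so the register copies stay immediately before the |
| // call and cannot be separated or clobbered by the scheduler. |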
5708 | for (auto &RegToPass : RegsToPass) { |
5709 | Chain = DAG.getCopyToReg(Chain, DL, RegToPass.first, |
5710 | RegToPass.second, InFlag); |
5711 | InFlag = Chain.getValue(1); |
5712 | } |
5713 | |
5714 | // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every |
5715 | // direct call is), turn it into a TargetGlobalAddress/TargetExternalSymbol |
5716 | // node so that legalize doesn't hack it. |
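| // Calls that must go through the GOT (as classified below) are wrapped in a |
| // LOADgot node, which typically lowers to an adrp of the GOT page plus an |
| // ldr of the entry. |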
5717 | if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) { |
5718 | auto GV = G->getGlobal(); |
5719 | unsigned OpFlags = |
5720 | Subtarget->classifyGlobalFunctionReference(GV, getTargetMachine()); |
5721 | if (OpFlags & AArch64II::MO_GOT) { |
5722 | Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags); |
5723 | Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee); |
5724 | } else { |
5725 | const GlobalValue *GV = G->getGlobal(); |
5726 | Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0); |
5727 | } |
5728 | } else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { |
5729 | if (getTargetMachine().getCodeModel() == CodeModel::Large && |
5730 | Subtarget->isTargetMachO()) { |
5731 | const char *Sym = S->getSymbol(); |
5732 | Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT); |
5733 | Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee); |
5734 | } else { |
5735 | const char *Sym = S->getSymbol(); |
5736 | Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0); |
5737 | } |
5738 | } |
5739 | |
5740 | // We don't usually want to end the call-sequence here because we would tidy |
5741 | // the frame up *after* the call; however, in the ABI-changing tail-call case |
5742 | // we've carefully laid out the parameters so that when sp is reset they'll be |
5743 | // in the correct location. |
5744 | if (IsTailCall && !IsSibCall) { |
5745 | Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true), |
5746 | DAG.getIntPtrConstant(0, DL, true), InFlag, DL); |
5747 | InFlag = Chain.getValue(1); |
5748 | } |
5749 | |
5750 | std::vector<SDValue> Ops; |
5751 | Ops.push_back(Chain); |
5752 | Ops.push_back(Callee); |
5753 | |
5754 | if (IsTailCall) { |
5755 | // Each tail call may have to adjust the stack by a different amount, so |
5756 | // this information must travel along with the operation for eventual |
5757 | // consumption by emitEpilogue. |
5758 | Ops.push_back(DAG.getTargetConstant(FPDiff, DL, MVT::i32)); |
5759 | } |
5760 | |
5761 | // Add argument registers to the end of the list so that they are known live |
5762 | // into the call. |
5763 | for (auto &RegToPass : RegsToPass) |
5764 | Ops.push_back(DAG.getRegister(RegToPass.first, |
5765 | RegToPass.second.getValueType())); |
5766 | |
5767 | // Add a register mask operand representing the call-preserved registers. |
5768 | const uint32_t *Mask; |
5769 | const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo(); |
5770 | if (IsThisReturn) { |
5771 | // For 'this' returns, use the X0-preserving mask if applicable |
5772 | Mask = TRI->getThisReturnPreservedMask(MF, CallConv); |
5773 | if (!Mask) { |
5774 | IsThisReturn = false; |
5775 | Mask = TRI->getCallPreservedMask(MF, CallConv); |
5776 | } |
5777 | } else |
5778 | Mask = TRI->getCallPreservedMask(MF, CallConv); |
5779 | |
5780 | if (Subtarget->hasCustomCallingConv()) |
5781 | TRI->UpdateCustomCallPreservedMask(MF, &Mask); |
5782 | |
5783 | if (TRI->isAnyArgRegReserved(MF)) |
5784 | TRI->emitReservedArgRegCallError(MF); |
5785 | |
5786 | assert(Mask && "Missing call preserved mask for calling convention"); |
5787 | Ops.push_back(DAG.getRegisterMask(Mask)); |
5788 | |
5789 | if (InFlag.getNode()) |
5790 | Ops.push_back(InFlag); |
5791 | |
5792 | SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); |
5793 | |
5794 | // If we're doing a tail call, use a TC_RETURN here rather than an |
5795 | // actual call instruction. |
5796 | if (IsTailCall) { |
5797 | MF.getFrameInfo().setHasTailCall(); |
5798 | SDValue Ret = DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops); |
5799 | DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo)); |
5800 | return Ret; |
5801 | } |
5802 | |
5803 | unsigned CallOpc = AArch64ISD::CALL; |
5804 | // Calls with operand bundle "clang.arc.attachedcall" are special. They should |
5805 | // be expanded to the call, directly followed by a special marker sequence. |
5806 | // Use the CALL_RVMARKER to do that. |
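| // (On Darwin/arm64 the marker the ObjC runtime looks for is typically a |
| // "mov x29, x29"; keeping the call and marker in a single pseudo stops |
| // anything from being scheduled between them.) |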
5807 | if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) { |
5808 | assert(!IsTailCall && |
5809 | "tail calls cannot be marked with clang.arc.attachedcall"); |
5810 | CallOpc = AArch64ISD::CALL_RVMARKER; |
5811 | } |
5812 | |
5813 | // Returns a chain and a flag for retval copy to use. |
5814 | Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops); |
5815 | DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); |
5816 | InFlag = Chain.getValue(1); |
5817 | DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo)); |
5818 | |
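| // For conventions where the callee restores the stack (e.g. fastcc when |
| // tail-call optimisation is enabled), CALLSEQ_END is told how many bytes |
| // the callee pops, rounded up to the 16-byte stack alignment; otherwise the |
| // caller cleans up and this is zero. |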
5819 | uint64_t CalleePopBytes = |
5820 | DoesCalleeRestoreStack(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : 0; |
5821 | |
5822 | Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true), |
5823 | DAG.getIntPtrConstant(CalleePopBytes, DL, true), |
5824 | InFlag, DL); |
5825 | if (!Ins.empty()) |
5826 | InFlag = Chain.getValue(1); |
5827 | |
5828 | // Handle result values, copying them out of physregs into vregs that we |
5829 | // return. |
5830 | return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG, |
5831 | InVals, IsThisReturn, |
5832 | IsThisReturn ? OutVals[0] : SDValue()); |
5833 | } |
5834 | |
5835 | bool AArch64TargetLowering::CanLowerReturn( |
5836 | CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, |
5837 | const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { |
5838 | CCAssignFn *RetCC = CCAssignFnForReturn(CallConv); |
5839 | SmallVector<CCValAssign, 16> RVLocs; |
5840 | CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); |
5841 | return CCInfo.CheckReturn(Outs, RetCC); |
5842 | } |
5843 | |
5844 | SDValue |
5845 | AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, |
5846 | bool isVarArg, |
5847 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
5848 | const SmallVectorImpl<SDValue> &OutVals, |
5849 | const SDLoc &DL, SelectionDAG &DAG) const { |
5850 | auto &MF = DAG.getMachineFunction(); |
5851 | auto *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); |
5852 | |
5853 | CCAssignFn *RetCC = CCAssignFnForReturn(CallConv); |
5854 | SmallVector<CCValAssign, 16> RVLocs; |
5855 | CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, |
5856 | *DAG.getContext()); |
5857 | CCInfo.AnalyzeReturn(Outs, RetCC); |
5858 | |
5859 | // Copy the result values into the output registers. |
5860 | SDValue Flag; |
5861 | SmallVector<std::pair<unsigned, SDValue>, 4> RetVals; |
5862 | SmallSet<unsigned, 4> RegsUsed; |
5863 | for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size(); |
5864 | ++i, ++realRVLocIdx) { |
5865 | CCValAssign &VA = RVLocs[i]; |
5866 | assert(VA.isRegLoc() && "Can only return in registers!"); |
5867 | SDValue Arg = OutVals[realRVLocIdx]; |
5868 | |
5869 | switch (VA.getLocInfo()) { |
5870 | default: |
5871 | llvm_unreachable("Unknown loc info!")::llvm::llvm_unreachable_internal("Unknown loc info!", "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp" , 5871); |
5872 | case CCValAssign::Full: |
5873 | if (Outs[i].ArgVT == MVT::i1) { |
5874 | // AAPCS requires i1 to be zero-extended to i8 by the producer of the |
5875 | // value. This is strictly redundant on Darwin (which uses "zeroext |
5876 | // i1"), but will be optimised out before ISel. |
5877 | Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg); |
5878 | Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg); |
5879 | } |
5880 | break; |
5881 | case CCValAssign::BCvt: |
5882 | Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg); |
5883 | break; |
5884 | case CCValAssign::AExt: |
5885 | case CCValAssign::ZExt: |
5886 | Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT()); |
5887 | break; |
5888 | case CCValAssign::AExtUpper: |
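| // The 32-bit value lives in the upper half of its 64-bit location: extend |
| // it and shift it into bits [63:32]; if the same physical register also |
| // holds a low half, the two are OR'd together below. |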
5889 | assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits"); |
5890 | Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT()); |
5891 | Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg, |
5892 | DAG.getConstant(32, DL, VA.getLocVT())); |
5893 | break; |
5894 | } |
5895 | |
5896 | if (RegsUsed.count(VA.getLocReg())) { |
5897 | SDValue &Bits = |
5898 | llvm::find_if(RetVals, [=](const std::pair<unsigned, SDValue> &Elt) { |
5899 | return Elt.first == VA.getLocReg(); |
5900 | })->second; |
5901 | Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg); |
5902 | } else { |
5903 | RetVals.emplace_back(VA.getLocReg(), Arg); |
5904 | RegsUsed.insert(VA.getLocReg()); |
5905 | } |
5906 | } |
5907 | |
5908 | SmallVector<SDValue, 4> RetOps(1, Chain); |
5909 | for (auto &RetVal : RetVals) { |
5910 | Chain = DAG.getCopyToReg(Chain, DL, RetVal.first, RetVal.second, Flag); |
5911 | Flag = Chain.getValue(1); |
5912 | RetOps.push_back( |
5913 | DAG.getRegister(RetVal.first, RetVal.second.getValueType())); |
5914 | } |
5915 | |
5916 | // The Windows AArch64 ABI requires that, when returning a struct by value, |
5917 | // we copy the sret argument into X0 for the return. |
5918 | // We saved the argument into a virtual register in the entry block, |
5919 | // so now we copy the value out and into X0. |
5920 | if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) { |
5921 | SDValue Val = DAG.getCopyFromReg(RetOps[0], DL, SRetReg, |
5922 | getPointerTy(MF.getDataLayout())); |
5923 | |
5924 | unsigned RetValReg = AArch64::X0; |
5925 | Chain = DAG.getCopyToReg(Chain, DL, RetValReg, Val, Flag); |
5926 | Flag = Chain.getValue(1); |
5927 | |
5928 | RetOps.push_back( |
5929 | DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout()))); |
5930 | } |
5931 | |
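| // Functions whose callee-saved registers are preserved via copies rather |
| // than spills (e.g. under the CXX_FAST_TLS convention) must list those |
| // registers on the return node so they remain live until the return. |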
5932 | const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo(); |
5933 | const MCPhysReg *I = |
5934 | TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction()); |
5935 | if (I) { |
5936 | for (; *I; ++I) { |
5937 | if (AArch64::GPR64RegClass.contains(*I)) |
5938 | RetOps.push_back(DAG.getRegister(*I, MVT::i64)); |
5939 | else if (AArch64::FPR64RegClass.contains(*I)) |
5940 | RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64))); |
5941 | else |
5942 | llvm_unreachable("Unexpected register class in CSRsViaCopy!")::llvm::llvm_unreachable_internal("Unexpected register class in CSRsViaCopy!" , "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp" , 5942); |
5943 | } |
5944 | } |
5945 | |
5946 | RetOps[0] = Chain; // Update chain. |
5947 | |
5948 | // Add the flag if we have it. |
5949 | if (Flag.getNode()) |
5950 | RetOps.push_back(Flag); |
5951 | |
5952 | return DAG.getNode(AArch64ISD::RET_FLAG, DL, MVT::Other, RetOps); |
5953 | } |
5954 | |
5955 | //===----------------------------------------------------------------------===// |
5956 | // Other Lowering Code |
5957 | //===----------------------------------------------------------------------===// |
5958 | |
5959 | SDValue AArch64TargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty, |
5960 | SelectionDAG &DAG, |
5961 | unsigned Flag) const { |
5962 | return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty, |
5963 | N->getOffset(), Flag); |
5964 | } |
5965 | |
5966 | SDValue AArch64TargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty, |
5967 | SelectionDAG &DAG, |
5968 | unsigned Flag) const { |
5969 | return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag); |
5970 | } |
5971 | |
5972 | SDValue AArch64TargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty, |
5973 | SelectionDAG &DAG, |
5974 | unsigned Flag) const { |
5975 | return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(), |
5976 | N->getOffset(), Flag); |
5977 | } |
5978 | |
5979 | SDValue AArch64TargetLowering::getTargetNode(BlockAddressSDNode* N, EVT Ty, |
5980 | SelectionDAG &DAG, |
5981 | unsigned Flag) const { |
5982 | return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag); |
5983 | } |
5984 | |
5985 | // (loadGOT sym) |
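| // Typically materialised as an adrp of the GOT page followed by an ldr of |
| // the GOT entry, e.g. adrp xN, :got:sym / ldr xN, [xN, :got_lo12:sym]. |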
5986 | template <class NodeTy> |
5987 | SDValue AArch64TargetLowering::getGOT(NodeTy *N, SelectionDAG &DAG, |
5988 | unsigned Flags) const { |
5989 | LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getGOT\n"); |
5990 | SDLoc DL(N); |
5991 | EVT Ty = getPointerTy(DAG.getDataLayout()); |
5992 | SDValue GotAddr = getTargetNode(N, Ty, DAG, AArch64II::MO_GOT | Flags); |
5993 | // FIXME: Once remat is capable of dealing with instructions with register |
5994 | // operands, expand this into two nodes instead of using a wrapper node. |
5995 | return DAG.getNode(AArch64ISD::LOADgot, DL, Ty, GotAddr); |
5996 | } |
5997 | |
5998 | // (wrapper %highest(sym), %higher(sym), %hi(sym), %lo(sym)) |
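| // Large code model: the full 64-bit address is built with a movz/movk |
| // sequence, one 16-bit chunk (G3..G0) per operand. |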
5999 | template <class NodeTy> |
6000 | SDValue AArch64TargetLowering::getAddrLarge(NodeTy *N, SelectionDAG &DAG, |
6001 | unsigned Flags) const { |
6002 | LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrLarge\n"); |
6003 | SDLoc DL(N); |
6004 | EVT Ty = getPointerTy(DAG.getDataLayout()); |
6005 | const unsigned char MO_NC = AArch64II::MO_NC; |
6006 | return DAG.getNode( |
6007 | AArch64ISD::WrapperLarge, DL, Ty, |
6008 | getTargetNode(N, Ty, DAG, AArch64II::MO_G3 | Flags), |
6009 | getTargetNode(N, Ty, DAG, AArch64II::MO_G2 | MO_NC | Flags), |
6010 | getTargetNode(N, Ty, DAG, AArch64II::MO_G1 | MO_NC | Flags), |
6011 | getTargetNode(N, Ty, DAG, AArch64II::MO_G0 | MO_NC | Flags)); |
6012 | } |
6013 | |
6014 | // (addlow (adrp %hi(sym)) %lo(sym)) |
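| // Small code model: adrp xN, sym followed by add xN, xN, :lo12:sym, giving |
| // roughly +/-4GB of reach from the code. |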
6015 | template <class NodeTy> |
6016 | SDValue AArch64TargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG, |
6017 | unsigned Flags) const { |
6018 | LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddr\n"); |
6019 | SDLoc DL(N); |
6020 | EVT Ty = getPointerTy(DAG.getDataLayout()); |
6021 | SDValue Hi = getTargetNode(N, Ty, DAG, AArch64II::MO_PAGE | Flags); |
6022 | SDValue Lo = getTargetNode(N, Ty, DAG, |
6023 | AArch64II::MO_PAGEOFF | AArch64II::MO_NC | Flags); |
6024 | SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, Ty, Hi); |
6025 | return DAG.getNode(AArch64ISD::ADDlow, DL, Ty, ADRP, Lo); |
6026 | } |
6027 | |
6028 | // (adr sym) |
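| // Tiny code model: a single adr, so the symbol must be within +/-1MB of |
| // the code. |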
6029 | template <class NodeTy> |
6030 | SDValue AArch64TargetLowering::getAddrTiny(NodeTy *N, SelectionDAG &DAG, |
6031 | unsigned Flags) const { |
6032 | LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrTiny\n"); |
6033 | SDLoc DL(N); |
6034 | EVT Ty = getPointerTy(DAG.getDataLayout()); |
6035 | SDValue Sym = getTargetNode(N, Ty, DAG, Flags); |
6036 | return DAG.getNode(AArch64ISD::ADR, DL, Ty, Sym); |
6037 | } |
6038 | |
6039 | SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op, |
6040 | SelectionDAG &DAG) const { |
6041 | GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op); |