clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name AArch64ISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Target/AArch64 -resource-dir /usr/lib/llvm-14/lib/clang/14.0.0 -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/include -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/include -D NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-14/lib/clang/14.0.0/include -internal-isystem 
/usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Target/AArch64 -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e=. -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-09-04-040900-46481-1 -x c++ /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
1 | |
2 | |
3 | |
4 | |
5 | |
6 | |
7 | |
8 | |
9 | |
10 | |
11 | |
12 | |
13 | #include "AArch64ISelLowering.h" |
14 | #include "AArch64CallingConvention.h" |
15 | #include "AArch64ExpandImm.h" |
16 | #include "AArch64MachineFunctionInfo.h" |
17 | #include "AArch64PerfectShuffle.h" |
18 | #include "AArch64RegisterInfo.h" |
19 | #include "AArch64Subtarget.h" |
20 | #include "MCTargetDesc/AArch64AddressingModes.h" |
21 | #include "Utils/AArch64BaseInfo.h" |
22 | #include "llvm/ADT/APFloat.h" |
23 | #include "llvm/ADT/APInt.h" |
24 | #include "llvm/ADT/ArrayRef.h" |
25 | #include "llvm/ADT/STLExtras.h" |
26 | #include "llvm/ADT/SmallSet.h" |
27 | #include "llvm/ADT/SmallVector.h" |
28 | #include "llvm/ADT/Statistic.h" |
29 | #include "llvm/ADT/StringRef.h" |
30 | #include "llvm/ADT/Triple.h" |
31 | #include "llvm/ADT/Twine.h" |
32 | #include "llvm/Analysis/ObjCARCUtil.h" |
33 | #include "llvm/Analysis/VectorUtils.h" |
34 | #include "llvm/CodeGen/Analysis.h" |
35 | #include "llvm/CodeGen/CallingConvLower.h" |
36 | #include "llvm/CodeGen/MachineBasicBlock.h" |
37 | #include "llvm/CodeGen/MachineFrameInfo.h" |
38 | #include "llvm/CodeGen/MachineFunction.h" |
39 | #include "llvm/CodeGen/MachineInstr.h" |
40 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
41 | #include "llvm/CodeGen/MachineMemOperand.h" |
42 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
43 | #include "llvm/CodeGen/RuntimeLibcalls.h" |
44 | #include "llvm/CodeGen/SelectionDAG.h" |
45 | #include "llvm/CodeGen/SelectionDAGNodes.h" |
46 | #include "llvm/CodeGen/TargetCallingConv.h" |
47 | #include "llvm/CodeGen/TargetInstrInfo.h" |
48 | #include "llvm/CodeGen/ValueTypes.h" |
49 | #include "llvm/IR/Attributes.h" |
50 | #include "llvm/IR/Constants.h" |
51 | #include "llvm/IR/DataLayout.h" |
52 | #include "llvm/IR/DebugLoc.h" |
53 | #include "llvm/IR/DerivedTypes.h" |
54 | #include "llvm/IR/Function.h" |
55 | #include "llvm/IR/GetElementPtrTypeIterator.h" |
56 | #include "llvm/IR/GlobalValue.h" |
57 | #include "llvm/IR/IRBuilder.h" |
58 | #include "llvm/IR/Instruction.h" |
59 | #include "llvm/IR/Instructions.h" |
60 | #include "llvm/IR/IntrinsicInst.h" |
61 | #include "llvm/IR/Intrinsics.h" |
62 | #include "llvm/IR/IntrinsicsAArch64.h" |
63 | #include "llvm/IR/Module.h" |
64 | #include "llvm/IR/OperandTraits.h" |
65 | #include "llvm/IR/PatternMatch.h" |
66 | #include "llvm/IR/Type.h" |
67 | #include "llvm/IR/Use.h" |
68 | #include "llvm/IR/Value.h" |
69 | #include "llvm/MC/MCRegisterInfo.h" |
70 | #include "llvm/Support/Casting.h" |
71 | #include "llvm/Support/CodeGen.h" |
72 | #include "llvm/Support/CommandLine.h" |
73 | #include "llvm/Support/Compiler.h" |
74 | #include "llvm/Support/Debug.h" |
75 | #include "llvm/Support/ErrorHandling.h" |
76 | #include "llvm/Support/KnownBits.h" |
77 | #include "llvm/Support/MachineValueType.h" |
78 | #include "llvm/Support/MathExtras.h" |
79 | #include "llvm/Support/raw_ostream.h" |
80 | #include "llvm/Target/TargetMachine.h" |
81 | #include "llvm/Target/TargetOptions.h" |
82 | #include <algorithm> |
83 | #include <bitset> |
84 | #include <cassert> |
85 | #include <cctype> |
86 | #include <cstdint> |
87 | #include <cstdlib> |
88 | #include <iterator> |
89 | #include <limits> |
90 | #include <tuple> |
91 | #include <utility> |
92 | #include <vector> |
93 | |
using namespace llvm;
using namespace llvm::PatternMatch;

// Category string used by LLVM_DEBUG output in this file.
#define DEBUG_TYPE "aarch64-lower"

// -stats counters maintained by this file's lowering/combine code.
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumShiftInserts, "Number of vector shift inserts");
STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");
102 | |
103 | |
104 | |
105 | |
// Hidden command-line override allowing Local Dynamic TLS sequences to be
// generated on AArch64 ELF targets; off by default.
// NOTE(review): deliberately non-static — presumably referenced from another
// translation unit; confirm before adding internal linkage.
cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
    "aarch64-elf-ldtls-generation", cl::Hidden,
    cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
    cl::init(false));
110 | |
// Hidden flag gating the logical-immediate optimization; on by default.
static cl::opt<bool>
EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden,
                         cl::desc("Enable AArch64 logical imm instruction "
                                  "optimization"),
                         cl::init(true));
116 | |
117 | |
118 | |
119 | |
120 | |
// Hidden flag gating the combine that folds extends into AArch64 masked
// gather intrinsics; on by default.
static cl::opt<bool>
EnableCombineMGatherIntrinsics("aarch64-enable-mgather-combine", cl::Hidden,
                               cl::desc("Combine extends of AArch64 masked "
                                        "gather intrinsics"),
                               cl::init(true));
126 | |
127 | |
// Carrier type used for condition-code values in this file (name suggests
// "condition code"; modeled as i32).
static const MVT MVT_CC = MVT::i32;
129 | |
130 | static inline EVT getPackedSVEVectorVT(EVT VT) { |
131 | switch (VT.getSimpleVT().SimpleTy) { |
132 | default: |
133 | llvm_unreachable("unexpected element type for vector"); |
134 | case MVT::i8: |
135 | return MVT::nxv16i8; |
136 | case MVT::i16: |
137 | return MVT::nxv8i16; |
138 | case MVT::i32: |
139 | return MVT::nxv4i32; |
140 | case MVT::i64: |
141 | return MVT::nxv2i64; |
142 | case MVT::f16: |
143 | return MVT::nxv8f16; |
144 | case MVT::f32: |
145 | return MVT::nxv4f32; |
146 | case MVT::f64: |
147 | return MVT::nxv2f64; |
148 | case MVT::bf16: |
149 | return MVT::nxv8bf16; |
150 | } |
151 | } |
152 | |
153 | |
154 | |
155 | static inline EVT getPackedSVEVectorVT(ElementCount EC) { |
156 | switch (EC.getKnownMinValue()) { |
157 | default: |
158 | llvm_unreachable("unexpected element count for vector"); |
159 | case 16: |
160 | return MVT::nxv16i8; |
161 | case 8: |
162 | return MVT::nxv8i16; |
163 | case 4: |
164 | return MVT::nxv4i32; |
165 | case 2: |
166 | return MVT::nxv2i64; |
167 | } |
168 | } |
169 | |
170 | static inline EVT getPromotedVTForPredicate(EVT VT) { |
171 | assert(VT.isScalableVector() && (VT.getVectorElementType() == MVT::i1) && |
172 | "Expected scalable predicate vector type!"); |
173 | switch (VT.getVectorMinNumElements()) { |
174 | default: |
175 | llvm_unreachable("unexpected element count for vector"); |
176 | case 2: |
177 | return MVT::nxv2i64; |
178 | case 4: |
179 | return MVT::nxv4i32; |
180 | case 8: |
181 | return MVT::nxv8i16; |
182 | case 16: |
183 | return MVT::nxv16i8; |
184 | } |
185 | } |
186 | |
187 | |
188 | |
189 | |
190 | |
191 | |
192 | static inline bool isPackedVectorType(EVT VT, SelectionDAG &DAG) { |
193 | assert(VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) && |
194 | "Expected legal vector type!"); |
195 | return VT.isFixedLengthVector() || |
196 | VT.getSizeInBits().getKnownMinSize() == AArch64::SVEBitsPerBlock; |
197 | } |
198 | |
199 | |
200 | |
// Returns true if Opc is one of the AArch64ISD "*_MERGE_PASSTHRU" node
// opcodes (the predicated node set whose inactive lanes come from a
// passthru operand — inferred from the opcode naming; the node definitions
// live elsewhere). Any other opcode returns false.
static bool isMergePassthruOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    return false;
  case AArch64ISD::BITREVERSE_MERGE_PASSTHRU:
  case AArch64ISD::BSWAP_MERGE_PASSTHRU:
  case AArch64ISD::CTLZ_MERGE_PASSTHRU:
  case AArch64ISD::CTPOP_MERGE_PASSTHRU:
  case AArch64ISD::DUP_MERGE_PASSTHRU:
  case AArch64ISD::ABS_MERGE_PASSTHRU:
  case AArch64ISD::NEG_MERGE_PASSTHRU:
  case AArch64ISD::FNEG_MERGE_PASSTHRU:
  case AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU:
  case AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU:
  case AArch64ISD::FCEIL_MERGE_PASSTHRU:
  case AArch64ISD::FFLOOR_MERGE_PASSTHRU:
  case AArch64ISD::FNEARBYINT_MERGE_PASSTHRU:
  case AArch64ISD::FRINT_MERGE_PASSTHRU:
  case AArch64ISD::FROUND_MERGE_PASSTHRU:
  case AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU:
  case AArch64ISD::FTRUNC_MERGE_PASSTHRU:
  case AArch64ISD::FP_ROUND_MERGE_PASSTHRU:
  case AArch64ISD::FP_EXTEND_MERGE_PASSTHRU:
  case AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU:
  case AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU:
  case AArch64ISD::FCVTZU_MERGE_PASSTHRU:
  case AArch64ISD::FCVTZS_MERGE_PASSTHRU:
  case AArch64ISD::FSQRT_MERGE_PASSTHRU:
  case AArch64ISD::FRECPX_MERGE_PASSTHRU:
  case AArch64ISD::FABS_MERGE_PASSTHRU:
    return true;
  }
}
234 | |
235 | AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, |
236 | const AArch64Subtarget &STI) |
237 | : TargetLowering(TM), Subtarget(&STI) { |
238 | |
239 | |
240 | setBooleanContents(ZeroOrOneBooleanContent); |
241 | |
242 | |
243 | setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); |
244 | |
245 | |
246 | addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass); |
247 | addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass); |
248 | |
249 | if (Subtarget->hasLS64()) { |
250 | addRegisterClass(MVT::i64x8, &AArch64::GPR64x8ClassRegClass); |
251 | setOperationAction(ISD::LOAD, MVT::i64x8, Custom); |
252 | setOperationAction(ISD::STORE, MVT::i64x8, Custom); |
253 | } |
254 | |
255 | if (Subtarget->hasFPARMv8()) { |
256 | addRegisterClass(MVT::f16, &AArch64::FPR16RegClass); |
257 | addRegisterClass(MVT::bf16, &AArch64::FPR16RegClass); |
258 | addRegisterClass(MVT::f32, &AArch64::FPR32RegClass); |
259 | addRegisterClass(MVT::f64, &AArch64::FPR64RegClass); |
260 | addRegisterClass(MVT::f128, &AArch64::FPR128RegClass); |
261 | } |
262 | |
263 | if (Subtarget->hasNEON()) { |
264 | addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass); |
265 | addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass); |
266 | |
267 | addDRTypeForNEON(MVT::v2f32); |
268 | addDRTypeForNEON(MVT::v8i8); |
269 | addDRTypeForNEON(MVT::v4i16); |
270 | addDRTypeForNEON(MVT::v2i32); |
271 | addDRTypeForNEON(MVT::v1i64); |
272 | addDRTypeForNEON(MVT::v1f64); |
273 | addDRTypeForNEON(MVT::v4f16); |
274 | if (Subtarget->hasBF16()) |
275 | addDRTypeForNEON(MVT::v4bf16); |
276 | |
277 | addQRTypeForNEON(MVT::v4f32); |
278 | addQRTypeForNEON(MVT::v2f64); |
279 | addQRTypeForNEON(MVT::v16i8); |
280 | addQRTypeForNEON(MVT::v8i16); |
281 | addQRTypeForNEON(MVT::v4i32); |
282 | addQRTypeForNEON(MVT::v2i64); |
283 | addQRTypeForNEON(MVT::v8f16); |
284 | if (Subtarget->hasBF16()) |
285 | addQRTypeForNEON(MVT::v8bf16); |
286 | } |
287 | |
288 | if (Subtarget->hasSVE()) { |
289 | |
290 | addRegisterClass(MVT::nxv2i1, &AArch64::PPRRegClass); |
291 | addRegisterClass(MVT::nxv4i1, &AArch64::PPRRegClass); |
292 | addRegisterClass(MVT::nxv8i1, &AArch64::PPRRegClass); |
293 | addRegisterClass(MVT::nxv16i1, &AArch64::PPRRegClass); |
294 | |
295 | |
296 | addRegisterClass(MVT::nxv16i8, &AArch64::ZPRRegClass); |
297 | addRegisterClass(MVT::nxv8i16, &AArch64::ZPRRegClass); |
298 | addRegisterClass(MVT::nxv4i32, &AArch64::ZPRRegClass); |
299 | addRegisterClass(MVT::nxv2i64, &AArch64::ZPRRegClass); |
300 | |
301 | addRegisterClass(MVT::nxv2f16, &AArch64::ZPRRegClass); |
302 | addRegisterClass(MVT::nxv4f16, &AArch64::ZPRRegClass); |
303 | addRegisterClass(MVT::nxv8f16, &AArch64::ZPRRegClass); |
304 | addRegisterClass(MVT::nxv2f32, &AArch64::ZPRRegClass); |
305 | addRegisterClass(MVT::nxv4f32, &AArch64::ZPRRegClass); |
306 | addRegisterClass(MVT::nxv2f64, &AArch64::ZPRRegClass); |
307 | |
308 | if (Subtarget->hasBF16()) { |
309 | addRegisterClass(MVT::nxv2bf16, &AArch64::ZPRRegClass); |
310 | addRegisterClass(MVT::nxv4bf16, &AArch64::ZPRRegClass); |
311 | addRegisterClass(MVT::nxv8bf16, &AArch64::ZPRRegClass); |
312 | } |
313 | |
314 | if (Subtarget->useSVEForFixedLengthVectors()) { |
315 | for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) |
316 | if (useSVEForFixedLengthVectorVT(VT)) |
317 | addRegisterClass(VT, &AArch64::ZPRRegClass); |
318 | |
319 | for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) |
320 | if (useSVEForFixedLengthVectorVT(VT)) |
321 | addRegisterClass(VT, &AArch64::ZPRRegClass); |
322 | } |
323 | |
324 | for (auto VT : { MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64 }) { |
325 | setOperationAction(ISD::SADDSAT, VT, Legal); |
326 | setOperationAction(ISD::UADDSAT, VT, Legal); |
327 | setOperationAction(ISD::SSUBSAT, VT, Legal); |
328 | setOperationAction(ISD::USUBSAT, VT, Legal); |
329 | setOperationAction(ISD::UREM, VT, Expand); |
330 | setOperationAction(ISD::SREM, VT, Expand); |
331 | setOperationAction(ISD::SDIVREM, VT, Expand); |
332 | setOperationAction(ISD::UDIVREM, VT, Expand); |
333 | } |
334 | |
335 | for (auto VT : |
336 | { MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8, |
337 | MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16 }) |
338 | setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Legal); |
339 | |
340 | for (auto VT : |
341 | { MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32, MVT::nxv4f32, |
342 | MVT::nxv2f64 }) { |
343 | setCondCodeAction(ISD::SETO, VT, Expand); |
344 | setCondCodeAction(ISD::SETOLT, VT, Expand); |
345 | setCondCodeAction(ISD::SETLT, VT, Expand); |
346 | setCondCodeAction(ISD::SETOLE, VT, Expand); |
347 | setCondCodeAction(ISD::SETLE, VT, Expand); |
348 | setCondCodeAction(ISD::SETULT, VT, Expand); |
349 | setCondCodeAction(ISD::SETULE, VT, Expand); |
350 | setCondCodeAction(ISD::SETUGE, VT, Expand); |
351 | setCondCodeAction(ISD::SETUGT, VT, Expand); |
352 | setCondCodeAction(ISD::SETUEQ, VT, Expand); |
353 | setCondCodeAction(ISD::SETUNE, VT, Expand); |
354 | |
355 | setOperationAction(ISD::FREM, VT, Expand); |
356 | setOperationAction(ISD::FPOW, VT, Expand); |
357 | setOperationAction(ISD::FPOWI, VT, Expand); |
358 | setOperationAction(ISD::FCOS, VT, Expand); |
359 | setOperationAction(ISD::FSIN, VT, Expand); |
360 | setOperationAction(ISD::FSINCOS, VT, Expand); |
361 | setOperationAction(ISD::FEXP, VT, Expand); |
362 | setOperationAction(ISD::FEXP2, VT, Expand); |
363 | setOperationAction(ISD::FLOG, VT, Expand); |
364 | setOperationAction(ISD::FLOG2, VT, Expand); |
365 | setOperationAction(ISD::FLOG10, VT, Expand); |
366 | } |
367 | } |
368 | |
369 | |
370 | computeRegisterProperties(Subtarget->getRegisterInfo()); |
371 | |
372 | |
373 | setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); |
374 | setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); |
375 | setOperationAction(ISD::SETCC, MVT::i32, Custom); |
376 | setOperationAction(ISD::SETCC, MVT::i64, Custom); |
377 | setOperationAction(ISD::SETCC, MVT::f16, Custom); |
378 | setOperationAction(ISD::SETCC, MVT::f32, Custom); |
379 | setOperationAction(ISD::SETCC, MVT::f64, Custom); |
380 | setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Custom); |
381 | setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Custom); |
382 | setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Custom); |
383 | setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom); |
384 | setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Custom); |
385 | setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Custom); |
386 | setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); |
387 | setOperationAction(ISD::BITREVERSE, MVT::i64, Legal); |
388 | setOperationAction(ISD::BRCOND, MVT::Other, Expand); |
389 | setOperationAction(ISD::BR_CC, MVT::i32, Custom); |
390 | setOperationAction(ISD::BR_CC, MVT::i64, Custom); |
391 | setOperationAction(ISD::BR_CC, MVT::f16, Custom); |
392 | setOperationAction(ISD::BR_CC, MVT::f32, Custom); |
393 | setOperationAction(ISD::BR_CC, MVT::f64, Custom); |
394 | setOperationAction(ISD::SELECT, MVT::i32, Custom); |
395 | setOperationAction(ISD::SELECT, MVT::i64, Custom); |
396 | setOperationAction(ISD::SELECT, MVT::f16, Custom); |
397 | setOperationAction(ISD::SELECT, MVT::f32, Custom); |
398 | setOperationAction(ISD::SELECT, MVT::f64, Custom); |
399 | setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); |
400 | setOperationAction(ISD::SELECT_CC, MVT::i64, Custom); |
401 | setOperationAction(ISD::SELECT_CC, MVT::f16, Custom); |
402 | setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); |
403 | setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); |
404 | setOperationAction(ISD::BR_JT, MVT::Other, Custom); |
405 | setOperationAction(ISD::JumpTable, MVT::i64, Custom); |
406 | |
407 | setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom); |
408 | setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom); |
409 | setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom); |
410 | |
411 | setOperationAction(ISD::FREM, MVT::f32, Expand); |
412 | setOperationAction(ISD::FREM, MVT::f64, Expand); |
413 | setOperationAction(ISD::FREM, MVT::f80, Expand); |
414 | |
415 | setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand); |
416 | |
417 | |
418 | |
419 | setOperationAction(ISD::XOR, MVT::i32, Custom); |
420 | setOperationAction(ISD::XOR, MVT::i64, Custom); |
421 | |
422 | |
423 | |
424 | setOperationAction(ISD::FABS, MVT::f128, Expand); |
425 | setOperationAction(ISD::FADD, MVT::f128, LibCall); |
426 | setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand); |
427 | setOperationAction(ISD::FCOS, MVT::f128, Expand); |
428 | setOperationAction(ISD::FDIV, MVT::f128, LibCall); |
429 | setOperationAction(ISD::FMA, MVT::f128, Expand); |
430 | setOperationAction(ISD::FMUL, MVT::f128, LibCall); |
431 | setOperationAction(ISD::FNEG, MVT::f128, Expand); |
432 | setOperationAction(ISD::FPOW, MVT::f128, Expand); |
433 | setOperationAction(ISD::FREM, MVT::f128, Expand); |
434 | setOperationAction(ISD::FRINT, MVT::f128, Expand); |
435 | setOperationAction(ISD::FSIN, MVT::f128, Expand); |
436 | setOperationAction(ISD::FSINCOS, MVT::f128, Expand); |
437 | setOperationAction(ISD::FSQRT, MVT::f128, Expand); |
438 | setOperationAction(ISD::FSUB, MVT::f128, LibCall); |
439 | setOperationAction(ISD::FTRUNC, MVT::f128, Expand); |
440 | setOperationAction(ISD::SETCC, MVT::f128, Custom); |
441 | setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Custom); |
442 | setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Custom); |
443 | setOperationAction(ISD::BR_CC, MVT::f128, Custom); |
444 | setOperationAction(ISD::SELECT, MVT::f128, Custom); |
445 | setOperationAction(ISD::SELECT_CC, MVT::f128, Custom); |
446 | setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom); |
447 | |
448 | |
449 | |
450 | setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); |
451 | setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); |
452 | setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom); |
453 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom); |
454 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom); |
455 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i128, Custom); |
456 | setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); |
457 | setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); |
458 | setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom); |
459 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom); |
460 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom); |
461 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i128, Custom); |
462 | setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); |
463 | setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); |
464 | setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom); |
465 | setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom); |
466 | setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom); |
467 | setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i128, Custom); |
468 | setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); |
469 | setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); |
470 | setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom); |
471 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom); |
472 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom); |
473 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, Custom); |
474 | setOperationAction(ISD::FP_ROUND, MVT::f16, Custom); |
475 | setOperationAction(ISD::FP_ROUND, MVT::f32, Custom); |
476 | setOperationAction(ISD::FP_ROUND, MVT::f64, Custom); |
477 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom); |
478 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom); |
479 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom); |
480 | |
481 | setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i32, Custom); |
482 | setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom); |
483 | setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i32, Custom); |
484 | setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom); |
485 | |
486 | |
487 | setOperationAction(ISD::VASTART, MVT::Other, Custom); |
488 | setOperationAction(ISD::VAARG, MVT::Other, Custom); |
489 | setOperationAction(ISD::VACOPY, MVT::Other, Custom); |
490 | setOperationAction(ISD::VAEND, MVT::Other, Expand); |
491 | |
492 | |
493 | setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); |
494 | setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); |
495 | |
496 | if (Subtarget->isTargetWindows()) |
497 | setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom); |
498 | else |
499 | setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); |
500 | |
501 | |
502 | setOperationAction(ISD::ConstantPool, MVT::i64, Custom); |
503 | |
504 | |
505 | setOperationAction(ISD::BlockAddress, MVT::i64, Custom); |
506 | |
507 | |
508 | setOperationAction(ISD::ADDC, MVT::i32, Custom); |
509 | setOperationAction(ISD::ADDE, MVT::i32, Custom); |
510 | setOperationAction(ISD::SUBC, MVT::i32, Custom); |
511 | setOperationAction(ISD::SUBE, MVT::i32, Custom); |
512 | setOperationAction(ISD::ADDC, MVT::i64, Custom); |
513 | setOperationAction(ISD::ADDE, MVT::i64, Custom); |
514 | setOperationAction(ISD::SUBC, MVT::i64, Custom); |
515 | setOperationAction(ISD::SUBE, MVT::i64, Custom); |
516 | |
517 | |
518 | setOperationAction(ISD::ROTL, MVT::i32, Expand); |
519 | setOperationAction(ISD::ROTL, MVT::i64, Expand); |
520 | for (MVT VT : MVT::fixedlen_vector_valuetypes()) { |
521 | setOperationAction(ISD::ROTL, VT, Expand); |
522 | setOperationAction(ISD::ROTR, VT, Expand); |
523 | } |
524 | |
525 | |
526 | setOperationAction(ISD::MULHU, MVT::i32, Expand); |
527 | setOperationAction(ISD::MULHS, MVT::i32, Expand); |
528 | |
529 | |
530 | setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); |
531 | setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); |
532 | |
533 | setOperationAction(ISD::CTPOP, MVT::i32, Custom); |
534 | setOperationAction(ISD::CTPOP, MVT::i64, Custom); |
535 | setOperationAction(ISD::CTPOP, MVT::i128, Custom); |
536 | |
537 | setOperationAction(ISD::ABS, MVT::i32, Custom); |
538 | setOperationAction(ISD::ABS, MVT::i64, Custom); |
539 | |
540 | setOperationAction(ISD::SDIVREM, MVT::i32, Expand); |
541 | setOperationAction(ISD::SDIVREM, MVT::i64, Expand); |
542 | for (MVT VT : MVT::fixedlen_vector_valuetypes()) { |
543 | setOperationAction(ISD::SDIVREM, VT, Expand); |
544 | setOperationAction(ISD::UDIVREM, VT, Expand); |
545 | } |
546 | setOperationAction(ISD::SREM, MVT::i32, Expand); |
547 | setOperationAction(ISD::SREM, MVT::i64, Expand); |
548 | setOperationAction(ISD::UDIVREM, MVT::i32, Expand); |
549 | setOperationAction(ISD::UDIVREM, MVT::i64, Expand); |
550 | setOperationAction(ISD::UREM, MVT::i32, Expand); |
551 | setOperationAction(ISD::UREM, MVT::i64, Expand); |
552 | |
553 | |
554 | setOperationAction(ISD::SADDO, MVT::i32, Custom); |
555 | setOperationAction(ISD::SADDO, MVT::i64, Custom); |
556 | setOperationAction(ISD::UADDO, MVT::i32, Custom); |
557 | setOperationAction(ISD::UADDO, MVT::i64, Custom); |
558 | setOperationAction(ISD::SSUBO, MVT::i32, Custom); |
559 | setOperationAction(ISD::SSUBO, MVT::i64, Custom); |
560 | setOperationAction(ISD::USUBO, MVT::i32, Custom); |
561 | setOperationAction(ISD::USUBO, MVT::i64, Custom); |
562 | setOperationAction(ISD::SMULO, MVT::i32, Custom); |
563 | setOperationAction(ISD::SMULO, MVT::i64, Custom); |
564 | setOperationAction(ISD::UMULO, MVT::i32, Custom); |
565 | setOperationAction(ISD::UMULO, MVT::i64, Custom); |
566 | |
567 | setOperationAction(ISD::FSIN, MVT::f32, Expand); |
568 | setOperationAction(ISD::FSIN, MVT::f64, Expand); |
569 | setOperationAction(ISD::FCOS, MVT::f32, Expand); |
570 | setOperationAction(ISD::FCOS, MVT::f64, Expand); |
571 | setOperationAction(ISD::FPOW, MVT::f32, Expand); |
572 | setOperationAction(ISD::FPOW, MVT::f64, Expand); |
573 | setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); |
574 | setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); |
575 | if (Subtarget->hasFullFP16()) |
576 | setOperationAction(ISD::FCOPYSIGN, MVT::f16, Custom); |
577 | else |
578 | setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote); |
579 | |
580 | setOperationAction(ISD::FREM, MVT::f16, Promote); |
581 | setOperationAction(ISD::FREM, MVT::v4f16, Expand); |
582 | setOperationAction(ISD::FREM, MVT::v8f16, Expand); |
583 | setOperationAction(ISD::FPOW, MVT::f16, Promote); |
584 | setOperationAction(ISD::FPOW, MVT::v4f16, Expand); |
585 | setOperationAction(ISD::FPOW, MVT::v8f16, Expand); |
586 | setOperationAction(ISD::FPOWI, MVT::f16, Promote); |
587 | setOperationAction(ISD::FPOWI, MVT::v4f16, Expand); |
588 | setOperationAction(ISD::FPOWI, MVT::v8f16, Expand); |
589 | setOperationAction(ISD::FCOS, MVT::f16, Promote); |
590 | setOperationAction(ISD::FCOS, MVT::v4f16, Expand); |
591 | setOperationAction(ISD::FCOS, MVT::v8f16, Expand); |
592 | setOperationAction(ISD::FSIN, MVT::f16, Promote); |
593 | setOperationAction(ISD::FSIN, MVT::v4f16, Expand); |
594 | setOperationAction(ISD::FSIN, MVT::v8f16, Expand); |
595 | setOperationAction(ISD::FSINCOS, MVT::f16, Promote); |
596 | setOperationAction(ISD::FSINCOS, MVT::v4f16, Expand); |
597 | setOperationAction(ISD::FSINCOS, MVT::v8f16, Expand); |
598 | setOperationAction(ISD::FEXP, MVT::f16, Promote); |
599 | setOperationAction(ISD::FEXP, MVT::v4f16, Expand); |
600 | setOperationAction(ISD::FEXP, MVT::v8f16, Expand); |
601 | setOperationAction(ISD::FEXP2, MVT::f16, Promote); |
602 | setOperationAction(ISD::FEXP2, MVT::v4f16, Expand); |
603 | setOperationAction(ISD::FEXP2, MVT::v8f16, Expand); |
604 | setOperationAction(ISD::FLOG, MVT::f16, Promote); |
605 | setOperationAction(ISD::FLOG, MVT::v4f16, Expand); |
606 | setOperationAction(ISD::FLOG, MVT::v8f16, Expand); |
607 | setOperationAction(ISD::FLOG2, MVT::f16, Promote); |
608 | setOperationAction(ISD::FLOG2, MVT::v4f16, Expand); |
609 | setOperationAction(ISD::FLOG2, MVT::v8f16, Expand); |
610 | setOperationAction(ISD::FLOG10, MVT::f16, Promote); |
611 | setOperationAction(ISD::FLOG10, MVT::v4f16, Expand); |
612 | setOperationAction(ISD::FLOG10, MVT::v8f16, Expand); |
613 | |
614 | if (!Subtarget->hasFullFP16()) { |
615 | setOperationAction(ISD::SELECT, MVT::f16, Promote); |
616 | setOperationAction(ISD::SELECT_CC, MVT::f16, Promote); |
617 | setOperationAction(ISD::SETCC, MVT::f16, Promote); |
618 | setOperationAction(ISD::BR_CC, MVT::f16, Promote); |
619 | setOperationAction(ISD::FADD, MVT::f16, Promote); |
620 | setOperationAction(ISD::FSUB, MVT::f16, Promote); |
621 | setOperationAction(ISD::FMUL, MVT::f16, Promote); |
622 | setOperationAction(ISD::FDIV, MVT::f16, Promote); |
623 | setOperationAction(ISD::FMA, MVT::f16, Promote); |
624 | setOperationAction(ISD::FNEG, MVT::f16, Promote); |
625 | setOperationAction(ISD::FABS, MVT::f16, Promote); |
626 | setOperationAction(ISD::FCEIL, MVT::f16, Promote); |
627 | setOperationAction(ISD::FSQRT, MVT::f16, Promote); |
628 | setOperationAction(ISD::FFLOOR, MVT::f16, Promote); |
629 | setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote); |
// Tail of an `if` opened above this excerpt — presumably the
// !Subtarget->hasFullFP16() path (the hasFullFP16() branch near line 708
// below marks these same scalar f16 ops Legal instead) — TODO confirm
// against the full file.
// Scalar f16 ops the target cannot do natively here: promote to f32.
630 | setOperationAction(ISD::FRINT, MVT::f16, Promote); |
631 | setOperationAction(ISD::FROUND, MVT::f16, Promote); |
632 | setOperationAction(ISD::FROUNDEVEN, MVT::f16, Promote); |
633 | setOperationAction(ISD::FTRUNC, MVT::f16, Promote); |
634 | setOperationAction(ISD::FMINNUM, MVT::f16, Promote); |
635 | setOperationAction(ISD::FMAXNUM, MVT::f16, Promote); |
636 | setOperationAction(ISD::FMINIMUM, MVT::f16, Promote); |
637 | setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote); |
638 | |
639 | |
// v4f16 arithmetic is promoted, with the promoted type explicitly pinned
// to v4f32 by the AddPromotedToType calls that follow.
640 | setOperationAction(ISD::FADD, MVT::v4f16, Promote); |
641 | setOperationAction(ISD::FSUB, MVT::v4f16, Promote); |
642 | setOperationAction(ISD::FMUL, MVT::v4f16, Promote); |
643 | setOperationAction(ISD::FDIV, MVT::v4f16, Promote); |
644 | AddPromotedToType(ISD::FADD, MVT::v4f16, MVT::v4f32); |
645 | AddPromotedToType(ISD::FSUB, MVT::v4f16, MVT::v4f32); |
646 | AddPromotedToType(ISD::FMUL, MVT::v4f16, MVT::v4f32); |
647 | AddPromotedToType(ISD::FDIV, MVT::v4f16, MVT::v4f32); |
648 | |
// Everything else on v4f16 is expanded (scalarized / legalized generically).
649 | setOperationAction(ISD::FABS, MVT::v4f16, Expand); |
650 | setOperationAction(ISD::FNEG, MVT::v4f16, Expand); |
651 | setOperationAction(ISD::FROUND, MVT::v4f16, Expand); |
652 | setOperationAction(ISD::FROUNDEVEN, MVT::v4f16, Expand); |
653 | setOperationAction(ISD::FMA, MVT::v4f16, Expand); |
654 | setOperationAction(ISD::SETCC, MVT::v4f16, Expand); |
655 | setOperationAction(ISD::BR_CC, MVT::v4f16, Expand); |
656 | setOperationAction(ISD::SELECT, MVT::v4f16, Expand); |
657 | setOperationAction(ISD::SELECT_CC, MVT::v4f16, Expand); |
658 | setOperationAction(ISD::FTRUNC, MVT::v4f16, Expand); |
659 | setOperationAction(ISD::FCOPYSIGN, MVT::v4f16, Expand); |
660 | setOperationAction(ISD::FFLOOR, MVT::v4f16, Expand); |
661 | setOperationAction(ISD::FCEIL, MVT::v4f16, Expand); |
662 | setOperationAction(ISD::FRINT, MVT::v4f16, Expand); |
663 | setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand); |
664 | setOperationAction(ISD::FSQRT, MVT::v4f16, Expand); |
665 | |
// v8f16: all listed ops expanded on this path, including the arithmetic that
// was merely promoted for v4f16 (a promoted v8f16 would need a v8f32, which
// does not fit a 128-bit NEON register).
666 | setOperationAction(ISD::FABS, MVT::v8f16, Expand); |
667 | setOperationAction(ISD::FADD, MVT::v8f16, Expand); |
668 | setOperationAction(ISD::FCEIL, MVT::v8f16, Expand); |
669 | setOperationAction(ISD::FCOPYSIGN, MVT::v8f16, Expand); |
670 | setOperationAction(ISD::FDIV, MVT::v8f16, Expand); |
671 | setOperationAction(ISD::FFLOOR, MVT::v8f16, Expand); |
672 | setOperationAction(ISD::FMA, MVT::v8f16, Expand); |
673 | setOperationAction(ISD::FMUL, MVT::v8f16, Expand); |
674 | setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand); |
675 | setOperationAction(ISD::FNEG, MVT::v8f16, Expand); |
676 | setOperationAction(ISD::FROUND, MVT::v8f16, Expand); |
677 | setOperationAction(ISD::FROUNDEVEN, MVT::v8f16, Expand); |
678 | setOperationAction(ISD::FRINT, MVT::v8f16, Expand); |
679 | setOperationAction(ISD::FSQRT, MVT::v8f16, Expand); |
680 | setOperationAction(ISD::FSUB, MVT::v8f16, Expand); |
681 | setOperationAction(ISD::FTRUNC, MVT::v8f16, Expand); |
682 | setOperationAction(ISD::SETCC, MVT::v8f16, Expand); |
683 | setOperationAction(ISD::BR_CC, MVT::v8f16, Expand); |
684 | setOperationAction(ISD::SELECT, MVT::v8f16, Expand); |
685 | setOperationAction(ISD::SELECT_CC, MVT::v8f16, Expand); |
686 | setOperationAction(ISD::FP_EXTEND, MVT::v8f16, Expand); |
687 | } |
688 | |
689 | |
// Scalar f32/f64: rounding-class ops, min/max variants and the l(l)round /
// l(l)rint conversions are all natively selectable, so mark them Legal.
690 | for (MVT Ty : {MVT::f32, MVT::f64}) { |
691 | setOperationAction(ISD::FFLOOR, Ty, Legal); |
692 | setOperationAction(ISD::FNEARBYINT, Ty, Legal); |
693 | setOperationAction(ISD::FCEIL, Ty, Legal); |
694 | setOperationAction(ISD::FRINT, Ty, Legal); |
695 | setOperationAction(ISD::FTRUNC, Ty, Legal); |
696 | setOperationAction(ISD::FROUND, Ty, Legal); |
697 | setOperationAction(ISD::FROUNDEVEN, Ty, Legal); |
698 | setOperationAction(ISD::FMINNUM, Ty, Legal); |
699 | setOperationAction(ISD::FMAXNUM, Ty, Legal); |
700 | setOperationAction(ISD::FMINIMUM, Ty, Legal); |
701 | setOperationAction(ISD::FMAXIMUM, Ty, Legal); |
702 | setOperationAction(ISD::LROUND, Ty, Legal); |
703 | setOperationAction(ISD::LLROUND, Ty, Legal); |
704 | setOperationAction(ISD::LRINT, Ty, Legal); |
705 | setOperationAction(ISD::LLRINT, Ty, Legal); |
706 | } |
707 | |
// With the full FP16 extension the same rounding/min-max ops are Legal on
// scalar f16 too; this overrides the Promote defaults set earlier in the
// constructor for the no-FP16 case.
708 | if (Subtarget->hasFullFP16()) { |
709 | setOperationAction(ISD::FNEARBYINT, MVT::f16, Legal); |
710 | setOperationAction(ISD::FFLOOR, MVT::f16, Legal); |
711 | setOperationAction(ISD::FCEIL, MVT::f16, Legal); |
712 | setOperationAction(ISD::FRINT, MVT::f16, Legal); |
713 | setOperationAction(ISD::FTRUNC, MVT::f16, Legal); |
714 | setOperationAction(ISD::FROUND, MVT::f16, Legal); |
715 | setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal); |
716 | setOperationAction(ISD::FMINNUM, MVT::f16, Legal); |
717 | setOperationAction(ISD::FMAXNUM, MVT::f16, Legal); |
718 | setOperationAction(ISD::FMINIMUM, MVT::f16, Legal); |
719 | setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal); |
720 | } |
721 | |
// Prefetch and dynamic rounding-mode access need target-specific lowering.
722 | setOperationAction(ISD::PREFETCH, MVT::Other, Custom); |
723 | |
724 | setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); |
725 | setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom); |
726 | |
// 128-bit compare-and-swap is lowered specially; atomic sub/and get Custom
// lowering — presumably so they can be re-expressed in terms of the
// complementary add/clear forms the ISA provides — TODO confirm against the
// corresponding Lower* routines.
727 | setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom); |
728 | setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom); |
729 | setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom); |
730 | setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom); |
731 | setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom); |
732 | |
733 | |
734 | |
// Outline atomics: when requested and LSE is not statically available,
// route the common atomic RMW/CAS operations to the __aarch64_* helper
// routines (which select LSE vs. LL/SC at run time).
735 | if (Subtarget->outlineAtomics() && !Subtarget->hasLSE()) { |
736 | setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, LibCall); |
737 | setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, LibCall); |
738 | setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, LibCall); |
739 | setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, LibCall); |
740 | setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, LibCall); |
741 | setOperationAction(ISD::ATOMIC_SWAP, MVT::i8, LibCall); |
742 | setOperationAction(ISD::ATOMIC_SWAP, MVT::i16, LibCall); |
743 | setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, LibCall); |
744 | setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, LibCall); |
745 | setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i8, LibCall); |
746 | setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i16, LibCall); |
747 | setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, LibCall); |
748 | setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, LibCall); |
749 | setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i8, LibCall); |
750 | setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i16, LibCall); |
751 | setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, LibCall); |
752 | setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, LibCall); |
753 | setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i8, LibCall); |
754 | setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i16, LibCall); |
755 | setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i32, LibCall); |
756 | setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i64, LibCall); |
757 | setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i8, LibCall); |
758 | setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i16, LibCall); |
759 | setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, LibCall); |
760 | setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, LibCall); |
// Register the helper names for each memory-ordering variant
// (_relax/_acq/_rel/_acq_rel) and each access size: 1/2/4/8 bytes for the
// RMW families, plus 16 bytes for CAS only.
761 | #define LCALLNAMES(A, B, N) \ |
762 | setLibcallName(A##N##_RELAX, #B #N "_relax"); \ |
763 | setLibcallName(A##N##_ACQ, #B #N "_acq"); \ |
764 | setLibcallName(A##N##_REL, #B #N "_rel"); \ |
765 | setLibcallName(A##N##_ACQ_REL, #B #N "_acq_rel"); |
766 | #define LCALLNAME4(A, B) \ |
767 | LCALLNAMES(A, B, 1) \ |
768 | LCALLNAMES(A, B, 2) LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8) |
769 | #define LCALLNAME5(A, B) \ |
770 | LCALLNAMES(A, B, 1) \ |
771 | LCALLNAMES(A, B, 2) \ |
772 | LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8) LCALLNAMES(A, B, 16) |
773 | LCALLNAME5(RTLIB::OUTLINE_ATOMIC_CAS, __aarch64_cas) |
774 | LCALLNAME4(RTLIB::OUTLINE_ATOMIC_SWP, __aarch64_swp) |
775 | LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDADD, __aarch64_ldadd) |
776 | LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDSET, __aarch64_ldset) |
777 | LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDCLR, __aarch64_ldclr) |
778 | LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDEOR, __aarch64_ldeor) |
779 | #undef LCALLNAMES |
780 | #undef LCALLNAME4 |
781 | #undef LCALLNAME5 |
782 | } |
783 | |
784 | |
// 128-bit integer loads/stores get Custom lowering.
785 | setOperationAction(ISD::LOAD, MVT::i128, Custom); |
786 | setOperationAction(ISD::STORE, MVT::i128, Custom); |
787 | |
788 | |
789 | |
790 | |
// 256-bit (double-register-width) vector stores are lowered Custom —
// presumably split into two 128-bit stores; TODO confirm in LowerSTORE.
791 | setOperationAction(ISD::STORE, MVT::v32i8, Custom); |
792 | setOperationAction(ISD::STORE, MVT::v16i16, Custom); |
793 | setOperationAction(ISD::STORE, MVT::v16f16, Custom); |
794 | setOperationAction(ISD::STORE, MVT::v8i32, Custom); |
795 | setOperationAction(ISD::STORE, MVT::v8f32, Custom); |
796 | setOperationAction(ISD::STORE, MVT::v4f64, Custom); |
797 | setOperationAction(ISD::STORE, MVT::v4i64, Custom); |
798 | |
799 | |
800 | |
// Cycle-counter reads are only selectable when the PMU extension exists.
801 | if (Subtarget->hasPerfMon()) |
802 | setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal); |
803 | |
// Use the combined sincos "stret" libcall when the runtime provides it;
// otherwise expand FSINCOS into separate sin/cos calls.
804 | if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr && |
805 | getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) { |
806 | |
807 | setOperationAction(ISD::FSINCOS, MVT::f64, Custom); |
808 | setOperationAction(ISD::FSINCOS, MVT::f32, Custom); |
809 | } else { |
810 | setOperationAction(ISD::FSINCOS, MVT::f64, Expand); |
811 | setOperationAction(ISD::FSINCOS, MVT::f32, Expand); |
812 | } |
813 | |
// MSVC runtimes do not ship the __powisf2/__powidf2 helpers, so drop the
// libcall names to prevent emitting calls to them.
814 | if (Subtarget->getTargetTriple().isOSMSVCRT()) { |
815 | |
816 | setLibcallName(RTLIB::POWI_F32, nullptr); |
817 | setLibcallName(RTLIB::POWI_F64, nullptr); |
818 | } |
819 | |
820 | |
821 | |
// Mach-O + large code model: keep FP constants as ConstantFP nodes (Legal)
// rather than turning them into constant-pool loads.
822 | if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) { |
823 | setOperationAction(ISD::ConstantFP, MVT::f32, Legal); |
824 | setOperationAction(ISD::ConstantFP, MVT::f64, Legal); |
825 | } |
826 | |
827 | |
828 | |
// No combined FP extending loads or truncating stores of any kind: expand
// them into a plain load/store plus an explicit fpext/fptrunc.
829 | for (MVT VT : MVT::fp_valuetypes()) { |
830 | setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand); |
831 | setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand); |
832 | setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand); |
833 | setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand); |
834 | } |
// Likewise no sign-extending load directly from i1.
835 | for (MVT VT : MVT::integer_valuetypes()) |
836 | setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Expand); |
837 | |
838 | setTruncStoreAction(MVT::f32, MVT::f16, Expand); |
839 | setTruncStoreAction(MVT::f64, MVT::f32, Expand); |
840 | setTruncStoreAction(MVT::f64, MVT::f16, Expand); |
841 | setTruncStoreAction(MVT::f128, MVT::f80, Expand); |
842 | setTruncStoreAction(MVT::f128, MVT::f64, Expand); |
843 | setTruncStoreAction(MVT::f128, MVT::f32, Expand); |
844 | setTruncStoreAction(MVT::f128, MVT::f16, Expand); |
845 | |
// 16-bit scalar bitcasts (i16/f16/bf16) need target-specific handling.
846 | setOperationAction(ISD::BITCAST, MVT::i16, Custom); |
847 | setOperationAction(ISD::BITCAST, MVT::f16, Custom); |
848 | setOperationAction(ISD::BITCAST, MVT::bf16, Custom); |
849 | |
850 | |
// Every indexed addressing mode (pre-/post-increment and the rest of the
// ISD indexed-mode enum) is Legal for the listed scalar types.
851 | for (unsigned im = (unsigned)ISD::PRE_INC; |
852 | im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { |
853 | setIndexedLoadAction(im, MVT::i8, Legal); |
854 | setIndexedLoadAction(im, MVT::i16, Legal); |
855 | setIndexedLoadAction(im, MVT::i32, Legal); |
856 | setIndexedLoadAction(im, MVT::i64, Legal); |
857 | setIndexedLoadAction(im, MVT::f64, Legal); |
858 | setIndexedLoadAction(im, MVT::f32, Legal); |
859 | setIndexedLoadAction(im, MVT::f16, Legal); |
860 | setIndexedLoadAction(im, MVT::bf16, Legal); |
861 | setIndexedStoreAction(im, MVT::i8, Legal); |
862 | setIndexedStoreAction(im, MVT::i16, Legal); |
863 | setIndexedStoreAction(im, MVT::i32, Legal); |
864 | setIndexedStoreAction(im, MVT::i64, Legal); |
865 | setIndexedStoreAction(im, MVT::f64, Legal); |
866 | setIndexedStoreAction(im, MVT::f32, Legal); |
867 | setIndexedStoreAction(im, MVT::f16, Legal); |
868 | setIndexedStoreAction(im, MVT::bf16, Legal); |
869 | } |
870 | |
871 | |
// Trap nodes map directly to instructions.
872 | setOperationAction(ISD::TRAP, MVT::Other, Legal); |
873 | setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); |
874 | setOperationAction(ISD::UBSANTRAP, MVT::Other, Legal); |
875 | |
876 | |
// Register the node kinds this target wants PerformDAGCombine called on.
877 | setTargetDAGCombine(ISD::OR); |
878 | |
879 | setTargetDAGCombine(ISD::AND); |
880 | |
881 | |
882 | |
883 | setTargetDAGCombine(ISD::ADD); |
884 | setTargetDAGCombine(ISD::ABS); |
885 | setTargetDAGCombine(ISD::SUB); |
886 | setTargetDAGCombine(ISD::SRL); |
887 | setTargetDAGCombine(ISD::XOR); |
888 | setTargetDAGCombine(ISD::SINT_TO_FP); |
889 | setTargetDAGCombine(ISD::UINT_TO_FP); |
890 | |
891 | |
892 | setTargetDAGCombine(ISD::FP_TO_SINT); |
893 | setTargetDAGCombine(ISD::FP_TO_UINT); |
894 | setTargetDAGCombine(ISD::FDIV); |
895 | |
896 | |
897 | setTargetDAGCombine(ISD::SETCC); |
898 | |
899 | setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); |
900 | |
901 | setTargetDAGCombine(ISD::ANY_EXTEND); |
902 | setTargetDAGCombine(ISD::ZERO_EXTEND); |
903 | setTargetDAGCombine(ISD::SIGN_EXTEND); |
904 | setTargetDAGCombine(ISD::VECTOR_SPLICE); |
905 | setTargetDAGCombine(ISD::SIGN_EXTEND_INREG); |
906 | setTargetDAGCombine(ISD::TRUNCATE); |
907 | setTargetDAGCombine(ISD::CONCAT_VECTORS); |
908 | setTargetDAGCombine(ISD::INSERT_SUBVECTOR); |
909 | setTargetDAGCombine(ISD::STORE); |
// LOAD combines are only useful when the top address byte may be ignored
// (TBI), so only pay for them in that configuration.
910 | if (Subtarget->supportsAddressTopByteIgnored()) |
911 | setTargetDAGCombine(ISD::LOAD); |
912 | |
913 | setTargetDAGCombine(ISD::MUL); |
914 | |
915 | setTargetDAGCombine(ISD::SELECT); |
916 | setTargetDAGCombine(ISD::VSELECT); |
917 | |
918 | setTargetDAGCombine(ISD::INTRINSIC_VOID); |
919 | setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); |
920 | setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); |
921 | setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); |
922 | setTargetDAGCombine(ISD::VECREDUCE_ADD); |
923 | setTargetDAGCombine(ISD::STEP_VECTOR); |
924 | |
925 | setTargetDAGCombine(ISD::GlobalAddress); |
926 | |
927 | |
// Inline-expansion thresholds for memset/memcpy/memmove/memcmp. The limits
// are lowered to the -Os values when strict alignment is required, since
// unaligned wide accesses are then unavailable.
928 | MaxStoresPerMemsetOptSize = 8; |
929 | MaxStoresPerMemset = Subtarget->requiresStrictAlign() |
930 | ? MaxStoresPerMemsetOptSize : 32; |
931 | |
932 | MaxGluedStoresPerMemcpy = 4; |
933 | MaxStoresPerMemcpyOptSize = 4; |
934 | MaxStoresPerMemcpy = Subtarget->requiresStrictAlign() |
935 | ? MaxStoresPerMemcpyOptSize : 16; |
936 | |
937 | MaxStoresPerMemmoveOptSize = MaxStoresPerMemmove = 4; |
938 | |
939 | MaxLoadsPerMemcmpOptSize = 4; |
940 | MaxLoadsPerMemcmp = Subtarget->requiresStrictAlign() |
941 | ? MaxLoadsPerMemcmpOptSize : 8; |
942 | |
943 | setStackPointerRegisterToSaveRestore(AArch64::SP); |
944 | |
945 | setSchedulingPreference(Sched::Hybrid); |
946 | |
947 | EnableExtLdPromotion = true; |
948 | |
949 | |
// Functions are at least 4-byte aligned (the A64 instruction size); loop
// and function alignment preferences come from the subtarget tuning.
950 | setMinFunctionAlignment(Align(4)); |
951 | |
952 | setPrefLoopAlignment(Align(1ULL << STI.getPrefLoopLogAlignment())); |
953 | setPrefFunctionAlignment(Align(1ULL << STI.getPrefFunctionLogAlignment())); |
954 | |
955 | |
956 | |
// Apply the subtarget's jump-table size cap only if no explicit cap was
// already set (getMaximumJumpTableSize() == UINT_MAX means "unset").
957 | unsigned MaxJT = STI.getMaximumJumpTableSize(); |
958 | if (MaxJT && getMaximumJumpTableSize() == UINT_MAX) |
959 | setMaximumJumpTableSize(MaxJT); |
960 | |
// The target has bitfield-extract instructions.
961 | setHasExtractBitsInsn(true); |
962 | |
963 | setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); |
964 | |
// NEON-specific configuration.
965 | if (Subtarget->hasNEON()) { |
966 | |
967 | |
// v1f64 (single-element FP vector): expand essentially everything so it is
// handled via the scalar f64 forms.
968 | setOperationAction(ISD::FABS, MVT::v1f64, Expand); |
969 | setOperationAction(ISD::FADD, MVT::v1f64, Expand); |
970 | setOperationAction(ISD::FCEIL, MVT::v1f64, Expand); |
971 | setOperationAction(ISD::FCOPYSIGN, MVT::v1f64, Expand); |
972 | setOperationAction(ISD::FCOS, MVT::v1f64, Expand); |
973 | setOperationAction(ISD::FDIV, MVT::v1f64, Expand); |
974 | setOperationAction(ISD::FFLOOR, MVT::v1f64, Expand); |
975 | setOperationAction(ISD::FMA, MVT::v1f64, Expand); |
976 | setOperationAction(ISD::FMUL, MVT::v1f64, Expand); |
977 | setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Expand); |
978 | setOperationAction(ISD::FNEG, MVT::v1f64, Expand); |
979 | setOperationAction(ISD::FPOW, MVT::v1f64, Expand); |
980 | setOperationAction(ISD::FREM, MVT::v1f64, Expand); |
981 | setOperationAction(ISD::FROUND, MVT::v1f64, Expand); |
982 | setOperationAction(ISD::FROUNDEVEN, MVT::v1f64, Expand); |
983 | setOperationAction(ISD::FRINT, MVT::v1f64, Expand); |
984 | setOperationAction(ISD::FSIN, MVT::v1f64, Expand); |
985 | setOperationAction(ISD::FSINCOS, MVT::v1f64, Expand); |
986 | setOperationAction(ISD::FSQRT, MVT::v1f64, Expand); |
987 | setOperationAction(ISD::FSUB, MVT::v1f64, Expand); |
988 | setOperationAction(ISD::FTRUNC, MVT::v1f64, Expand); |
989 | setOperationAction(ISD::SETCC, MVT::v1f64, Expand); |
990 | setOperationAction(ISD::BR_CC, MVT::v1f64, Expand); |
991 | setOperationAction(ISD::SELECT, MVT::v1f64, Expand); |
992 | setOperationAction(ISD::SELECT_CC, MVT::v1f64, Expand); |
993 | setOperationAction(ISD::FP_EXTEND, MVT::v1f64, Expand); |
994 | |
// v1i64 <-> FP conversions are likewise expanded (scalarized).
995 | setOperationAction(ISD::FP_TO_SINT, MVT::v1i64, Expand); |
996 | setOperationAction(ISD::FP_TO_UINT, MVT::v1i64, Expand); |
997 | setOperationAction(ISD::SINT_TO_FP, MVT::v1i64, Expand); |
998 | setOperationAction(ISD::UINT_TO_FP, MVT::v1i64, Expand); |
999 | setOperationAction(ISD::FP_ROUND, MVT::v1f64, Expand); |
1000 | |
1001 | setOperationAction(ISD::FP_TO_SINT_SAT, MVT::v1i64, Expand); |
1002 | setOperationAction(ISD::FP_TO_UINT_SAT, MVT::v1i64, Expand); |
1003 | |
1004 | setOperationAction(ISD::MUL, MVT::v1i64, Expand); |
1005 | |
1006 | |
1007 | |
// Narrow int vectors converted to FP: widen the integer element to i32
// first (there is no direct i8 -> FP vector conversion).
1008 | setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i8, MVT::v4i32); |
1009 | setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i8, MVT::v4i32); |
1010 | setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32); |
1011 | setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32); |
1012 | setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v16i8, MVT::v16i32); |
1013 | setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v16i8, MVT::v16i32); |
1014 | |
1015 | |
1016 | setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom); |
1017 | setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom); |
1018 | setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom); |
1019 | setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom); |
1020 | |
1021 | |
1022 | setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom); |
1023 | setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom); |
1024 | |
// i16 vectors -> FP: direct Custom lowering with full FP16 (target f16
// vectors), otherwise promote the integer side to i32 vectors.
1025 | if (Subtarget->hasFullFP16()) { |
1026 | setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom); |
1027 | setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom); |
1028 | setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom); |
1029 | setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom); |
1030 | } else { |
1031 | |
1032 | |
1033 | setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i16, MVT::v4i32); |
1034 | setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i16, MVT::v4i32); |
1035 | setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i16, MVT::v8i32); |
1036 | setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i16, MVT::v8i32); |
1037 | } |
1038 | |
// No vector CTLZ for 64-bit elements; BITREVERSE is native only for byte
// vectors, Custom elsewhere.
1039 | setOperationAction(ISD::CTLZ, MVT::v1i64, Expand); |
1040 | setOperationAction(ISD::CTLZ, MVT::v2i64, Expand); |
1041 | setOperationAction(ISD::BITREVERSE, MVT::v8i8, Legal); |
1042 | setOperationAction(ISD::BITREVERSE, MVT::v16i8, Legal); |
1043 | setOperationAction(ISD::BITREVERSE, MVT::v2i32, Custom); |
1044 | setOperationAction(ISD::BITREVERSE, MVT::v4i32, Custom); |
1045 | setOperationAction(ISD::BITREVERSE, MVT::v1i64, Custom); |
1046 | setOperationAction(ISD::BITREVERSE, MVT::v2i64, Custom); |
// 64-bit-element min/max need Custom lowering (no direct instruction form).
1047 | for (auto VT : {MVT::v1i64, MVT::v2i64}) { |
1048 | setOperationAction(ISD::UMAX, VT, Custom); |
1049 | setOperationAction(ISD::SMAX, VT, Custom); |
1050 | setOperationAction(ISD::UMIN, VT, Custom); |
1051 | setOperationAction(ISD::SMIN, VT, Custom); |
1052 | } |
1053 | |
1054 | |
// NOTE: the Expand for MUL v2i64 here is immediately overridden by the
// Custom entry three lines below — the later setOperationAction call wins.
1055 | setOperationAction(ISD::MUL, MVT::v2i64, Expand); |
1056 | |
1057 | setOperationAction(ISD::MUL, MVT::v8i16, Custom); |
1058 | setOperationAction(ISD::MUL, MVT::v4i32, Custom); |
1059 | setOperationAction(ISD::MUL, MVT::v2i64, Custom); |
1060 | |
1061 | |
// Saturating add/sub are native for all standard NEON integer vectors.
1062 | for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32, |
1063 | MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { |
1064 | setOperationAction(ISD::SADDSAT, VT, Legal); |
1065 | setOperationAction(ISD::UADDSAT, VT, Legal); |
1066 | setOperationAction(ISD::SSUBSAT, VT, Legal); |
1067 | setOperationAction(ISD::USUBSAT, VT, Legal); |
1068 | } |
1069 | |
// Absolute-difference ops are native except for 64-bit elements.
1070 | for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16, |
1071 | MVT::v4i32}) { |
1072 | setOperationAction(ISD::ABDS, VT, Legal); |
1073 | setOperationAction(ISD::ABDU, VT, Legal); |
1074 | } |
1075 | |
1076 | |
// FP reductions: fmax/fmin Custom, fadd native — but only for f16 element
// vectors when full FP16 is present.
1077 | for (MVT VT : { MVT::v4f16, MVT::v2f32, |
1078 | MVT::v8f16, MVT::v4f32, MVT::v2f64 }) { |
1079 | if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) { |
1080 | setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom); |
1081 | setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom); |
1082 | |
1083 | setOperationAction(ISD::VECREDUCE_FADD, VT, Legal); |
1084 | } |
1085 | } |
// Integer reductions are Custom-lowered (addv/smaxv/... patterns).
1086 | for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32, |
1087 | MVT::v16i8, MVT::v8i16, MVT::v4i32 }) { |
1088 | setOperationAction(ISD::VECREDUCE_ADD, VT, Custom); |
1089 | setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom); |
1090 | setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom); |
1091 | setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom); |
1092 | setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom); |
1093 | } |
1094 | setOperationAction(ISD::VECREDUCE_ADD, MVT::v2i64, Custom); |
1095 | |
1096 | setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal); |
1097 | setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand); |
1098 | |
1099 | |
// Defaults for every fixed-length vector type: no SIGN_EXTEND_INREG,
// high-half multiplies only where native, and no vector truncating stores
// or extending loads between any pair of fixed vector types.
1100 | for (MVT VT : MVT::fixedlen_vector_valuetypes()) { |
1101 | setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); |
1102 | |
1103 | if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32) { |
1104 | setOperationAction(ISD::MULHS, VT, Legal); |
1105 | setOperationAction(ISD::MULHU, VT, Legal); |
1106 | } else { |
1107 | setOperationAction(ISD::MULHS, VT, Expand); |
1108 | setOperationAction(ISD::MULHU, VT, Expand); |
1109 | } |
1110 | setOperationAction(ISD::SMUL_LOHI, VT, Expand); |
1111 | setOperationAction(ISD::UMUL_LOHI, VT, Expand); |
1112 | |
1113 | setOperationAction(ISD::BSWAP, VT, Expand); |
1114 | setOperationAction(ISD::CTTZ, VT, Expand); |
1115 | |
1116 | for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) { |
1117 | setTruncStoreAction(VT, InnerVT, Expand); |
1118 | setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); |
1119 | setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); |
1120 | setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); |
1121 | } |
1122 | } |
1123 | |
1124 | |
// Vector rounding-class ops are native for f32/f64 element vectors...
1125 | for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) { |
1126 | setOperationAction(ISD::FFLOOR, Ty, Legal); |
1127 | setOperationAction(ISD::FNEARBYINT, Ty, Legal); |
1128 | setOperationAction(ISD::FCEIL, Ty, Legal); |
1129 | setOperationAction(ISD::FRINT, Ty, Legal); |
1130 | setOperationAction(ISD::FTRUNC, Ty, Legal); |
1131 | setOperationAction(ISD::FROUND, Ty, Legal); |
1132 | setOperationAction(ISD::FROUNDEVEN, Ty, Legal); |
1133 | } |
1134 | |
// ...and for f16 element vectors too when full FP16 is available.
1135 | if (Subtarget->hasFullFP16()) { |
1136 | for (MVT Ty : {MVT::v4f16, MVT::v8f16}) { |
1137 | setOperationAction(ISD::FFLOOR, Ty, Legal); |
1138 | setOperationAction(ISD::FNEARBYINT, Ty, Legal); |
1139 | setOperationAction(ISD::FCEIL, Ty, Legal); |
1140 | setOperationAction(ISD::FRINT, Ty, Legal); |
1141 | setOperationAction(ISD::FTRUNC, Ty, Legal); |
1142 | setOperationAction(ISD::FROUND, Ty, Legal); |
1143 | setOperationAction(ISD::FROUNDEVEN, Ty, Legal); |
1144 | } |
1145 | } |
1146 | |
1147 | if (Subtarget->hasSVE()) |
1148 | setOperationAction(ISD::VSCALE, MVT::i32, Custom); |
1149 | |
// Narrow i8-element accesses get Custom handling (overrides the blanket
// Expand set in the fixed-vector loop above).
1150 | setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom); |
1151 | |
1152 | setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, MVT::v4i8, Custom); |
1153 | setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Custom); |
1154 | setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Custom); |
1155 | setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i8, Custom); |
1156 | setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8, Custom); |
1157 | setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i8, Custom); |
1158 | } |
1159 | |
// SVE-specific configuration. This `if` block continues past the end of
// this excerpt.
1160 | if (Subtarget->hasSVE()) { |
// Full-width scalable integer vectors: nearly everything is Custom-lowered
// to SVE-specific nodes; the handful of ops SVE has no form for is expanded.
1161 | for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) { |
1162 | setOperationAction(ISD::BITREVERSE, VT, Custom); |
1163 | setOperationAction(ISD::BSWAP, VT, Custom); |
1164 | setOperationAction(ISD::CTLZ, VT, Custom); |
1165 | setOperationAction(ISD::CTPOP, VT, Custom); |
1166 | setOperationAction(ISD::CTTZ, VT, Custom); |
1167 | setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); |
1168 | setOperationAction(ISD::UINT_TO_FP, VT, Custom); |
1169 | setOperationAction(ISD::SINT_TO_FP, VT, Custom); |
1170 | setOperationAction(ISD::FP_TO_UINT, VT, Custom); |
1171 | setOperationAction(ISD::FP_TO_SINT, VT, Custom); |
1172 | setOperationAction(ISD::MGATHER, VT, Custom); |
1173 | setOperationAction(ISD::MSCATTER, VT, Custom); |
1174 | setOperationAction(ISD::MLOAD, VT, Custom); |
1175 | setOperationAction(ISD::MUL, VT, Custom); |
1176 | setOperationAction(ISD::MULHS, VT, Custom); |
1177 | setOperationAction(ISD::MULHU, VT, Custom); |
1178 | setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); |
1179 | setOperationAction(ISD::VECTOR_SPLICE, VT, Custom); |
1180 | setOperationAction(ISD::SELECT, VT, Custom); |
1181 | setOperationAction(ISD::SETCC, VT, Custom); |
1182 | setOperationAction(ISD::SDIV, VT, Custom); |
1183 | setOperationAction(ISD::UDIV, VT, Custom); |
1184 | setOperationAction(ISD::SMIN, VT, Custom); |
1185 | setOperationAction(ISD::UMIN, VT, Custom); |
1186 | setOperationAction(ISD::SMAX, VT, Custom); |
1187 | setOperationAction(ISD::UMAX, VT, Custom); |
1188 | setOperationAction(ISD::SHL, VT, Custom); |
1189 | setOperationAction(ISD::SRL, VT, Custom); |
1190 | setOperationAction(ISD::SRA, VT, Custom); |
1191 | setOperationAction(ISD::ABS, VT, Custom); |
1192 | setOperationAction(ISD::VECREDUCE_ADD, VT, Custom); |
1193 | setOperationAction(ISD::VECREDUCE_AND, VT, Custom); |
1194 | setOperationAction(ISD::VECREDUCE_OR, VT, Custom); |
1195 | setOperationAction(ISD::VECREDUCE_XOR, VT, Custom); |
1196 | setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom); |
1197 | setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom); |
1198 | setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom); |
1199 | setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom); |
1200 | |
1201 | setOperationAction(ISD::UMUL_LOHI, VT, Expand); |
1202 | setOperationAction(ISD::SMUL_LOHI, VT, Expand); |
1203 | setOperationAction(ISD::SELECT_CC, VT, Expand); |
1204 | setOperationAction(ISD::ROTL, VT, Expand); |
1205 | setOperationAction(ISD::ROTR, VT, Expand); |
1206 | } |
1207 | |
1208 | |
// Unpacked (partially-filled) integer scalable vectors need Custom
// subvector insert/extract.
1209 | for (auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32}) { |
1210 | setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); |
1211 | setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); |
1212 | } |
1213 | |
1214 | |
// Bitcasts between unpacked scalable types need Custom handling.
1215 | for (auto VT : {MVT::nxv2i16, MVT::nxv4i16, MVT::nxv2i32, MVT::nxv2bf16, |
1216 | MVT::nxv2f16, MVT::nxv4f16, MVT::nxv2f32}) |
1217 | setOperationAction(ISD::BITCAST, VT, Custom); |
1218 | |
// Predicate (nxv*i1) vectors.
1219 | for (auto VT : {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1}) { |
1220 | setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); |
1221 | setOperationAction(ISD::SELECT, VT, Custom); |
1222 | setOperationAction(ISD::SETCC, VT, Custom); |
1223 | setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); |
1224 | setOperationAction(ISD::TRUNCATE, VT, Custom); |
1225 | setOperationAction(ISD::VECREDUCE_AND, VT, Custom); |
1226 | setOperationAction(ISD::VECREDUCE_OR, VT, Custom); |
1227 | setOperationAction(ISD::VECREDUCE_XOR, VT, Custom); |
1228 | |
1229 | setOperationAction(ISD::SELECT_CC, VT, Expand); |
1230 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
1231 | setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
1232 | |
1233 | |
// Predicate -> FP conversions are Custom except from nxv16i1 (no FP type
// with 16 scalable elements to convert into).
1234 | if (VT != MVT::nxv16i1) { |
1235 | setOperationAction(ISD::SINT_TO_FP, VT, Custom); |
1236 | setOperationAction(ISD::UINT_TO_FP, VT, Custom); |
1237 | } |
1238 | } |
1239 | |
1240 | |
// With SVE available, masked load/store/gather/scatter on the fixed-length
// NEON types also become Custom — presumably lowered via SVE predication;
// TODO confirm in the masked-memop lowering code.
1241 | for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v1f64, |
1242 | MVT::v2f64, MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, |
1243 | MVT::v2i32, MVT::v4i32, MVT::v1i64, MVT::v2i64}) { |
1244 | setOperationAction(ISD::MLOAD, VT, Custom); |
1245 | setOperationAction(ISD::MSTORE, VT, Custom); |
1246 | setOperationAction(ISD::MGATHER, VT, Custom); |
1247 | setOperationAction(ISD::MSCATTER, VT, Custom); |
1248 | } |
1249 | |
// No truncating stores or extending loads between any pair of scalable FP
// vector types.
1250 | for (MVT VT : MVT::fp_scalable_vector_valuetypes()) { |
1251 | for (MVT InnerVT : MVT::fp_scalable_vector_valuetypes()) { |
1252 | |
1253 | |
1254 | setTruncStoreAction(VT, InnerVT, Expand); |
1255 | |
1256 | setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); |
1257 | setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); |
1258 | setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); |
1259 | } |
1260 | } |
1261 | |
1262 | |
// Selected fixed-vector truncating stores get Custom lowering here,
// overriding the blanket Expand from the fixed-vector loop.
1263 | setTruncStoreAction(MVT::v2i64, MVT::v2i8, Custom); |
1264 | setTruncStoreAction(MVT::v2i64, MVT::v2i16, Custom); |
1265 | setTruncStoreAction(MVT::v2i64, MVT::v2i32, Custom); |
1266 | setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom); |
1267 | setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom); |
1268 | |
// Scalable FP vectors: arithmetic, rounding, reductions and masked memops
// are all Custom-lowered to SVE nodes.
1269 | for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32, |
1270 | MVT::nxv4f32, MVT::nxv2f64}) { |
1271 | setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); |
1272 | setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); |
1273 | setOperationAction(ISD::MGATHER, VT, Custom); |
1274 | setOperationAction(ISD::MSCATTER, VT, Custom); |
1275 | setOperationAction(ISD::MLOAD, VT, Custom); |
1276 | setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); |
1277 | setOperationAction(ISD::SELECT, VT, Custom); |
1278 | setOperationAction(ISD::FADD, VT, Custom); |
1279 | setOperationAction(ISD::FCOPYSIGN, VT, Custom); |
1280 | setOperationAction(ISD::FDIV, VT, Custom); |
1281 | setOperationAction(ISD::FMA, VT, Custom); |
1282 | setOperationAction(ISD::FMAXIMUM, VT, Custom); |
1283 | setOperationAction(ISD::FMAXNUM, VT, Custom); |
1284 | setOperationAction(ISD::FMINIMUM, VT, Custom); |
1285 | setOperationAction(ISD::FMINNUM, VT, Custom); |
1286 | setOperationAction(ISD::FMUL, VT, Custom); |
1287 | setOperationAction(ISD::FNEG, VT, Custom); |
1288 | setOperationAction(ISD::FSUB, VT, Custom); |
1289 | setOperationAction(ISD::FCEIL, VT, Custom); |
1290 | setOperationAction(ISD::FFLOOR, VT, Custom); |
1291 | setOperationAction(ISD::FNEARBYINT, VT, Custom); |
1292 | setOperationAction(ISD::FRINT, VT, Custom); |
1293 | setOperationAction(ISD::FROUND, VT, Custom); |
1294 | setOperationAction(ISD::FROUNDEVEN, VT, Custom); |
1295 | setOperationAction(ISD::FTRUNC, VT, Custom); |
1296 | setOperationAction(ISD::FSQRT, VT, Custom); |
1297 | setOperationAction(ISD::FABS, VT, Custom); |
1298 | setOperationAction(ISD::FP_EXTEND, VT, Custom); |
1299 | setOperationAction(ISD::FP_ROUND, VT, Custom); |
1300 | setOperationAction(ISD::VECREDUCE_FADD, VT, Custom); |
1301 | setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom); |
1302 | setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom); |
1303 | setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom); |
1304 | setOperationAction(ISD::VECTOR_SPLICE, VT, Custom); |
1305 | |
1306 | setOperationAction(ISD::SELECT_CC, VT, Expand); |
1307 | } |
1308 | |
// Scalable bf16 vectors: only structural and masked-memory ops are handled.
1309 | for (auto VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) { |
1310 | setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); |
1311 | setOperationAction(ISD::MGATHER, VT, Custom); |
1312 | setOperationAction(ISD::MSCATTER, VT, Custom); |
1313 | setOperationAction(ISD::MLOAD, VT, Custom); |
1314 | } |
1315 | |
1316 | setOperationAction(ISD::SPLAT_VECTOR, MVT::nxv8bf16, Custom); |
1317 | |
1318 | setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom); |
1319 | setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom); |
1320 | |
1321 | |
1322 | |
// Fixed-length vectors lowered through SVE: register each qualifying fixed
// type, then install the Custom overrides below.
1323 | if (Subtarget->useSVEForFixedLengthVectors()) { |
1324 | for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) |
1325 | if (useSVEForFixedLengthVectorVT(VT)) |
1326 | addTypeForFixedLengthSVE(VT); |
1327 | for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) |
1328 | if (useSVEForFixedLengthVectorVT(VT)) |
1329 | addTypeForFixedLengthSVE(VT); |
1330 | |
1331 | |
// 64-bit and 128-bit results of TRUNCATE / FP_ROUND need Custom lowering.
1332 | for (auto VT : {MVT::v8i8, MVT::v4i16}) |
1333 | setOperationAction(ISD::TRUNCATE, VT, Custom); |
1334 | setOperationAction(ISD::FP_ROUND, MVT::v4f16, Custom); |
1335 | |
1336 | |
1337 | for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32}) |
1338 | setOperationAction(ISD::TRUNCATE, VT, Custom); |
1339 | for (auto VT : {MVT::v8f16, MVT::v4f32}) |
1340 | setOperationAction(ISD::FP_ROUND, VT, Custom); |
1341 | |
1342 | |
// Redirect a range of NEON-size operations to the SVE Custom lowerings.
1343 | setOperationAction(ISD::BITREVERSE, MVT::v1i64, Custom); |
1344 | setOperationAction(ISD::CTLZ, MVT::v1i64, Custom); |
1345 | setOperationAction(ISD::CTLZ, MVT::v2i64, Custom); |
1346 | setOperationAction(ISD::CTTZ, MVT::v1i64, Custom); |
1347 | setOperationAction(ISD::MUL, MVT::v1i64, Custom); |
1348 | setOperationAction(ISD::MUL, MVT::v2i64, Custom); |
1349 | setOperationAction(ISD::MULHS, MVT::v1i64, Custom); |
1350 | setOperationAction(ISD::MULHS, MVT::v2i64, Custom); |
1351 | setOperationAction(ISD::MULHU, MVT::v1i64, Custom); |
1352 | setOperationAction(ISD::MULHU, MVT::v2i64, Custom); |
1353 | setOperationAction(ISD::SDIV, MVT::v8i8, Custom); |
1354 | setOperationAction(ISD::SDIV, MVT::v16i8, Custom); |
1355 | setOperationAction(ISD::SDIV, MVT::v4i16, Custom); |
1356 | setOperationAction(ISD::SDIV, MVT::v8i16, Custom); |
1357 | setOperationAction(ISD::SDIV, MVT::v2i32, Custom); |
1358 | setOperationAction(ISD::SDIV, MVT::v4i32, Custom); |
1359 | setOperationAction(ISD::SDIV, MVT::v1i64, Custom); |
1360 | setOperationAction(ISD::SDIV, MVT::v2i64, Custom); |
1361 | setOperationAction(ISD::SMAX, MVT::v1i64, Custom); |
1362 | setOperationAction(ISD::SMAX, MVT::v2i64, Custom); |
1363 | setOperationAction(ISD::SMIN, MVT::v1i64, Custom); |
1364 | setOperationAction(ISD::SMIN, MVT::v2i64, Custom); |
1365 | setOperationAction(ISD::UDIV, MVT::v8i8, Custom); |
1366 | setOperationAction(ISD::UDIV, MVT::v16i8, Custom); |
1367 | setOperationAction(ISD::UDIV, MVT::v4i16, Custom); |
1368 | setOperationAction(ISD::UDIV, MVT::v8i16, Custom); |
1369 | setOperationAction(ISD::UDIV, MVT::v2i32, Custom); |
1370 | setOperationAction(ISD::UDIV, MVT::v4i32, Custom); |
1371 | setOperationAction(ISD::UDIV, MVT::v1i64, Custom); |
1372 | setOperationAction(ISD::UDIV, MVT::v2i64, Custom); |
1373 | setOperationAction(ISD::UMAX, MVT::v1i64, Custom); |
1374 | setOperationAction(ISD::UMAX, MVT::v2i64, Custom); |
1375 | setOperationAction(ISD::UMIN, MVT::v1i64, Custom); |
1376 | setOperationAction(ISD::UMIN, MVT::v2i64, Custom); |
1377 | setOperationAction(ISD::VECREDUCE_SMAX, MVT::v2i64, Custom); |
1378 | setOperationAction(ISD::VECREDUCE_SMIN, MVT::v2i64, Custom); |
1379 | setOperationAction(ISD::VECREDUCE_UMAX, MVT::v2i64, Custom); |
1380 | setOperationAction(ISD::VECREDUCE_UMIN, MVT::v2i64, Custom); |
1381 | |
1382 | |
1383 | for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, |
1384 | MVT::v2i32, MVT::v4i32, MVT::v2i64}) { |
1385 | setOperationAction(ISD::BITREVERSE, VT, Custom); |
1386 | setOperationAction(ISD::CTTZ, VT, Custom); |
1387 | setOperationAction(ISD::VECREDUCE_AND, VT, Custom); |
1388 | setOperationAction(ISD::VECREDUCE_OR, VT, Custom); |
1389 | setOperationAction(ISD::VECREDUCE_XOR, VT, Custom); |
1390 | } |
1391 | |
1392 | |
// Ordered (sequential) FP add reductions for the NEON-size FP types.
1393 | for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, |
1394 | MVT::v1f64, MVT::v2f64}) |
1395 | setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom); |
1396 | |
1397 | |
1398 | for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v4f32}) |
1399 | setOperationAction(ISD::VECREDUCE_FADD, VT, Custom); |
1400 | } |
1401 | |
1402 | setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv2i1, MVT::nxv2i64); |
1403 | setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv4i1, MVT::nxv4i32); |
1404 | setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv8i1, MVT::nxv8i16); |
1405 | setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv16i1, MVT::nxv16i8); |
1406 | } |
1407 | |
1408 | PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive(); |
1409 | } |
1410 | |
1411 | void AArch64TargetLowering::addTypeForNEON(MVT VT) { |
1412 | assert(VT.isVector() && "VT should be a vector type"); |
1413 | |
1414 | if (VT.isFloatingPoint()) { |
1415 | MVT PromoteTo = EVT(VT).changeVectorElementTypeToInteger().getSimpleVT(); |
1416 | setOperationPromotedToType(ISD::LOAD, VT, PromoteTo); |
1417 | setOperationPromotedToType(ISD::STORE, VT, PromoteTo); |
1418 | } |
1419 | |
1420 | |
1421 | if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) { |
1422 | setOperationAction(ISD::FSIN, VT, Expand); |
1423 | setOperationAction(ISD::FCOS, VT, Expand); |
1424 | setOperationAction(ISD::FPOW, VT, Expand); |
1425 | setOperationAction(ISD::FLOG, VT, Expand); |
1426 | setOperationAction(ISD::FLOG2, VT, Expand); |
1427 | setOperationAction(ISD::FLOG10, VT, Expand); |
1428 | setOperationAction(ISD::FEXP, VT, Expand); |
1429 | setOperationAction(ISD::FEXP2, VT, Expand); |
1430 | } |
1431 | |
1432 | |
1433 | if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64 || |
1434 | ((VT == MVT::v4f16 || VT == MVT::v8f16) && Subtarget->hasFullFP16())) |
1435 | setOperationAction(ISD::FCOPYSIGN, VT, Custom); |
1436 | |
1437 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
1438 | setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
1439 | setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
1440 | setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
1441 | setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); |
1442 | setOperationAction(ISD::SRA, VT, Custom); |
1443 | setOperationAction(ISD::SRL, VT, Custom); |
1444 | setOperationAction(ISD::SHL, VT, Custom); |
1445 | setOperationAction(ISD::OR, VT, Custom); |
1446 | setOperationAction(ISD::SETCC, VT, Custom); |
1447 | setOperationAction(ISD::CONCAT_VECTORS, VT, Legal); |
1448 | |
1449 | setOperationAction(ISD::SELECT, VT, Expand); |
1450 | setOperationAction(ISD::SELECT_CC, VT, Expand); |
1451 | setOperationAction(ISD::VSELECT, VT, Expand); |
1452 | for (MVT InnerVT : MVT::all_valuetypes()) |
1453 | setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand); |
1454 | |
1455 | |
1456 | if (VT != MVT::v8i8 && VT != MVT::v16i8) |
1457 | setOperationAction(ISD::CTPOP, VT, Custom); |
1458 | |
1459 | setOperationAction(ISD::UDIV, VT, Expand); |
1460 | setOperationAction(ISD::SDIV, VT, Expand); |
1461 | setOperationAction(ISD::UREM, VT, Expand); |
1462 | setOperationAction(ISD::SREM, VT, Expand); |
1463 | setOperationAction(ISD::FREM, VT, Expand); |
1464 | |
1465 | setOperationAction(ISD::FP_TO_SINT, VT, Custom); |
1466 | setOperationAction(ISD::FP_TO_UINT, VT, Custom); |
1467 | setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom); |
1468 | setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom); |
1469 | |
1470 | if (!VT.isFloatingPoint()) |
1471 | setOperationAction(ISD::ABS, VT, Legal); |
1472 | |
1473 | |
1474 | if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64) |
1475 | for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}) |
1476 | setOperationAction(Opcode, VT, Legal); |
1477 | |
1478 | |
1479 | if (VT.isFloatingPoint() && |
1480 | VT.getVectorElementType() != MVT::bf16 && |
1481 | (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16())) |
1482 | for (unsigned Opcode : |
1483 | {ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FMINNUM, ISD::FMAXNUM}) |
1484 | setOperationAction(Opcode, VT, Legal); |
1485 | |
1486 | if (Subtarget->isLittleEndian()) { |
1487 | for (unsigned im = (unsigned)ISD::PRE_INC; |
1488 | im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { |
1489 | setIndexedLoadAction(im, VT, Legal); |
1490 | setIndexedStoreAction(im, VT, Legal); |
1491 | } |
1492 | } |
1493 | } |
1494 | |
1495 | void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) { |
1496 | assert(VT.isFixedLengthVector() && "Expected fixed length vector type!"); |
1497 | |
1498 | |
1499 | for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) |
1500 | setOperationAction(Op, VT, Expand); |
1501 | |
1502 | |
1503 | setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); |
1504 | |
1505 | if (VT.isFloatingPoint()) { |
1506 | setCondCodeAction(ISD::SETO, VT, Expand); |
1507 | setCondCodeAction(ISD::SETOLT, VT, Expand); |
1508 | setCondCodeAction(ISD::SETLT, VT, Expand); |
1509 | setCondCodeAction(ISD::SETOLE, VT, Expand); |
1510 | setCondCodeAction(ISD::SETLE, VT, Expand); |
1511 | setCondCodeAction(ISD::SETULT, VT, Expand); |
1512 | setCondCodeAction(ISD::SETULE, VT, Expand); |
1513 | setCondCodeAction(ISD::SETUGE, VT, Expand); |
1514 | setCondCodeAction(ISD::SETUGT, VT, Expand); |
1515 | setCondCodeAction(ISD::SETUEQ, VT, Expand); |
1516 | setCondCodeAction(ISD::SETUNE, VT, Expand); |
1517 | } |
1518 | |
1519 | |
1520 | if (VT.isInteger()) { |
1521 | MVT InnerVT = VT.changeVectorElementType(MVT::i8); |
1522 | while (InnerVT != VT) { |
1523 | setTruncStoreAction(VT, InnerVT, Custom); |
1524 | setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Custom); |
1525 | setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Custom); |
1526 | InnerVT = InnerVT.changeVectorElementType( |
1527 | MVT::getIntegerVT(2 * InnerVT.getScalarSizeInBits())); |
1528 | } |
1529 | } |
1530 | |
1531 | |
1532 | setOperationAction(ISD::ABS, VT, Custom); |
1533 | setOperationAction(ISD::ADD, VT, Custom); |
1534 | setOperationAction(ISD::AND, VT, Custom); |
1535 | setOperationAction(ISD::ANY_EXTEND, VT, Custom); |
1536 | setOperationAction(ISD::BITCAST, VT, Custom); |
1537 | setOperationAction(ISD::BITREVERSE, VT, Custom); |
1538 | setOperationAction(ISD::BSWAP, VT, Custom); |
1539 | setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); |
1540 | setOperationAction(ISD::CTLZ, VT, Custom); |
1541 | setOperationAction(ISD::CTPOP, VT, Custom); |
1542 | setOperationAction(ISD::CTTZ, VT, Custom); |
1543 | setOperationAction(ISD::FABS, VT, Custom); |
1544 | setOperationAction(ISD::FADD, VT, Custom); |
1545 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
1546 | setOperationAction(ISD::FCEIL, VT, Custom); |
1547 | setOperationAction(ISD::FDIV, VT, Custom); |
1548 | setOperationAction(ISD::FFLOOR, VT, Custom); |
1549 | setOperationAction(ISD::FMA, VT, Custom); |
1550 | setOperationAction(ISD::FMAXIMUM, VT, Custom); |
1551 | setOperationAction(ISD::FMAXNUM, VT, Custom); |
1552 | setOperationAction(ISD::FMINIMUM, VT, Custom); |
1553 | setOperationAction(ISD::FMINNUM, VT, Custom); |
1554 | setOperationAction(ISD::FMUL, VT, Custom); |
1555 | setOperationAction(ISD::FNEARBYINT, VT, Custom); |
1556 | setOperationAction(ISD::FNEG, VT, Custom); |
1557 | setOperationAction(ISD::FP_EXTEND, VT, Custom); |
1558 | setOperationAction(ISD::FP_ROUND, VT, Custom); |
1559 | setOperationAction(ISD::FP_TO_SINT, VT, Custom); |
1560 | setOperationAction(ISD::FP_TO_UINT, VT, Custom); |
1561 | setOperationAction(ISD::FRINT, VT, Custom); |
1562 | setOperationAction(ISD::FROUND, VT, Custom); |
1563 | setOperationAction(ISD::FROUNDEVEN, VT, Custom); |
1564 | setOperationAction(ISD::FSQRT, VT, Custom); |
1565 | setOperationAction(ISD::FSUB, VT, Custom); |
1566 | setOperationAction(ISD::FTRUNC, VT, Custom); |
1567 | setOperationAction(ISD::LOAD, VT, Custom); |
1568 | setOperationAction(ISD::MGATHER, VT, Custom); |
1569 | setOperationAction(ISD::MLOAD, VT, Custom); |
1570 | setOperationAction(ISD::MSCATTER, VT, Custom); |
1571 | setOperationAction(ISD::MSTORE, VT, Custom); |
1572 | setOperationAction(ISD::MUL, VT, Custom); |
1573 | setOperationAction(ISD::MULHS, VT, Custom); |
1574 | setOperationAction(ISD::MULHU, VT, Custom); |
1575 | setOperationAction(ISD::OR, VT, Custom); |
1576 | setOperationAction(ISD::SDIV, VT, Custom); |
1577 | setOperationAction(ISD::SELECT, VT, Custom); |
1578 | setOperationAction(ISD::SETCC, VT, Custom); |
1579 | setOperationAction(ISD::SHL, VT, Custom); |
1580 | setOperationAction(ISD::SIGN_EXTEND, VT, Custom); |
1581 | setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom); |
1582 | setOperationAction(ISD::SINT_TO_FP, VT, Custom); |
1583 | setOperationAction(ISD::SMAX, VT, Custom); |
1584 | setOperationAction(ISD::SMIN, VT, Custom); |
1585 | setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); |
1586 | setOperationAction(ISD::VECTOR_SPLICE, VT, Custom); |
1587 | setOperationAction(ISD::SRA, VT, Custom); |
1588 | setOperationAction(ISD::SRL, VT, Custom); |
1589 | setOperationAction(ISD::STORE, VT, Custom); |
1590 | setOperationAction(ISD::SUB, VT, Custom); |
1591 | setOperationAction(ISD::TRUNCATE, VT, Custom); |
1592 | setOperationAction(ISD::UDIV, VT, Custom); |
1593 | setOperationAction(ISD::UINT_TO_FP, VT, Custom); |
1594 | setOperationAction(ISD::UMAX, VT, Custom); |
1595 | setOperationAction(ISD::UMIN, VT, Custom); |
1596 | setOperationAction(ISD::VECREDUCE_ADD, VT, Custom); |
1597 | setOperationAction(ISD::VECREDUCE_AND, VT, Custom); |
1598 | setOperationAction(ISD::VECREDUCE_FADD, VT, Custom); |
1599 | setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom); |
1600 | setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom); |
1601 | setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom); |
1602 | setOperationAction(ISD::VECREDUCE_OR, VT, Custom); |
1603 | setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
1604 | setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom); |
1605 | setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom); |
1606 | setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom); |
1607 | setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom); |
1608 | setOperationAction(ISD::VECREDUCE_XOR, VT, Custom); |
1609 | setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
1610 | setOperationAction(ISD::VSELECT, VT, Custom); |
1611 | setOperationAction(ISD::XOR, VT, Custom); |
1612 | setOperationAction(ISD::ZERO_EXTEND, VT, Custom); |
1613 | } |
1614 | |
1615 | void AArch64TargetLowering::addDRTypeForNEON(MVT VT) { |
1616 | addRegisterClass(VT, &AArch64::FPR64RegClass); |
1617 | addTypeForNEON(VT); |
1618 | } |
1619 | |
1620 | void AArch64TargetLowering::addQRTypeForNEON(MVT VT) { |
1621 | addRegisterClass(VT, &AArch64::FPR128RegClass); |
1622 | addTypeForNEON(VT); |
1623 | } |
1624 | |
1625 | EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &, |
1626 | LLVMContext &C, EVT VT) const { |
1627 | if (!VT.isVector()) |
1628 | return MVT::i32; |
1629 | if (VT.isScalableVector()) |
1630 | return EVT::getVectorVT(C, MVT::i1, VT.getVectorElementCount()); |
1631 | return VT.changeVectorElementTypeToInteger(); |
1632 | } |
1633 | |
1634 | static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm, |
1635 | const APInt &Demanded, |
1636 | TargetLowering::TargetLoweringOpt &TLO, |
1637 | unsigned NewOpc) { |
1638 | uint64_t OldImm = Imm, NewImm, Enc; |
1639 | uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size)), OrigMask = Mask; |
1640 | |
1641 | |
1642 | |
1643 | if (Imm == 0 || Imm == Mask || |
1644 | AArch64_AM::isLogicalImmediate(Imm & Mask, Size)) |
1645 | return false; |
1646 | |
1647 | unsigned EltSize = Size; |
1648 | uint64_t DemandedBits = Demanded.getZExtValue(); |
1649 | |
1650 | |
1651 | Imm &= DemandedBits; |
1652 | |
1653 | while (true) { |
1654 | |
1655 | |
1656 | |
1657 | |
1658 | |
1659 | |
1660 | |
1661 | uint64_t NonDemandedBits = ~DemandedBits; |
1662 | uint64_t InvertedImm = ~Imm & DemandedBits; |
1663 | uint64_t RotatedImm = |
1664 | ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) & |
1665 | NonDemandedBits; |
1666 | uint64_t Sum = RotatedImm + NonDemandedBits; |
1667 | bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1)); |
1668 | uint64_t Ones = (Sum + Carry) & NonDemandedBits; |
1669 | NewImm = (Imm | Ones) & Mask; |
1670 | |
1671 | |
1672 | |
1673 | |
1674 | if (isShiftedMask_64(NewImm) || isShiftedMask_64(~(NewImm | ~Mask))) |
1675 | break; |
1676 | |
1677 | |
1678 | if (EltSize == 2) |
1679 | return false; |
1680 | |
1681 | EltSize /= 2; |
1682 | Mask >>= EltSize; |
1683 | uint64_t Hi = Imm >> EltSize, DemandedBitsHi = DemandedBits >> EltSize; |
1684 | |
1685 | |
1686 | if (((Imm ^ Hi) & (DemandedBits & DemandedBitsHi) & Mask) != 0) |
1687 | return false; |
1688 | |
1689 | |
1690 | Imm |= Hi; |
1691 | DemandedBits |= DemandedBitsHi; |
1692 | } |
1693 | |
1694 | ++NumOptimizedImms; |
1695 | |
1696 | |
1697 | while (EltSize < Size) { |
1698 | NewImm |= NewImm << EltSize; |
1699 | EltSize *= 2; |
1700 | } |
1701 | |
1702 | (void)OldImm; |
1703 | assert(((OldImm ^ NewImm) & Demanded.getZExtValue()) == 0 && |
1704 | "demanded bits should never be altered"); |
1705 | assert(OldImm != NewImm && "the new imm shouldn't be equal to the old imm"); |
1706 | |
1707 | |
1708 | EVT VT = Op.getValueType(); |
1709 | SDLoc DL(Op); |
1710 | SDValue New; |
1711 | |
1712 | |
1713 | |
1714 | if (NewImm == 0 || NewImm == OrigMask) { |
1715 | New = TLO.DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0), |
1716 | TLO.DAG.getConstant(NewImm, DL, VT)); |
1717 | |
1718 | |
1719 | } else { |
1720 | Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size); |
1721 | SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT); |
1722 | New = SDValue( |
1723 | TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst), 0); |
1724 | } |
1725 | |
1726 | return TLO.CombineTo(Op, New); |
1727 | } |
1728 | |
1729 | bool AArch64TargetLowering::targetShrinkDemandedConstant( |
1730 | SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, |
1731 | TargetLoweringOpt &TLO) const { |
1732 | |
1733 | if (!TLO.LegalOps) |
1734 | return false; |
1735 | |
1736 | if (!EnableOptimizeLogicalImm) |
1737 | return false; |
1738 | |
1739 | EVT VT = Op.getValueType(); |
1740 | if (VT.isVector()) |
1741 | return false; |
1742 | |
1743 | unsigned Size = VT.getSizeInBits(); |
1744 | assert((Size == 32 || Size == 64) && |
1745 | "i32 or i64 is expected after legalization."); |
1746 | |
1747 | |
1748 | if (DemandedBits.countPopulation() == Size) |
1749 | return false; |
1750 | |
1751 | unsigned NewOpc; |
1752 | switch (Op.getOpcode()) { |
1753 | default: |
1754 | return false; |
1755 | case ISD::AND: |
1756 | NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri; |
1757 | break; |
1758 | case ISD::OR: |
1759 | NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri; |
1760 | break; |
1761 | case ISD::XOR: |
1762 | NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri; |
1763 | break; |
1764 | } |
1765 | ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); |
1766 | if (!C) |
1767 | return false; |
1768 | uint64_t Imm = C->getZExtValue(); |
1769 | return optimizeLogicalImm(Op, Size, Imm, DemandedBits, TLO, NewOpc); |
1770 | } |
1771 | |
1772 | |
1773 | |
1774 | void AArch64TargetLowering::computeKnownBitsForTargetNode( |
1775 | const SDValue Op, KnownBits &Known, |
1776 | const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const { |
1777 | switch (Op.getOpcode()) { |
1778 | default: |
1779 | break; |
1780 | case AArch64ISD::CSEL: { |
1781 | KnownBits Known2; |
1782 | Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1); |
1783 | Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1); |
1784 | Known = KnownBits::commonBits(Known, Known2); |
1785 | break; |
1786 | } |
1787 | case AArch64ISD::LOADgot: |
1788 | case AArch64ISD::ADDlow: { |
1789 | if (!Subtarget->isTargetILP32()) |
1790 | break; |
1791 | |
1792 | Known.Zero = APInt::getHighBitsSet(64, 32); |
1793 | break; |
1794 | } |
1795 | case ISD::INTRINSIC_W_CHAIN: { |
1796 | ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1)); |
1797 | Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue()); |
1798 | switch (IntID) { |
1799 | default: return; |
1800 | case Intrinsic::aarch64_ldaxr: |
1801 | case Intrinsic::aarch64_ldxr: { |
1802 | unsigned BitWidth = Known.getBitWidth(); |
1803 | EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT(); |
1804 | unsigned MemBits = VT.getScalarSizeInBits(); |
1805 | Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); |
1806 | return; |
1807 | } |
1808 | } |
1809 | break; |
1810 | } |
1811 | case ISD::INTRINSIC_WO_CHAIN: |
1812 | case ISD::INTRINSIC_VOID: { |
1813 | unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); |
1814 | switch (IntNo) { |
1815 | default: |
1816 | break; |
1817 | case Intrinsic::aarch64_neon_umaxv: |
1818 | case Intrinsic::aarch64_neon_uminv: { |
1819 | |
1820 | |
1821 | |
1822 | |
1823 | MVT VT = Op.getOperand(1).getValueType().getSimpleVT(); |
1824 | unsigned BitWidth = Known.getBitWidth(); |
1825 | if (VT == MVT::v8i8 || VT == MVT::v16i8) { |
1826 | assert(BitWidth >= 8 && "Unexpected width!"); |
1827 | APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8); |
1828 | Known.Zero |= Mask; |
1829 | } else if (VT == MVT::v4i16 || VT == MVT::v8i16) { |
1830 | assert(BitWidth >= 16 && "Unexpected width!"); |
1831 | APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16); |
1832 | Known.Zero |= Mask; |
1833 | } |
1834 | break; |
1835 | } break; |
1836 | } |
1837 | } |
1838 | } |
1839 | } |
1840 | |
1841 | MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL, |
1842 | EVT) const { |
1843 | return MVT::i64; |
1844 | } |
1845 | |
1846 | bool AArch64TargetLowering::allowsMisalignedMemoryAccesses( |
1847 | EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, |
1848 | bool *Fast) const { |
1849 | if (Subtarget->requiresStrictAlign()) |
1850 | return false; |
1851 | |
1852 | if (Fast) { |
1853 | |
1854 | *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 || |
1855 | |
1856 | |
1857 | |
1858 | |
1859 | |
1860 | |
1861 | Alignment <= 2 || |
1862 | |
1863 | |
1864 | |
1865 | VT == MVT::v2i64; |
1866 | } |
1867 | return true; |
1868 | } |
1869 | |
1870 | |
1871 | bool AArch64TargetLowering::allowsMisalignedMemoryAccesses( |
1872 | LLT Ty, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, |
1873 | bool *Fast) const { |
1874 | if (Subtarget->requiresStrictAlign()) |
1875 | return false; |
1876 | |
1877 | if (Fast) { |
1878 | |
1879 | *Fast = !Subtarget->isMisaligned128StoreSlow() || |
1880 | Ty.getSizeInBytes() != 16 || |
1881 | |
1882 | |
1883 | |
1884 | |
1885 | |
1886 | |
1887 | Alignment <= 2 || |
1888 | |
1889 | |
1890 | |
1891 | Ty == LLT::fixed_vector(2, 64); |
1892 | } |
1893 | return true; |
1894 | } |
1895 | |
1896 | FastISel * |
1897 | AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, |
1898 | const TargetLibraryInfo *libInfo) const { |
1899 | return AArch64::createFastISel(funcInfo, libInfo); |
1900 | } |
1901 | |
1902 | const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { |
1903 | #define MAKE_CASE(V) \ |
1904 | case V: \ |
1905 | return #V; |
1906 | switch ((AArch64ISD::NodeType)Opcode) { |
1907 | case AArch64ISD::FIRST_NUMBER: |
1908 | break; |
1909 | MAKE_CASE(AArch64ISD::CALL) |
1910 | MAKE_CASE(AArch64ISD::ADRP) |
1911 | MAKE_CASE(AArch64ISD::ADR) |
1912 | MAKE_CASE(AArch64ISD::ADDlow) |
1913 | MAKE_CASE(AArch64ISD::LOADgot) |
1914 | MAKE_CASE(AArch64ISD::RET_FLAG) |
1915 | MAKE_CASE(AArch64ISD::BRCOND) |
1916 | MAKE_CASE(AArch64ISD::CSEL) |
1917 | MAKE_CASE(AArch64ISD::CSINV) |
1918 | MAKE_CASE(AArch64ISD::CSNEG) |
1919 | MAKE_CASE(AArch64ISD::CSINC) |
1920 | MAKE_CASE(AArch64ISD::THREAD_POINTER) |
1921 | MAKE_CASE(AArch64ISD::TLSDESC_CALLSEQ) |
1922 | MAKE_CASE(AArch64ISD::ADD_PRED) |
1923 | MAKE_CASE(AArch64ISD::MUL_PRED) |
1924 | MAKE_CASE(AArch64ISD::MULHS_PRED) |
1925 | MAKE_CASE(AArch64ISD::MULHU_PRED) |
1926 | MAKE_CASE(AArch64ISD::SDIV_PRED) |
1927 | MAKE_CASE(AArch64ISD::SHL_PRED) |
1928 | MAKE_CASE(AArch64ISD::SMAX_PRED) |
1929 | MAKE_CASE(AArch64ISD::SMIN_PRED) |
1930 | MAKE_CASE(AArch64ISD::SRA_PRED) |
1931 | MAKE_CASE(AArch64ISD::SRL_PRED) |
1932 | MAKE_CASE(AArch64ISD::SUB_PRED) |
1933 | MAKE_CASE(AArch64ISD::UDIV_PRED) |
1934 | MAKE_CASE(AArch64ISD::UMAX_PRED) |
1935 | MAKE_CASE(AArch64ISD::UMIN_PRED) |
1936 | MAKE_CASE(AArch64ISD::FNEG_MERGE_PASSTHRU) |
1937 | MAKE_CASE(AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU) |
1938 | MAKE_CASE(AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU) |
1939 | MAKE_CASE(AArch64ISD::FCEIL_MERGE_PASSTHRU) |
1940 | MAKE_CASE(AArch64ISD::FFLOOR_MERGE_PASSTHRU) |
1941 | MAKE_CASE(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU) |
1942 | MAKE_CASE(AArch64ISD::FRINT_MERGE_PASSTHRU) |
1943 | MAKE_CASE(AArch64ISD::FROUND_MERGE_PASSTHRU) |
1944 | MAKE_CASE(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU) |
1945 | MAKE_CASE(AArch64ISD::FTRUNC_MERGE_PASSTHRU) |
1946 | MAKE_CASE(AArch64ISD::FP_ROUND_MERGE_PASSTHRU) |
1947 | MAKE_CASE(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU) |
1948 | MAKE_CASE(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU) |
1949 | MAKE_CASE(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU) |
1950 | MAKE_CASE(AArch64ISD::FCVTZU_MERGE_PASSTHRU) |
1951 | MAKE_CASE(AArch64ISD::FCVTZS_MERGE_PASSTHRU) |
1952 | MAKE_CASE(AArch64ISD::FSQRT_MERGE_PASSTHRU) |
1953 | MAKE_CASE(AArch64ISD::FRECPX_MERGE_PASSTHRU) |
1954 | MAKE_CASE(AArch64ISD::FABS_MERGE_PASSTHRU) |
1955 | MAKE_CASE(AArch64ISD::ABS_MERGE_PASSTHRU) |
1956 | MAKE_CASE(AArch64ISD::NEG_MERGE_PASSTHRU) |
1957 | MAKE_CASE(AArch64ISD::SETCC_MERGE_ZERO) |
1958 | MAKE_CASE(AArch64ISD::ADC) |
1959 | MAKE_CASE(AArch64ISD::SBC) |
1960 | MAKE_CASE(AArch64ISD::ADDS) |
1961 | MAKE_CASE(AArch64ISD::SUBS) |
1962 | MAKE_CASE(AArch64ISD::ADCS) |
1963 | MAKE_CASE(AArch64ISD::SBCS) |
1964 | MAKE_CASE(AArch64ISD::ANDS) |
1965 | MAKE_CASE(AArch64ISD::CCMP) |
1966 | MAKE_CASE(AArch64ISD::CCMN) |
1967 | MAKE_CASE(AArch64ISD::FCCMP) |
1968 | MAKE_CASE(AArch64ISD::FCMP) |
1969 | MAKE_CASE(AArch64ISD::STRICT_FCMP) |
1970 | MAKE_CASE(AArch64ISD::STRICT_FCMPE) |
1971 | MAKE_CASE(AArch64ISD::DUP) |
1972 | MAKE_CASE(AArch64ISD::DUPLANE8) |
1973 | MAKE_CASE(AArch64ISD::DUPLANE16) |
1974 | MAKE_CASE(AArch64ISD::DUPLANE32) |
1975 | MAKE_CASE(AArch64ISD::DUPLANE64) |
1976 | MAKE_CASE(AArch64ISD::MOVI) |
1977 | MAKE_CASE(AArch64ISD::MOVIshift) |
1978 | MAKE_CASE(AArch64ISD::MOVIedit) |
1979 | MAKE_CASE(AArch64ISD::MOVImsl) |
1980 | MAKE_CASE(AArch64ISD::FMOV) |
1981 | MAKE_CASE(AArch64ISD::MVNIshift) |
1982 | MAKE_CASE(AArch64ISD::MVNImsl) |
1983 | MAKE_CASE(AArch64ISD::BICi) |
1984 | MAKE_CASE(AArch64ISD::ORRi) |
1985 | MAKE_CASE(AArch64ISD::BSP) |
1986 | MAKE_CASE(AArch64ISD::EXTR) |
1987 | MAKE_CASE(AArch64ISD::ZIP1) |
1988 | MAKE_CASE(AArch64ISD::ZIP2) |
1989 | MAKE_CASE(AArch64ISD::UZP1) |
1990 | MAKE_CASE(AArch64ISD::UZP2) |
1991 | MAKE_CASE(AArch64ISD::TRN1) |
1992 | MAKE_CASE(AArch64ISD::TRN2) |
1993 | MAKE_CASE(AArch64ISD::REV16) |
1994 | MAKE_CASE(AArch64ISD::REV32) |
1995 | MAKE_CASE(AArch64ISD::REV64) |
1996 | MAKE_CASE(AArch64ISD::EXT) |
1997 | MAKE_CASE(AArch64ISD::SPLICE) |
1998 | MAKE_CASE(AArch64ISD::VSHL) |
1999 | MAKE_CASE(AArch64ISD::VLSHR) |
2000 | MAKE_CASE(AArch64ISD::VASHR) |
2001 | MAKE_CASE(AArch64ISD::VSLI) |
2002 | MAKE_CASE(AArch64ISD::VSRI) |
2003 | MAKE_CASE(AArch64ISD::CMEQ) |
2004 | MAKE_CASE(AArch64ISD::CMGE) |
2005 | MAKE_CASE(AArch64ISD::CMGT) |
2006 | MAKE_CASE(AArch64ISD::CMHI) |
2007 | MAKE_CASE(AArch64ISD::CMHS) |
2008 | MAKE_CASE(AArch64ISD::FCMEQ) |
2009 | MAKE_CASE(AArch64ISD::FCMGE) |
2010 | MAKE_CASE(AArch64ISD::FCMGT) |
2011 | MAKE_CASE(AArch64ISD::CMEQz) |
2012 | MAKE_CASE(AArch64ISD::CMGEz) |
2013 | MAKE_CASE(AArch64ISD::CMGTz) |
2014 | MAKE_CASE(AArch64ISD::CMLEz) |
2015 | MAKE_CASE(AArch64ISD::CMLTz) |
2016 | MAKE_CASE(AArch64ISD::FCMEQz) |
2017 | MAKE_CASE(AArch64ISD::FCMGEz) |
2018 | MAKE_CASE(AArch64ISD::FCMGTz) |
2019 | MAKE_CASE(AArch64ISD::FCMLEz) |
2020 | MAKE_CASE(AArch64ISD::FCMLTz) |
2021 | MAKE_CASE(AArch64ISD::SADDV) |
2022 | MAKE_CASE(AArch64ISD::UADDV) |
2023 | MAKE_CASE(AArch64ISD::SRHADD) |
2024 | MAKE_CASE(AArch64ISD::URHADD) |
2025 | MAKE_CASE(AArch64ISD::SHADD) |
2026 | MAKE_CASE(AArch64ISD::UHADD) |
2027 | MAKE_CASE(AArch64ISD::SDOT) |
2028 | MAKE_CASE(AArch64ISD::UDOT) |
2029 | MAKE_CASE(AArch64ISD::SMINV) |
2030 | MAKE_CASE(AArch64ISD::UMINV) |
2031 | MAKE_CASE(AArch64ISD::SMAXV) |
2032 | MAKE_CASE(AArch64ISD::UMAXV) |
2033 | MAKE_CASE(AArch64ISD::SADDV_PRED) |
2034 | MAKE_CASE(AArch64ISD::UADDV_PRED) |
2035 | MAKE_CASE(AArch64ISD::SMAXV_PRED) |
2036 | MAKE_CASE(AArch64ISD::UMAXV_PRED) |
2037 | MAKE_CASE(AArch64ISD::SMINV_PRED) |
2038 | MAKE_CASE(AArch64ISD::UMINV_PRED) |
2039 | MAKE_CASE(AArch64ISD::ORV_PRED) |
2040 | MAKE_CASE(AArch64ISD::EORV_PRED) |
2041 | MAKE_CASE(AArch64ISD::ANDV_PRED) |
2042 | MAKE_CASE(AArch64ISD::CLASTA_N) |
2043 | MAKE_CASE(AArch64ISD::CLASTB_N) |
2044 | MAKE_CASE(AArch64ISD::LASTA) |
2045 | MAKE_CASE(AArch64ISD::LASTB) |
2046 | MAKE_CASE(AArch64ISD::REINTERPRET_CAST) |
2047 | MAKE_CASE(AArch64ISD::LS64_BUILD) |
2048 | MAKE_CASE(AArch64ISD::LS64_EXTRACT) |
2049 | MAKE_CASE(AArch64ISD::TBL) |
2050 | MAKE_CASE(AArch64ISD::FADD_PRED) |
2051 | MAKE_CASE(AArch64ISD::FADDA_PRED) |
2052 | MAKE_CASE(AArch64ISD::FADDV_PRED) |
2053 | MAKE_CASE(AArch64ISD::FDIV_PRED) |
2054 | MAKE_CASE(AArch64ISD::FMA_PRED) |
2055 | MAKE_CASE(AArch64ISD::FMAX_PRED) |
2056 | MAKE_CASE(AArch64ISD::FMAXV_PRED) |
2057 | MAKE_CASE(AArch64ISD::FMAXNM_PRED) |
2058 | MAKE_CASE(AArch64ISD::FMAXNMV_PRED) |
2059 | MAKE_CASE(AArch64ISD::FMIN_PRED) |
2060 | MAKE_CASE(AArch64ISD::FMINV_PRED) |
2061 | MAKE_CASE(AArch64ISD::FMINNM_PRED) |
2062 | MAKE_CASE(AArch64ISD::FMINNMV_PRED) |
2063 | MAKE_CASE(AArch64ISD::FMUL_PRED) |
2064 | MAKE_CASE(AArch64ISD::FSUB_PRED) |
2065 | MAKE_CASE(AArch64ISD::BIC) |
2066 | MAKE_CASE(AArch64ISD::BIT) |
2067 | MAKE_CASE(AArch64ISD::CBZ) |
2068 | MAKE_CASE(AArch64ISD::CBNZ) |
2069 | MAKE_CASE(AArch64ISD::TBZ) |
2070 | MAKE_CASE(AArch64ISD::TBNZ) |
2071 | MAKE_CASE(AArch64ISD::TC_RETURN) |
2072 | MAKE_CASE(AArch64ISD::PREFETCH) |
2073 | MAKE_CASE(AArch64ISD::SITOF) |
2074 | MAKE_CASE(AArch64ISD::UITOF) |
2075 | MAKE_CASE(AArch64ISD::NVCAST) |
2076 | MAKE_CASE(AArch64ISD::MRS) |
2077 | MAKE_CASE(AArch64ISD::SQSHL_I) |
2078 | MAKE_CASE(AArch64ISD::UQSHL_I) |
2079 | MAKE_CASE(AArch64ISD::SRSHR_I) |
2080 | MAKE_CASE(AArch64ISD::URSHR_I) |
2081 | MAKE_CASE(AArch64ISD::SQSHLU_I) |
2082 | MAKE_CASE(AArch64ISD::WrapperLarge) |
2083 | MAKE_CASE(AArch64ISD::LD2post) |
2084 | MAKE_CASE(AArch64ISD::LD3post) |
2085 | MAKE_CASE(AArch64ISD::LD4post) |
2086 | MAKE_CASE(AArch64ISD::ST2post) |
2087 | MAKE_CASE(AArch64ISD::ST3post) |
2088 | MAKE_CASE(AArch64ISD::ST4post) |
2089 | MAKE_CASE(AArch64ISD::LD1x2post) |
2090 | MAKE_CASE(AArch64ISD::LD1x3post) |
2091 | MAKE_CASE(AArch64ISD::LD1x4post) |
2092 | MAKE_CASE(AArch64ISD::ST1x2post) |
2093 | MAKE_CASE(AArch64ISD::ST1x3post) |
2094 | MAKE_CASE(AArch64ISD::ST1x4post) |
2095 | MAKE_CASE(AArch64ISD::LD1DUPpost) |
2096 | MAKE_CASE(AArch64ISD::LD2DUPpost) |
2097 | MAKE_CASE(AArch64ISD::LD3DUPpost) |
2098 | MAKE_CASE(AArch64ISD::LD4DUPpost) |
2099 | MAKE_CASE(AArch64ISD::LD1LANEpost) |
2100 | MAKE_CASE(AArch64ISD::LD2LANEpost) |
2101 | MAKE_CASE(AArch64ISD::LD3LANEpost) |
2102 | MAKE_CASE(AArch64ISD::LD4LANEpost) |
2103 | MAKE_CASE(AArch64ISD::ST2LANEpost) |
2104 | MAKE_CASE(AArch64ISD::ST3LANEpost) |
2105 | MAKE_CASE(AArch64ISD::ST4LANEpost) |
2106 | MAKE_CASE(AArch64ISD::SMULL) |
2107 | MAKE_CASE(AArch64ISD::UMULL) |
2108 | MAKE_CASE(AArch64ISD::FRECPE) |
2109 | MAKE_CASE(AArch64ISD::FRECPS) |
2110 | MAKE_CASE(AArch64ISD::FRSQRTE) |
2111 | MAKE_CASE(AArch64ISD::FRSQRTS) |
2112 | MAKE_CASE(AArch64ISD::STG) |
2113 | MAKE_CASE(AArch64ISD::STZG) |
2114 | MAKE_CASE(AArch64ISD::ST2G) |
2115 | MAKE_CASE(AArch64ISD::STZ2G) |
2116 | MAKE_CASE(AArch64ISD::SUNPKHI) |
2117 | MAKE_CASE(AArch64ISD::SUNPKLO) |
2118 | MAKE_CASE(AArch64ISD::UUNPKHI) |
2119 | MAKE_CASE(AArch64ISD::UUNPKLO) |
2120 | MAKE_CASE(AArch64ISD::INSR) |
2121 | MAKE_CASE(AArch64ISD::PTEST) |
2122 | MAKE_CASE(AArch64ISD::PTRUE) |
2123 | MAKE_CASE(AArch64ISD::LD1_MERGE_ZERO) |
2124 | MAKE_CASE(AArch64ISD::LD1S_MERGE_ZERO) |
2125 | MAKE_CASE(AArch64ISD::LDNF1_MERGE_ZERO) |
2126 | MAKE_CASE(AArch64ISD::LDNF1S_MERGE_ZERO) |
2127 | MAKE_CASE(AArch64ISD::LDFF1_MERGE_ZERO) |
2128 | MAKE_CASE(AArch64ISD::LDFF1S_MERGE_ZERO) |
2129 | MAKE_CASE(AArch64ISD::LD1RQ_MERGE_ZERO) |
2130 | MAKE_CASE(AArch64ISD::LD1RO_MERGE_ZERO) |
2131 | MAKE_CASE(AArch64ISD::SVE_LD2_MERGE_ZERO) |
2132 | MAKE_CASE(AArch64ISD::SVE_LD3_MERGE_ZERO) |
2133 | MAKE_CASE(AArch64ISD::SVE_LD4_MERGE_ZERO) |
2134 | MAKE_CASE(AArch64ISD::GLD1_MERGE_ZERO) |
2135 | MAKE_CASE(AArch64ISD::GLD1_SCALED_MERGE_ZERO) |
2136 | MAKE_CASE(AArch64ISD::GLD1_SXTW_MERGE_ZERO) |
2137 | MAKE_CASE(AArch64ISD::GLD1_UXTW_MERGE_ZERO) |
2138 | MAKE_CASE(AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO) |
2139 | MAKE_CASE(AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO) |
2140 | MAKE_CASE(AArch64ISD::GLD1_IMM_MERGE_ZERO) |
2141 | MAKE_CASE(AArch64ISD::GLD1S_MERGE_ZERO) |
2142 | MAKE_CASE(AArch64ISD::GLD1S_SCALED_MERGE_ZERO) |
2143 | MAKE_CASE(AArch64ISD::GLD1S_SXTW_MERGE_ZERO) |
2144 | MAKE_CASE(AArch64ISD::GLD1S_UXTW_MERGE_ZERO) |
2145 | MAKE_CASE(AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO) |
2146 | MAKE_CASE(AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO) |
2147 | MAKE_CASE(AArch64ISD::GLD1S_IMM_MERGE_ZERO) |
2148 | MAKE_CASE(AArch64ISD::GLDFF1_MERGE_ZERO) |
2149 | MAKE_CASE(AArch64ISD::GLDFF1_SCALED_MERGE_ZERO) |
2150 | MAKE_CASE(AArch64ISD::GLDFF1_SXTW_MERGE_ZERO) |
2151 | MAKE_CASE(AArch64ISD::GLDFF1_UXTW_MERGE_ZERO) |
2152 | MAKE_CASE(AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO) |
2153 | MAKE_CASE(AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO) |
2154 | MAKE_CASE(AArch64ISD::GLDFF1_IMM_MERGE_ZERO) |
2155 | MAKE_CASE(AArch64ISD::GLDFF1S_MERGE_ZERO) |
2156 | MAKE_CASE(AArch64ISD::GLDFF1S_SCALED_MERGE_ZERO) |
2157 | MAKE_CASE(AArch64ISD::GLDFF1S_SXTW_MERGE_ZERO) |
2158 | MAKE_CASE(AArch64ISD::GLDFF1S_UXTW_MERGE_ZERO) |
2159 | MAKE_CASE(AArch64ISD::GLDFF1S_SXTW_SCALED_MERGE_ZERO) |
2160 | MAKE_CASE(AArch64ISD::GLDFF1S_UXTW_SCALED_MERGE_ZERO) |
2161 | MAKE_CASE(AArch64ISD::GLDFF1S_IMM_MERGE_ZERO) |
2162 | MAKE_CASE(AArch64ISD::GLDNT1_MERGE_ZERO) |
2163 | MAKE_CASE(AArch64ISD::GLDNT1_INDEX_MERGE_ZERO) |
2164 | MAKE_CASE(AArch64ISD::GLDNT1S_MERGE_ZERO) |
2165 | MAKE_CASE(AArch64ISD::ST1_PRED) |
2166 | MAKE_CASE(AArch64ISD::SST1_PRED) |
2167 | MAKE_CASE(AArch64ISD::SST1_SCALED_PRED) |
2168 | MAKE_CASE(AArch64ISD::SST1_SXTW_PRED) |
2169 | MAKE_CASE(AArch64ISD::SST1_UXTW_PRED) |
2170 | MAKE_CASE(AArch64ISD::SST1_SXTW_SCALED_PRED) |
2171 | MAKE_CASE(AArch64ISD::SST1_UXTW_SCALED_PRED) |
2172 | MAKE_CASE(AArch64ISD::SST1_IMM_PRED) |
2173 | MAKE_CASE(AArch64ISD::SSTNT1_PRED) |
2174 | MAKE_CASE(AArch64ISD::SSTNT1_INDEX_PRED) |
2175 | MAKE_CASE(AArch64ISD::LDP) |
2176 | MAKE_CASE(AArch64ISD::STP) |
2177 | MAKE_CASE(AArch64ISD::STNP) |
2178 | MAKE_CASE(AArch64ISD::BITREVERSE_MERGE_PASSTHRU) |
2179 | MAKE_CASE(AArch64ISD::BSWAP_MERGE_PASSTHRU) |
2180 | MAKE_CASE(AArch64ISD::CTLZ_MERGE_PASSTHRU) |
2181 | MAKE_CASE(AArch64ISD::CTPOP_MERGE_PASSTHRU) |
2182 | MAKE_CASE(AArch64ISD::DUP_MERGE_PASSTHRU) |
2183 | MAKE_CASE(AArch64ISD::INDEX_VECTOR) |
2184 | MAKE_CASE(AArch64ISD::UADDLP) |
2185 | MAKE_CASE(AArch64ISD::CALL_RVMARKER) |
2186 | } |
2187 | #undef MAKE_CASE |
2188 | return nullptr; |
2189 | } |
2190 | |
// Expand the F128CSEL pseudo-instruction: there is no f128 conditional-select
// instruction, so the pseudo is materialized as explicit control flow plus a
// PHI node that merges the two candidate values.
//
// The resulting CFG is:
//     MBB:                               ; original block, now ends in branches
//         Bcc <CondCode>, TrueBB
//         B EndBB
//     TrueBB:                            ; empty, falls through
//     EndBB:                             ; rest of the original block
//         DestReg = PHI [IfTrueReg, TrueBB], [IfFalseReg, MBB]
MachineBasicBlock *
AArch64TargetLowering::EmitF128CSEL(MachineInstr &MI,
                                    MachineBasicBlock *MBB) const {
  MachineFunction *MF = MBB->getParent();
  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
  DebugLoc DL = MI.getDebugLoc();
  // Insertion point for the new blocks: immediately after MBB.
  MachineFunction::iterator It = ++MBB->getIterator();

  // Pseudo operands: (dest, if-true value, if-false value, cond code, NZCV).
  Register DestReg = MI.getOperand(0).getReg();
  Register IfTrueReg = MI.getOperand(1).getReg();
  Register IfFalseReg = MI.getOperand(2).getReg();
  unsigned CondCode = MI.getOperand(3).getImm();
  bool NZCVKilled = MI.getOperand(4).isKill();

  MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MF->insert(It, TrueBB);
  MF->insert(It, EndBB);

  // Move everything after the pseudo into EndBB and give it MBB's successors.
  EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)),
                MBB->end());
  EndBB->transferSuccessorsAndUpdatePHIs(MBB);

  BuildMI(MBB, DL, TII->get(AArch64::Bcc)).addImm(CondCode).addMBB(TrueBB);
  BuildMI(MBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
  MBB->addSuccessor(TrueBB);
  MBB->addSuccessor(EndBB);

  // TrueBB is empty and just falls through to EndBB.
  TrueBB->addSuccessor(EndBB);

  // If the pseudo did not kill NZCV, the flags remain live across the new
  // blocks and must be recorded as live-ins.
  if (!NZCVKilled) {
    TrueBB->addLiveIn(AArch64::NZCV);
    EndBB->addLiveIn(AArch64::NZCV);
  }

  // Merge the two values at the top of EndBB.
  BuildMI(*EndBB, EndBB->begin(), DL, TII->get(AArch64::PHI), DestReg)
      .addReg(IfTrueReg)
      .addMBB(TrueBB)
      .addReg(IfFalseReg)
      .addMBB(MBB);

  MI.eraseFromParent();
  return EndBB;
}
2250 | |
2251 | MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchRet( |
2252 | MachineInstr &MI, MachineBasicBlock *BB) const { |
2253 | assert(!isAsynchronousEHPersonality(classifyEHPersonality( |
2254 | BB->getParent()->getFunction().getPersonalityFn())) && |
2255 | "SEH does not use catchret!"); |
2256 | return BB; |
2257 | } |
2258 | |
2259 | MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter( |
2260 | MachineInstr &MI, MachineBasicBlock *BB) const { |
2261 | switch (MI.getOpcode()) { |
2262 | default: |
2263 | #ifndef NDEBUG |
2264 | MI.dump(); |
2265 | #endif |
2266 | llvm_unreachable("Unexpected instruction for custom inserter!"); |
2267 | |
2268 | case AArch64::F128CSEL: |
2269 | return EmitF128CSEL(MI, BB); |
2270 | |
2271 | case TargetOpcode::STACKMAP: |
2272 | case TargetOpcode::PATCHPOINT: |
2273 | case TargetOpcode::STATEPOINT: |
2274 | return emitPatchPoint(MI, BB); |
2275 | |
2276 | case AArch64::CATCHRET: |
2277 | return EmitLoweredCatchRet(MI, BB); |
2278 | } |
2279 | } |
2280 | |
2281 | |
2282 | |
2283 | |
2284 | |
2285 | |
2286 | |
2287 | |
2288 | |
2289 | |
2290 | static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT); |
2291 | static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V); |
2292 | static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V); |
2293 | static SDValue convertFixedMaskToScalableVector(SDValue Mask, |
2294 | SelectionDAG &DAG); |
2295 | |
2296 | |
2297 | static bool isZerosVector(const SDNode *N) { |
2298 | |
2299 | while (N->getOpcode() == ISD::BITCAST) |
2300 | N = N->getOperand(0).getNode(); |
2301 | |
2302 | if (ISD::isConstantSplatVectorAllZeros(N)) |
2303 | return true; |
2304 | |
2305 | if (N->getOpcode() != AArch64ISD::DUP) |
2306 | return false; |
2307 | |
2308 | auto Opnd0 = N->getOperand(0); |
2309 | auto *CINT = dyn_cast<ConstantSDNode>(Opnd0); |
2310 | auto *CFP = dyn_cast<ConstantFPSDNode>(Opnd0); |
2311 | return (CINT && CINT->isNullValue()) || (CFP && CFP->isZero()); |
2312 | } |
2313 | |
2314 | |
2315 | |
2316 | static AArch64CC::CondCode changeIntCCToAArch64CC(ISD::CondCode CC) { |
2317 | switch (CC) { |
2318 | default: |
2319 | llvm_unreachable("Unknown condition code!"); |
2320 | case ISD::SETNE: |
2321 | return AArch64CC::NE; |
2322 | case ISD::SETEQ: |
2323 | return AArch64CC::EQ; |
2324 | case ISD::SETGT: |
2325 | return AArch64CC::GT; |
2326 | case ISD::SETGE: |
2327 | return AArch64CC::GE; |
2328 | case ISD::SETLT: |
2329 | return AArch64CC::LT; |
2330 | case ISD::SETLE: |
2331 | return AArch64CC::LE; |
2332 | case ISD::SETUGT: |
2333 | return AArch64CC::HI; |
2334 | case ISD::SETUGE: |
2335 | return AArch64CC::HS; |
2336 | case ISD::SETULT: |
2337 | return AArch64CC::LO; |
2338 | case ISD::SETULE: |
2339 | return AArch64CC::LS; |
2340 | } |
2341 | } |
2342 | |
2343 | |
/// Convert a DAG floating-point condition code to an AArch64 condition code.
///
/// Some FP predicates cannot be tested with a single AArch64 condition; in
/// that case both CondCode and CondCode2 are set and the overall predicate is
/// the OR of the two tests (contrast changeFPCCToANDAArch64CC below, which
/// produces an AND'able decomposition). CondCode2 == AL marks the second
/// slot as unused.
static void changeFPCCToAArch64CC(ISD::CondCode CC,
                                  AArch64CC::CondCode &CondCode,
                                  AArch64CC::CondCode &CondCode2) {
  CondCode2 = AArch64CC::AL; // Assume a single condition suffices.
  switch (CC) {
  default:
    llvm_unreachable("Unknown FP condition!");
  case ISD::SETEQ:
  case ISD::SETOEQ:
    CondCode = AArch64CC::EQ;
    break;
  case ISD::SETGT:
  case ISD::SETOGT:
    CondCode = AArch64CC::GT;
    break;
  case ISD::SETGE:
  case ISD::SETOGE:
    CondCode = AArch64CC::GE;
    break;
  case ISD::SETOLT:
    CondCode = AArch64CC::MI;
    break;
  case ISD::SETOLE:
    CondCode = AArch64CC::LS;
    break;
  case ISD::SETONE:
    // (a one b) == ((a olt b) || (a ogt b)) -- two tests, OR'ed.
    CondCode = AArch64CC::MI;
    CondCode2 = AArch64CC::GT;
    break;
  case ISD::SETO:
    // Ordered: V clear after an FP compare.
    CondCode = AArch64CC::VC;
    break;
  case ISD::SETUO:
    // Unordered: V set after an FP compare.
    CondCode = AArch64CC::VS;
    break;
  case ISD::SETUEQ:
    // (a ueq b) == ((a oeq b) || (a uno b)) -- two tests, OR'ed.
    CondCode = AArch64CC::EQ;
    CondCode2 = AArch64CC::VS;
    break;
  case ISD::SETUGT:
    CondCode = AArch64CC::HI;
    break;
  case ISD::SETUGE:
    CondCode = AArch64CC::PL;
    break;
  case ISD::SETLT:
  case ISD::SETULT:
    CondCode = AArch64CC::LT;
    break;
  case ISD::SETLE:
  case ISD::SETULE:
    CondCode = AArch64CC::LE;
    break;
  case ISD::SETNE:
  case ISD::SETUNE:
    CondCode = AArch64CC::NE;
    break;
  }
}
2403 | |
2404 | |
2405 | |
2406 | |
/// Convert a DAG floating-point condition code to AArch64 condition codes
/// whose results must be AND'ed -- rather than OR'ed, as produced by
/// changeFPCCToAArch64CC -- to form the final predicate. Used when building
/// conditional-compare (CCMP) chains, which naturally express conjunctions.
static void changeFPCCToANDAArch64CC(ISD::CondCode CC,
                                     AArch64CC::CondCode &CondCode,
                                     AArch64CC::CondCode &CondCode2) {
  CondCode2 = AArch64CC::AL;
  switch (CC) {
  default:
    // Every other predicate maps to a single condition code, so the OR'ed
    // and AND'ed decompositions coincide (asserted below).
    changeFPCCToAArch64CC(CC, CondCode, CondCode2);
    assert(CondCode2 == AArch64CC::AL);
    break;
  case ISD::SETONE:
    // (a one b)
    // == ((a olt b) || (a ogt b))
    // == ((a ord b) && (a une b))
    CondCode = AArch64CC::VC;
    CondCode2 = AArch64CC::NE;
    break;
  case ISD::SETUEQ:
    // (a ueq b)
    // == ((a uno b) || (a oeq b))
    // == ((a ule b) && (a uge b))
    CondCode = AArch64CC::PL;
    CondCode2 = AArch64CC::LE;
    break;
  }
}
2432 | |
2433 | |
2434 | |
2435 | |
2436 | |
/// Convert a DAG fp condition code to an AArch64 CC usable with the vector
/// compare-mask instructions. Fewer predicates are available there than for
/// scalar compares (no NZCV register), so some conditions are implemented by
/// emitting the inverted comparison and setting \p Invert, telling the caller
/// to invert the resulting mask.
static void changeVectorFPCCToAArch64CC(ISD::CondCode CC,
                                        AArch64CC::CondCode &CondCode,
                                        AArch64CC::CondCode &CondCode2,
                                        bool &Invert) {
  Invert = false;
  switch (CC) {
  default:
    // Mostly the scalar mappings work fine.
    changeFPCCToAArch64CC(CC, CondCode, CondCode2);
    break;
  case ISD::SETUO:
    // Unordered is the inverse of ordered: compute SETO and invert the mask.
    Invert = true;
    LLVM_FALLTHROUGH;
  case ISD::SETO:
    // Ordered == (a olt b) || (a oge b): two tests, OR'ed.
    CondCode = AArch64CC::MI;
    CondCode2 = AArch64CC::GE;
    break;
  case ISD::SETUEQ:
  case ISD::SETULT:
  case ISD::SETULE:
  case ISD::SETUGT:
  case ISD::SETUGE:
    // All of the compare-mask comparisons are ordered, but an unordered
    // predicate can be obtained by emitting the inverse ordered comparison
    // and inverting the mask (e.g. ULE == !OGT).
    Invert = true;
    changeFPCCToAArch64CC(getSetCCInverse(CC, MVT::f32),
                          CondCode, CondCode2);
    break;
  }
}
2467 | |
2468 | static bool isLegalArithImmed(uint64_t C) { |
2469 | |
2470 | bool IsLegal = (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0); |
2471 | LLVM_DEBUG(dbgs() << "Is imm " << C |
2472 | << " legal: " << (IsLegal ? "yes\n" : "no\n")); |
2473 | return IsLegal; |
2474 | } |
2475 | |
2476 | |
2477 | |
2478 | |
2479 | |
2480 | |
2481 | |
2482 | |
2483 | |
2484 | |
2485 | |
2486 | static bool isCMN(SDValue Op, ISD::CondCode CC) { |
2487 | return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0)) && |
2488 | (CC == ISD::SETEQ || CC == ISD::SETNE); |
2489 | } |
2490 | |
2491 | static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &dl, |
2492 | SelectionDAG &DAG, SDValue Chain, |
2493 | bool IsSignaling) { |
2494 | EVT VT = LHS.getValueType(); |
2495 | assert(VT != MVT::f128); |
2496 | assert(VT != MVT::f16 && "Lowering of strict fp16 not yet implemented"); |
2497 | unsigned Opcode = |
2498 | IsSignaling ? AArch64ISD::STRICT_FCMPE : AArch64ISD::STRICT_FCMP; |
2499 | return DAG.getNode(Opcode, dl, {VT, MVT::Other}, {Chain, LHS, RHS}); |
2500 | } |
2501 | |
// Emit a flag-setting comparison of LHS and RHS and return the flags result.
// For FP types this is an FCMP; for integers a SUBS/ADDS/ANDS whose value
// result may be dead (a later phase can retarget it to WZR/XZR).
static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                              const SDLoc &dl, SelectionDAG &DAG) {
  EVT VT = LHS.getValueType();
  const bool FullFP16 =
      static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();

  if (VT.isFloatingPoint()) {
    assert(VT != MVT::f128);
    if (VT == MVT::f16 && !FullFP16) {
      // No fp16 compare without FullFP16: promote both sides to f32.
      LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
      RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
      VT = MVT::f32;
    }
    return DAG.getNode(AArch64ISD::FCMP, dl, VT, LHS, RHS);
  }

  // The CMP instruction is just an alias for SUBS, and representing it as
  // SUBS means that it's possible to get CSE with subtract operations.
  unsigned Opcode = AArch64ISD::SUBS;

  if (isCMN(RHS, CC)) {
    // Fold (CMP op1, (sub 0, op2)) into a CMN (= ADDS) instruction.
    Opcode = AArch64ISD::ADDS;
    RHS = RHS.getOperand(1);
  } else if (isCMN(LHS, CC)) {
    // As we are looking for EQ/NE compares, the operands can be commuted:
    // fold (CMP (sub 0, op1), op2) into CMN op2, op1 as well.
    Opcode = AArch64ISD::ADDS;
    LHS = LHS.getOperand(1);
  } else if (isNullConstant(RHS) && !isUnsignedIntSetCC(CC)) {
    if (LHS.getOpcode() == ISD::AND) {
      // (CMP (and X, Y), 0) can be implemented with a TST (= ANDS). The
      // flags are only guaranteed to match SUBS-against-zero for the
      // non-unsigned predicates, hence the isUnsignedIntSetCC guard above.
      const SDValue ANDSNode = DAG.getNode(AArch64ISD::ANDS, dl,
                                           DAG.getVTList(VT, MVT_CC),
                                           LHS.getOperand(0),
                                           LHS.getOperand(1));
      // Replace all users of (and X, Y) with the flag-setting ANDS so the
      // plain AND node disappears instead of being computed twice.
      DAG.ReplaceAllUsesWith(LHS, ANDSNode);
      return ANDSNode.getValue(1);
    } else if (LHS.getOpcode() == AArch64ISD::ANDS) {
      // An ANDS already exists; just reuse its flags result.
      return LHS.getValue(1);
    }
  }

  return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS)
      .getValue(1);
}
2554 | |
2555 | |
2556 | |
2557 | |
2558 | |
2559 | |
2560 | |
2561 | |
2562 | |
2563 | |
2564 | |
2565 | |
2566 | |
2567 | |
2568 | |
2569 | |
2570 | |
2571 | |
2572 | |
2573 | |
2574 | |
2575 | |
2576 | |
2577 | |
2578 | |
2579 | |
2580 | |
2581 | |
2582 | |
2583 | |
2584 | |
2585 | |
2586 | |
2587 | |
2588 | |
2589 | |
2590 | |
2591 | |
2592 | |
2593 | |
2594 | |
2595 | |
2596 | |
2597 | |
2598 | |
2599 | |
2600 | |
2601 | |
2602 | |
2603 | |
2604 | |
2605 | |
2606 | |
2607 | |
/// Create a conditional comparison node (CCMP, CCMN or FCCMP as appropriate):
/// if \p Predicate holds on the incoming flags \p CCOp, compare LHS with RHS;
/// otherwise set the flags to the NZCV value that satisfies the *inverse* of
/// \p OutCC, so the chain's final test still evaluates correctly.
static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
                                         ISD::CondCode CC, SDValue CCOp,
                                         AArch64CC::CondCode Predicate,
                                         AArch64CC::CondCode OutCC,
                                         const SDLoc &DL, SelectionDAG &DAG) {
  unsigned Opcode = 0;
  const bool FullFP16 =
      static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();

  if (LHS.getValueType().isFloatingPoint()) {
    assert(LHS.getValueType() != MVT::f128);
    if (LHS.getValueType() == MVT::f16 && !FullFP16) {
      // No fp16 conditional compare without FullFP16: promote to f32.
      LHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, LHS);
      RHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, RHS);
    }
    Opcode = AArch64ISD::FCCMP;
  } else if (RHS.getOpcode() == ISD::SUB) {
    SDValue SubOp0 = RHS.getOperand(0);
    if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
      // (CCMP x, (sub 0, y)) with an equality predicate becomes CCMN x, y.
      Opcode = AArch64ISD::CCMN;
      RHS = RHS.getOperand(1);
    }
  }
  if (Opcode == 0)
    Opcode = AArch64ISD::CCMP;

  SDValue Condition = DAG.getConstant(Predicate, DL, MVT_CC);
  // When Predicate fails, the instruction loads this NZCV immediate instead;
  // choose it to satisfy the inverse of OutCC so the overall test fails.
  AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
  unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
  SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
  return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp);
}
2641 | |
2642 | |
2643 | |
2644 | |
2645 | |
2646 | |
2647 | |
2648 | |
2649 | |
2650 | |
2651 | |
2652 | |
2653 | |
2654 | |
2655 | |
2656 | static bool canEmitConjunction(const SDValue Val, bool &CanNegate, |
2657 | bool &MustBeFirst, bool WillNegate, |
2658 | unsigned Depth = 0) { |
2659 | if (!Val.hasOneUse()) |
| 19 | | Assuming the condition is false | |
|
| |
| 33 | | Assuming the condition is true | |
|
| |
2660 | return false; |
| 35 | | Returning without writing to 'CanNegate' | |
|
2661 | unsigned Opcode = Val->getOpcode(); |
2662 | if (Opcode == ISD::SETCC) { |
| |
2663 | if (Val->getOperand(0).getValueType() == MVT::f128) |
2664 | return false; |
2665 | CanNegate = true; |
2666 | MustBeFirst = false; |
2667 | return true; |
2668 | } |
2669 | |
2670 | if (Depth > 6) |
2671 | return false; |
2672 | if (Opcode == ISD::AND || Opcode == ISD::OR) { |
| |
2673 | bool IsOR = Opcode == ISD::OR; |
2674 | SDValue O0 = Val->getOperand(0); |
2675 | SDValue O1 = Val->getOperand(1); |
2676 | bool CanNegateL; |
2677 | bool MustBeFirstL; |
2678 | if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth+1)) |
| 23 | | Assuming the condition is false | |
|
| |
2679 | return false; |
2680 | bool CanNegateR; |
2681 | bool MustBeFirstR; |
2682 | if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth+1)) |
| 25 | | Assuming the condition is false | |
|
2683 | return false; |
2684 | |
2685 | if (MustBeFirstL && MustBeFirstR) |
| 26 | | Assuming 'MustBeFirstL' is false | |
|
2686 | return false; |
2687 | |
2688 | if (IsOR) { |
2689 | |
2690 | |
2691 | if (!CanNegateL && !CanNegateR) |
| 27 | | Assuming 'CanNegateL' is true, which participates in a condition later | |
|
2692 | return false; |
2693 | |
2694 | |
2695 | CanNegate = WillNegate && CanNegateL && CanNegateR; |
2696 | |
2697 | |
2698 | MustBeFirst = !CanNegate; |
| 28 | | Assuming 'CanNegate' is false | |
|
| 29 | | The value 1 is assigned to 'MustBeFirstL', which participates in a condition later | |
|
2699 | } else { |
2700 | assert(Opcode == ISD::AND && "Must be OR or AND"); |
2701 | |
2702 | CanNegate = false; |
2703 | MustBeFirst = MustBeFirstL || MustBeFirstR; |
2704 | } |
2705 | return true; |
2706 | } |
2707 | return false; |
2708 | } |
2709 | |
2710 | |
2711 | |
2712 | |
2713 | |
2714 | |
2715 | |
2716 | |
2717 | |
2718 | static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val, |
2719 | AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp, |
2720 | AArch64CC::CondCode Predicate) { |
2721 | |
2722 | unsigned Opcode = Val->getOpcode(); |
2723 | if (Opcode == ISD::SETCC) { |
| |
2724 | SDValue LHS = Val->getOperand(0); |
2725 | SDValue RHS = Val->getOperand(1); |
2726 | ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get(); |
2727 | bool isInteger = LHS.getValueType().isInteger(); |
2728 | if (Negate) |
2729 | CC = getSetCCInverse(CC, LHS.getValueType()); |
2730 | SDLoc DL(Val); |
2731 | |
2732 | if (isInteger) { |
2733 | OutCC = changeIntCCToAArch64CC(CC); |
2734 | } else { |
2735 | assert(LHS.getValueType().isFloatingPoint()); |
2736 | AArch64CC::CondCode ExtraCC; |
2737 | changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC); |
2738 | |
2739 | |
2740 | if (ExtraCC != AArch64CC::AL) { |
2741 | SDValue ExtraCmp; |
2742 | if (!CCOp.getNode()) |
2743 | ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG); |
2744 | else |
2745 | ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, |
2746 | ExtraCC, DL, DAG); |
2747 | CCOp = ExtraCmp; |
2748 | Predicate = ExtraCC; |
2749 | } |
2750 | } |
2751 | |
2752 | |
2753 | if (!CCOp) |
2754 | return emitComparison(LHS, RHS, CC, DL, DAG); |
2755 | |
2756 | return emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, OutCC, DL, |
2757 | DAG); |
2758 | } |
2759 | assert(Val->hasOneUse() && "Valid conjunction/disjunction tree"); |
2760 | |
2761 | bool IsOR = Opcode == ISD::OR; |
2762 | |
2763 | SDValue LHS = Val->getOperand(0); |
2764 | bool CanNegateL; |
2765 | bool MustBeFirstL; |
2766 | bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR); |
| 18 | | Calling 'canEmitConjunction' | |
|
| 30 | | Returning from 'canEmitConjunction' | |
|
2767 | assert(ValidL && "Valid conjunction/disjunction tree"); |
2768 | (void)ValidL; |
2769 | |
2770 | SDValue RHS = Val->getOperand(1); |
2771 | bool CanNegateR; |
| 31 | | 'CanNegateR' declared without an initial value | |
|
2772 | bool MustBeFirstR; |
2773 | bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR); |
| 32 | | Calling 'canEmitConjunction' | |
|
| 36 | | Returning from 'canEmitConjunction' | |
|
2774 | assert(ValidR && "Valid conjunction/disjunction tree"); |
2775 | (void)ValidR; |
2776 | |
2777 | |
2778 | if (MustBeFirstL) { |
| |
2779 | assert(!MustBeFirstR && "Valid conjunction/disjunction tree"); |
2780 | std::swap(LHS, RHS); |
2781 | std::swap(CanNegateL, CanNegateR); |
| 38 | | Passing value via 2nd parameter '__b' | |
|
| |
| 45 | | Returning from 'swap<bool>' | |
|
2782 | std::swap(MustBeFirstL, MustBeFirstR); |
2783 | } |
2784 | |
2785 | bool NegateR; |
2786 | bool NegateAfterR; |
2787 | bool NegateL; |
2788 | bool NegateAfterAll; |
2789 | if (Opcode == ISD::OR) { |
| |
2790 | |
2791 | if (!CanNegateL) { |
| 47 | | Branch condition evaluates to a garbage value |
|
2792 | assert(CanNegateR && "at least one side must be negatable"); |
2793 | assert(!MustBeFirstR && "invalid conjunction/disjunction tree"); |
2794 | assert(!Negate); |
2795 | std::swap(LHS, RHS); |
2796 | NegateR = false; |
2797 | NegateAfterR = true; |
2798 | } else { |
2799 | |
2800 | NegateR = CanNegateR; |
2801 | NegateAfterR = !CanNegateR; |
2802 | } |
2803 | NegateL = true; |
2804 | NegateAfterAll = !Negate; |
2805 | } else { |
2806 | assert(Opcode == ISD::AND && "Valid conjunction/disjunction tree"); |
2807 | assert(!Negate && "Valid conjunction/disjunction tree"); |
2808 | |
2809 | NegateL = false; |
2810 | NegateR = false; |
2811 | NegateAfterR = false; |
2812 | NegateAfterAll = false; |
2813 | } |
2814 | |
2815 | |
2816 | AArch64CC::CondCode RHSCC; |
2817 | SDValue CmpR = emitConjunctionRec(DAG, RHS, RHSCC, NegateR, CCOp, Predicate); |
2818 | if (NegateAfterR) |
2819 | RHSCC = AArch64CC::getInvertedCondCode(RHSCC); |
2820 | SDValue CmpL = emitConjunctionRec(DAG, LHS, OutCC, NegateL, CmpR, RHSCC); |
2821 | if (NegateAfterAll) |
2822 | OutCC = AArch64CC::getInvertedCondCode(OutCC); |
2823 | return CmpL; |
2824 | } |
2825 | |
2826 | |
2827 | |
2828 | |
2829 | |
2830 | static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val, |
2831 | AArch64CC::CondCode &OutCC) { |
2832 | bool DummyCanNegate; |
2833 | bool DummyMustBeFirst; |
2834 | if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false)) |
| |
2835 | return SDValue(); |
2836 | |
2837 | return emitConjunctionRec(DAG, Val, OutCC, false, SDValue(), AArch64CC::AL); |
| 16 | | Calling 'emitConjunctionRec' | |
|
2838 | } |
2839 | |
2840 | |
2841 | |
2842 | |
2843 | |
2844 | static unsigned getCmpOperandFoldingProfit(SDValue Op) { |
2845 | auto isSupportedExtend = [&](SDValue V) { |
2846 | if (V.getOpcode() == ISD::SIGN_EXTEND_INREG) |
2847 | return true; |
2848 | |
2849 | if (V.getOpcode() == ISD::AND) |
2850 | if (ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(V.getOperand(1))) { |
2851 | uint64_t Mask = MaskCst->getZExtValue(); |
2852 | return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF); |
2853 | } |
2854 | |
2855 | return false; |
2856 | }; |
2857 | |
2858 | if (!Op.hasOneUse()) |
2859 | return 0; |
2860 | |
2861 | if (isSupportedExtend(Op)) |
2862 | return 1; |
2863 | |
2864 | unsigned Opc = Op.getOpcode(); |
2865 | if (Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA) |
2866 | if (ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { |
2867 | uint64_t Shift = ShiftCst->getZExtValue(); |
2868 | if (isSupportedExtend(Op.getOperand(0))) |
2869 | return (Shift <= 4) ? 2 : 1; |
2870 | EVT VT = Op.getValueType(); |
2871 | if ((VT == MVT::i32 && Shift <= 31) || (VT == MVT::i64 && Shift <= 63)) |
2872 | return 1; |
2873 | } |
2874 | |
2875 | return 0; |
2876 | } |
2877 | |
// Build the AArch64 comparison for (LHS CC RHS), returning the flags value
// and storing the AArch64 condition to test in AArch64cc. Tries, in order:
// massaging an out-of-range immediate, swapping operands for better folding,
// a sign-extension trick for i16 loads, and fusing a boolean AND/OR tree
// into a CCMP chain, before falling back to a plain compare.
static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                             SDValue &AArch64cc, SelectionDAG &DAG,
                             const SDLoc &dl) {
  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
    EVT VT = RHS.getValueType();
    uint64_t C = RHSC->getZExtValue();
    if (!isLegalArithImmed(C)) {
      // The immediate is not encodable directly. For the inequality
      // predicates, adjusting the constant by one and flipping strict/
      // non-strict preserves the predicate and may yield a legal immediate.
      // The excluded boundary constants are those where C +/- 1 wraps.
      switch (CC) {
      default:
        break;
      case ISD::SETLT:
      case ISD::SETGE:
        // (x < C)  == (x <= C-1),  (x >= C) == (x > C-1)
        if ((VT == MVT::i32 && C != 0x80000000 &&
             isLegalArithImmed((uint32_t)(C - 1))) ||
            (VT == MVT::i64 && C != 0x80000000ULL &&
             isLegalArithImmed(C - 1ULL))) {
          CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
          C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
          RHS = DAG.getConstant(C, dl, VT);
        }
        break;
      case ISD::SETULT:
      case ISD::SETUGE:
        // (x u< C) == (x u<= C-1),  (x u>= C) == (x u> C-1)
        if ((VT == MVT::i32 && C != 0 &&
             isLegalArithImmed((uint32_t)(C - 1))) ||
            (VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) {
          CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
          C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
          RHS = DAG.getConstant(C, dl, VT);
        }
        break;
      case ISD::SETLE:
      case ISD::SETGT:
        // (x <= C) == (x < C+1),  (x > C) == (x >= C+1)
        if ((VT == MVT::i32 && C != INT32_MAX &&
             isLegalArithImmed((uint32_t)(C + 1))) ||
            (VT == MVT::i64 && C != INT64_MAX &&
             isLegalArithImmed(C + 1ULL))) {
          CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
          C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
          RHS = DAG.getConstant(C, dl, VT);
        }
        break;
      case ISD::SETULE:
      case ISD::SETUGT:
        // (x u<= C) == (x u< C+1),  (x u> C) == (x u>= C+1)
        if ((VT == MVT::i32 && C != UINT32_MAX &&
             isLegalArithImmed((uint32_t)(C + 1))) ||
            (VT == MVT::i64 && C != UINT64_MAX &&
             isLegalArithImmed(C + 1ULL))) {
          CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
          C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
          RHS = DAG.getConstant(C, dl, VT);
        }
        break;
      }
    }
  }

  // Comparisons are canonicalized so that the RHS operand is simpler than the
  // LHS one, but AArch64 compares can fold some shift+extend operations on
  // their second operand, so swap the operands if the LHS would fold better.
  // For example:
  //    lsl w13, w11, #1 ; cmp w13, w12
  // can become:
  //    cmp w12, w11, lsl #1
  if (!isa<ConstantSDNode>(RHS) ||
      !isLegalArithImmed(cast<ConstantSDNode>(RHS)->getZExtValue())) {
    SDValue TheLHS = isCMN(LHS, CC) ? LHS.getOperand(1) : LHS;
    // Score against the CMN's real operand when LHS is (sub 0, x).
    if (getCmpOperandFoldingProfit(TheLHS) > getCmpOperandFoldingProfit(RHS)) {
      std::swap(LHS, RHS);
      CC = ISD::getSetCCSwappedOperands(CC);
    }
  }

  SDValue Cmp;
  AArch64CC::CondCode AArch64CC;
  if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
    const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS);

    // For an EQ/NE compare of a single-use zero-extended i16 load against a
    // constant that fits in 16 bits, the comparison can equivalently be done
    // on the sign-extended value: the low 16 bits fully determine equality.
    // If the constant reinterpreted as int16_t is negative and its negation
    // is a legal arithmetic immediate, the sign-extended form is cheaper to
    // encode (presumably selectable as a CMN -- behavior preserved either
    // way since only EQ/NE are accepted here).
    if ((RHSC->getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) &&
        cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
        cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
        LHS.getNode()->hasNUsesOfValue(1, 0)) {
      int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
      if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
        SDValue SExt =
            DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
                        DAG.getValueType(MVT::i16));
        Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl,
                                                   RHS.getValueType()),
                             CC, dl, DAG);
        AArch64CC = changeIntCCToAArch64CC(CC);
      }
    }

    // When comparing a boolean AND/OR tree against 0 or 1 with EQ/NE, try to
    // emit the whole tree as one CMP + CCMP chain.
    if (!Cmp && (RHSC->isNullValue() || RHSC->isOne())) {
      if ((Cmp = emitConjunction(DAG, LHS, AArch64CC))) {
        // (tree != 0) and (tree == 1) test the tree's own condition;
        // (tree == 0) and (tree != 1) test its inverse.
        if ((CC == ISD::SETNE) ^ RHSC->isNullValue())
          AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
      }
    }
  }

  // Fallback: a plain flag-setting compare.
  if (!Cmp) {
    Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
    AArch64CC = changeIntCCToAArch64CC(CC);
  }
  AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC);
  return Cmp;
}
3008 | |
// Lower an overflow-checking arithmetic node ([SU]ADDO/[SU]SUBO/[SU]MULO) to
// AArch64 nodes, returning {result value, overflow flag} and setting CC to
// the condition that indicates overflow.
static std::pair<SDValue, SDValue>
getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
  assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) &&
         "Unsupported value type");
  SDValue Value, Overflow;
  SDLoc DL(Op);
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  unsigned Opc = 0;
  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("Unknown overflow instruction!");
  case ISD::SADDO:
    Opc = AArch64ISD::ADDS;
    CC = AArch64CC::VS; // signed add overflow -> V set
    break;
  case ISD::UADDO:
    Opc = AArch64ISD::ADDS;
    CC = AArch64CC::HS; // unsigned add overflow -> carry set
    break;
  case ISD::SSUBO:
    Opc = AArch64ISD::SUBS;
    CC = AArch64CC::VS; // signed sub overflow -> V set
    break;
  case ISD::USUBO:
    Opc = AArch64ISD::SUBS;
    CC = AArch64CC::LO; // unsigned sub overflow (borrow) -> carry clear
    break;
  // Multiply needs extra work: no flag-setting multiply exists, so overflow
  // is detected by an explicit compare (CC is NE against that compare).
  case ISD::SMULO:
  case ISD::UMULO: {
    CC = AArch64CC::NE;
    bool IsSigned = Op.getOpcode() == ISD::SMULO;
    if (Op.getValueType() == MVT::i32) {
      // Extend to 64 bits and multiply there; the product is exact.
      unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      LHS = DAG.getNode(ExtendOpc, DL, MVT::i64, LHS);
      RHS = DAG.getNode(ExtendOpc, DL, MVT::i64, RHS);
      SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
      Value = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);

      // Overflow iff the exact 64-bit product does not round-trip through
      // the truncated 32-bit result.
      SDVTList VTs = DAG.getVTList(MVT::i64, MVT_CC);
      if (IsSigned) {
        // Compare the product against the sign-extended truncation.
        SDValue SExtMul = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Value);
        Overflow =
            DAG.getNode(AArch64ISD::SUBS, DL, VTs, Mul, SExtMul).getValue(1);
      } else {
        // Any bit set in the upper half means overflow (tst-style check).
        SDValue UpperBits = DAG.getConstant(0xFFFFFFFF00000000, DL, MVT::i64);
        Overflow =
            DAG.getNode(AArch64ISD::ANDS, DL, VTs, Mul, UpperBits).getValue(1);
      }
      break;
    }
    assert(Op.getValueType() == MVT::i64 && "Expected an i64 value type");
    // 64-bit case: compute low and high halves separately.
    Value = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
    if (IsSigned) {
      SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS);
      // No overflow iff the high half equals the sign bits of the low half.
      SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value,
                                      DAG.getConstant(63, DL, MVT::i64));
      // LowerBits is the second operand so the arithmetic shift can be
      // folded into the SUBS.
      SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
      Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
                     .getValue(1);
    } else {
      SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS);
      // No overflow iff the high half is zero.
      SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
      Overflow =
          DAG.getNode(AArch64ISD::SUBS, DL, VTs,
                      DAG.getConstant(0, DL, MVT::i64),
                      UpperBits).getValue(1);
    }
    break;
  }
  } // switch (...)

  if (Opc) {
    SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32);

    // Emit the flag-setting AArch64 operation; overflow is its second result.
    Value = DAG.getNode(Opc, DL, VTs, LHS, RHS);
    Overflow = Value.getValue(1);
  }
  return std::make_pair(Value, Overflow);
}
3098 | |
// Lower XOR, recognizing two boolean-inversion patterns that map to CSEL /
// CSINV; anything else is returned unchanged.
SDValue AArch64TargetLowering::LowerXOR(SDValue Op, SelectionDAG &DAG) const {
  if (useSVEForFixedLengthVectorVT(Op.getValueType()))
    return LowerToScalableOp(Op, DAG);

  SDValue Sel = Op.getOperand(0);
  SDValue Other = Op.getOperand(1);
  SDLoc dl(Sel);

  // If the operand is an overflow checking operation, invert the condition
  // code and kill the Not operation. I.e., transform:
  //   (xor (overflow_op_bool, 1))
  //     --> (csel 1, 0, invert(cc), overflow_op_ret)
  if (isOneConstant(Other) && ISD::isOverflowIntrOpRes(Sel)) {
    // Only lower legal XALUO ops.
    if (!DAG.getTargetLoweringInfo().isTypeLegal(Sel->getValueType(0)))
      return SDValue();

    SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
    SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
    AArch64CC::CondCode CC;
    SDValue Value, Overflow;
    std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Sel.getValue(0), DAG);
    SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
    return DAG.getNode(AArch64ISD::CSEL, dl, Op.getValueType(), TVal, FVal,
                       CCVal, Overflow);
  }

  // If neither operand is a SELECT_CC, give up.
  if (Sel.getOpcode() != ISD::SELECT_CC)
    std::swap(Sel, Other);
  if (Sel.getOpcode() != ISD::SELECT_CC)
    return Op;

  // The folding we want to perform is:
  //   (xor x, (select_cc a, b, cc, 0, -1))
  //     --> (csel x, (xor x, -1), cc ...)
  // The latter will get matched to a CSINV instruction.
  ISD::CondCode CC = cast<CondCodeSDNode>(Sel.getOperand(4))->get();
  SDValue LHS = Sel.getOperand(0);
  SDValue RHS = Sel.getOperand(1);
  SDValue TVal = Sel.getOperand(2);
  SDValue FVal = Sel.getOperand(3);

  // getAArch64Cmp below only handles i32/i64 compares.
  if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
    return Op;

  ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
  ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);

  // The values aren't constants; this isn't the pattern we're looking for.
  if (!CFVal || !CTVal)
    return Op;

  // We can commute the SELECT_CC by inverting the condition. This might be
  // needed to make this fit into a CSINV pattern.
  if (CTVal->isAllOnesValue() && CFVal->isNullValue()) {
    std::swap(TVal, FVal);
    std::swap(CTVal, CFVal);
    CC = ISD::getSetCCInverse(CC, LHS.getValueType());
  }

  // If the constants line up (select of 0 / -1), perform the transform.
  if (CTVal->isNullValue() && CFVal->isAllOnesValue()) {
    SDValue CCVal;
    SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);

    // csel Other, ~Other, cc  (xor with 0 is Other; xor with -1 is ~Other).
    FVal = Other;
    TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other,
                       DAG.getConstant(-1ULL, dl, Other.getValueType()));

    return DAG.getNode(AArch64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal,
                       CCVal, Cmp);
  }

  return Op;
}
3181 | |
3182 | static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { |
3183 | EVT VT = Op.getValueType(); |
3184 | |
3185 | |
3186 | if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) |
3187 | return SDValue(); |
3188 | |
3189 | SDVTList VTs = DAG.getVTList(VT, MVT::i32); |
3190 | |
3191 | unsigned Opc; |
3192 | bool ExtraOp = false; |
3193 | switch (Op.getOpcode()) { |
3194 | default: |
3195 | llvm_unreachable("Invalid code"); |
3196 | case ISD::ADDC: |
3197 | Opc = AArch64ISD::ADDS; |
3198 | break; |
3199 | case ISD::SUBC: |
3200 | Opc = AArch64ISD::SUBS; |
3201 | break; |
3202 | case ISD::ADDE: |
3203 | Opc = AArch64ISD::ADCS; |
3204 | ExtraOp = true; |
3205 | break; |
3206 | case ISD::SUBE: |
3207 | Opc = AArch64ISD::SBCS; |
3208 | ExtraOp = true; |
3209 | break; |
3210 | } |
3211 | |
3212 | if (!ExtraOp) |
3213 | return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1)); |
3214 | return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1), |
3215 | Op.getOperand(2)); |
3216 | } |
3217 | |
// Lower an overflow-checking arithmetic node (S/UADDO etc.) to the flag
// producing AArch64 operation plus a CSEL materialising the overflow bit.
static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
  // Let legalize expand this if it isn't a legal type yet.
  if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
    return SDValue();

  SDLoc dl(Op);
  AArch64CC::CondCode CC;
  // The actual operation that sets the overflow or carry flag.
  SDValue Value, Overflow;
  std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Op, DAG);

  // We use 0 and 1 as false and true values.
  SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
  SDValue FVal = DAG.getConstant(0, dl, MVT::i32);

  // We use an inverted condition, because the conditional select is inverted
  // too. This will allow it to be selected to a single instruction:
  // CSINC Wd, WZR, WZR, invert(cond).
  SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
  Overflow = DAG.getNode(AArch64ISD::CSEL, dl, MVT::i32, FVal, TVal,
                         CCVal, Overflow);

  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
  return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
}
3243 | |
3244 | |
3245 | |
3246 | |
3247 | |
3248 | |
// Lower an ISD::PREFETCH node into the AArch64 PRFM operation, packing the
// rw/locality/cache-type operands into the single PRFM immediate encoding.
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
  unsigned Locality = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
  unsigned IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();

  bool IsStream = !Locality;
  // When the locality number is set
  if (Locality) {
    // The front-end should have filtered out the out-of-range values
    assert(Locality <= 3 && "Prefetch locality out-of-range");
    // The locality degree is the opposite of the cache speed.
    // Put the number the other way around.
    // The encoding starts at 0 for level 1
    Locality = 3 - Locality;
  }

  // Built the mask value encoding the expected behavior.
  unsigned PrfOp = (IsWrite << 4) |     // Load/Store bit
                   (!IsData << 3) |     // IsDataCache bit
                   (Locality << 1) |    // Cache level bits
                   (unsigned)IsStream;  // Stream bit
  return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),
                     DAG.getConstant(PrfOp, DL, MVT::i32), Op.getOperand(1));
}
3274 | |
3275 | SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op, |
3276 | SelectionDAG &DAG) const { |
3277 | EVT VT = Op.getValueType(); |
3278 | if (VT.isScalableVector()) |
3279 | return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_EXTEND_MERGE_PASSTHRU); |
3280 | |
3281 | if (useSVEForFixedLengthVectorVT(VT)) |
3282 | return LowerFixedLengthFPExtendToSVE(Op, DAG); |
3283 | |
3284 | assert(Op.getValueType() == MVT::f128 && "Unexpected lowering"); |
3285 | return SDValue(); |
3286 | } |
3287 | |
3288 | SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op, |
3289 | SelectionDAG &DAG) const { |
3290 | if (Op.getValueType().isScalableVector()) |
3291 | return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_ROUND_MERGE_PASSTHRU); |
3292 | |
3293 | bool IsStrict = Op->isStrictFPOpcode(); |
3294 | SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0); |
3295 | EVT SrcVT = SrcVal.getValueType(); |
3296 | |
3297 | if (useSVEForFixedLengthVectorVT(SrcVT)) |
3298 | return LowerFixedLengthFPRoundToSVE(Op, DAG); |
3299 | |
3300 | if (SrcVT != MVT::f128) { |
3301 | |
3302 | if (useSVEForFixedLengthVectorVT(SrcVT)) |
3303 | return SDValue(); |
3304 | |
3305 | |
3306 | return Op; |
3307 | } |
3308 | |
3309 | return SDValue(); |
3310 | } |
3311 | |
SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
                                                    SelectionDAG &DAG) const {
  // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
  // Any additional optimization in this function should be recorded
  // in the cost tables.
  EVT InVT = Op.getOperand(0).getValueType();
  EVT VT = Op.getValueType();

  if (VT.isScalableVector()) {
    unsigned Opcode = Op.getOpcode() == ISD::FP_TO_UINT
                          ? AArch64ISD::FCVTZU_MERGE_PASSTHRU
                          : AArch64ISD::FCVTZS_MERGE_PASSTHRU;
    return LowerToPredicatedOp(Op, DAG, Opcode);
  }

  if (useSVEForFixedLengthVectorVT(VT) || useSVEForFixedLengthVectorVT(InVT))
    return LowerFixedLengthFPToIntToSVE(Op, DAG);

  unsigned NumElts = InVT.getVectorNumElements();

  // f16 conversions are promoted to f32 when full fp16 is not supported.
  if (InVT.getVectorElementType() == MVT::f16 &&
      !Subtarget->hasFullFP16()) {
    MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts);
    SDLoc dl(Op);
    return DAG.getNode(
        Op.getOpcode(), dl, Op.getValueType(),
        DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0)));
  }

  uint64_t VTSize = VT.getFixedSizeInBits();
  uint64_t InVTSize = InVT.getFixedSizeInBits();
  if (VTSize < InVTSize) {
    SDLoc dl(Op);
    // Narrowing: convert at the source element width, then truncate the
    // integer result down to the requested element type.
    SDValue Cv =
        DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(),
                    Op.getOperand(0));
    return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
  }

  if (VTSize > InVTSize) {
    SDLoc dl(Op);
    // Widening: extend the FP source to the destination element width first,
    // then convert at that width.
    MVT ExtVT =
        MVT::getVectorVT(MVT::getFloatingPointVT(VT.getScalarSizeInBits()),
                         VT.getVectorNumElements());
    SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, ExtVT, Op.getOperand(0));
    return DAG.getNode(Op.getOpcode(), dl, VT, Ext);
  }

  // Type changing conversions are legal.
  return Op;
}
3364 | |
SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
                                              SelectionDAG &DAG) const {
  // Strict FP nodes carry the chain as operand 0.
  bool IsStrict = Op->isStrictFPOpcode();
  SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);

  if (SrcVal.getValueType().isVector())
    return LowerVectorFP_TO_INT(Op, DAG);

  // f16 conversions are promoted to f32 when full fp16 is not supported.
  if (SrcVal.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
    assert(!IsStrict && "Lowering of strict fp16 not yet implemented");
    SDLoc dl(Op);
    return DAG.getNode(
        Op.getOpcode(), dl, Op.getValueType(),
        DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, SrcVal));
  }

  if (SrcVal.getValueType() != MVT::f128) {
    // It's legal except when f128 is involved
    return Op;
  }

  // f128 is not handled here; returning SDValue() requests expansion.
  return SDValue();
}
3389 | |
SDValue
AArch64TargetLowering::LowerVectorFP_TO_INT_SAT(SDValue Op,
                                                SelectionDAG &DAG) const {
  // Lower vector FP_TO_*INT_SAT. Only the cases the hardware conversion
  // already saturates correctly are handled; everything else is deferred.
  SDValue SrcVal = Op.getOperand(0);
  EVT SrcVT = SrcVal.getValueType();
  EVT DstVT = Op.getValueType();
  EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();

  uint64_t SrcElementWidth = SrcVT.getScalarSizeInBits();
  uint64_t DstElementWidth = DstVT.getScalarSizeInBits();
  uint64_t SatWidth = SatVT.getScalarSizeInBits();
  assert(SatWidth <= DstElementWidth &&
         "Saturation width cannot exceed result width");

  // Scalable vectors are not handled here; defer to the generic lowering
  // by returning SDValue().
  if (DstVT.isScalableVector())
    return SDValue();

  // Saturating to a width narrower than the destination element is not
  // matched here either.
  if (SatWidth != DstElementWidth)
    return SDValue();

  EVT SrcElementVT = SrcVT.getVectorElementType();

  // In the absence of FP16 support, promote f16 to f32 and saturate the
  // result.
  if (SrcElementVT == MVT::f16 && !Subtarget->hasFullFP16()) {
    MVT F32VT = MVT::getVectorVT(MVT::f32, SrcVT.getVectorNumElements());
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), DstVT,
                       DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), F32VT, SrcVal),
                       Op.getOperand(1));
  }

  // Cases that we can emit directly: same element width, supported FP type.
  if ((SrcElementWidth == DstElementWidth) &&
      (SrcElementVT == MVT::f64 || SrcElementVT == MVT::f32 ||
       (SrcElementVT == MVT::f16 && Subtarget->hasFullFP16()))) {
    return Op;
  }

  // Everything else falls back to the default expansion.
  return SDValue();
}
3437 | |
SDValue AArch64TargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
                                                  SelectionDAG &DAG) const {
  // Lower scalar FP_TO_*INT_SAT; the AArch64 conversions already saturate,
  // so matching cases can simply stay as-is.
  SDValue SrcVal = Op.getOperand(0);
  EVT SrcVT = SrcVal.getValueType();

  if (SrcVT.isVector())
    return LowerVectorFP_TO_INT_SAT(Op, DAG);

  EVT DstVT = Op.getValueType();
  EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
  uint64_t SatWidth = SatVT.getScalarSizeInBits();
  uint64_t DstWidth = DstVT.getScalarSizeInBits();
  assert(SatWidth <= DstWidth && "Saturation width cannot exceed result width");

  // Saturating to a width narrower than the destination is not handled here.
  if (SatWidth != DstWidth)
    return SDValue();

  // In the absence of FP16 support, promote f16 to f32 and saturate the
  // result.
  if (SrcVT == MVT::f16 && !Subtarget->hasFullFP16())
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), DstVT,
                       DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, SrcVal),
                       Op.getOperand(1));

  // Cases that we can emit directly.
  if ((SrcVT == MVT::f64 || SrcVT == MVT::f32 ||
       (SrcVT == MVT::f16 && Subtarget->hasFullFP16())) &&
      (DstVT == MVT::i64 || DstVT == MVT::i32))
    return Op;

  // Everything else falls back to the default expansion.
  return SDValue();
}
3473 | |
SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
                                                    SelectionDAG &DAG) const {
  // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
  // Any additional optimization in this function should be recorded
  // in the cost tables.
  EVT VT = Op.getValueType();
  SDLoc dl(Op);
  SDValue In = Op.getOperand(0);
  EVT InVT = In.getValueType();
  unsigned Opc = Op.getOpcode();
  bool IsSigned = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;

  if (VT.isScalableVector()) {
    if (InVT.getVectorElementType() == MVT::i1) {
      // We can't directly extend an SVE predicate; extend it first.
      unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      EVT CastVT = getPromotedVTForPredicate(InVT);
      In = DAG.getNode(CastOpc, dl, CastVT, In);
      return DAG.getNode(Opc, dl, VT, In);
    }

    unsigned Opcode = IsSigned ? AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU
                               : AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU;
    return LowerToPredicatedOp(Op, DAG, Opcode);
  }

  if (useSVEForFixedLengthVectorVT(VT) || useSVEForFixedLengthVectorVT(InVT))
    return LowerFixedLengthIntToFPToSVE(Op, DAG);

  uint64_t VTSize = VT.getFixedSizeInBits();
  uint64_t InVTSize = InVT.getFixedSizeInBits();
  if (VTSize < InVTSize) {
    // Narrowing: convert at the wider source width, then round the FP result
    // down to the narrower destination type.
    MVT CastVT =
        MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()),
                         InVT.getVectorNumElements());
    In = DAG.getNode(Opc, dl, CastVT, In);
    return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0, dl));
  }

  if (VTSize > InVTSize) {
    // Widening: sign/zero-extend the integer source to the destination
    // element width, then convert.
    unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    EVT CastVT = VT.changeVectorElementTypeToInteger();
    In = DAG.getNode(CastOpc, dl, CastVT, In);
    return DAG.getNode(Opc, dl, VT, In);
  }

  // Same-size conversions are legal as-is.
  return Op;
}
3522 | |
SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
                                              SelectionDAG &DAG) const {
  if (Op.getValueType().isVector())
    return LowerVectorINT_TO_FP(Op, DAG);

  // Strict FP nodes carry the chain as operand 0.
  bool IsStrict = Op->isStrictFPOpcode();
  SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);

  // f16 conversions are promoted to f32 when full fp16 is not supported.
  if (Op.getValueType() == MVT::f16 &&
      !Subtarget->hasFullFP16()) {
    assert(!IsStrict && "Lowering of strict fp16 not yet implemented");
    SDLoc dl(Op);
    return DAG.getNode(
        ISD::FP_ROUND, dl, MVT::f16,
        DAG.getNode(Op.getOpcode(), dl, MVT::f32, SrcVal),
        DAG.getIntPtrConstant(0, dl));
  }

  // i128 conversions are libcalls.
  if (SrcVal.getValueType() == MVT::i128)
    return SDValue();

  // Other conversions are legal, unless it's to the completely software-based
  // fp128.
  if (Op.getValueType() != MVT::f128)
    return Op;
  return SDValue();
}
3552 | |
SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
                                            SelectionDAG &DAG) const {
  // Lower FSINCOS by calling the __sincos_stret entry point, which returns
  // both results at once in a {ArgTy, ArgTy} struct.
  SDLoc dl(Op);
  SDValue Arg = Op.getOperand(0);
  EVT ArgVT = Arg.getValueType();
  Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());

  ArgListTy Args;
  ArgListEntry Entry;

  Entry.Node = Arg;
  Entry.Ty = ArgTy;
  Entry.IsSExt = false;
  Entry.IsZExt = false;
  Args.push_back(Entry);

  // Pick the f32 or f64 flavor of the libcall to match the argument type.
  RTLIB::Libcall LC = ArgVT == MVT::f64 ? RTLIB::SINCOS_STRET_F64
                                        : RTLIB::SINCOS_STRET_F32;
  const char *LibcallName = getLibcallName(LC);
  SDValue Callee =
      DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));

  // The libcall returns both sin and cos packed into one struct.
  StructType *RetTy = StructType::get(ArgTy, ArgTy);
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl)
      .setChain(DAG.getEntryNode())
      .setLibCallee(CallingConv::Fast, RetTy, Callee, std::move(Args));

  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
  return CallResult.first;
}
3586 | |
3587 | static MVT getSVEContainerType(EVT ContentTy); |
3588 | |
SDValue AArch64TargetLowering::LowerBITCAST(SDValue Op,
                                            SelectionDAG &DAG) const {
  EVT OpVT = Op.getValueType();
  EVT ArgVT = Op.getOperand(0).getValueType();

  if (useSVEForFixedLengthVectorVT(OpVT))
    return LowerFixedLengthBitcastToSVE(Op, DAG);

  if (OpVT.isScalableVector()) {
    if (isTypeLegal(OpVT) && !isTypeLegal(ArgVT)) {
      assert(OpVT.isFloatingPoint() && !ArgVT.isFloatingPoint() &&
             "Expected int->fp bitcast!");
      // Widen the illegal integer source to its SVE container type before
      // reinterpreting the bits as the legal FP result type.
      SDValue ExtResult =
          DAG.getNode(ISD::ANY_EXTEND, SDLoc(Op), getSVEContainerType(ArgVT),
                      Op.getOperand(0));
      return getSVESafeBitCast(OpVT, ExtResult, DAG);
    }
    return getSVESafeBitCast(OpVT, Op.getOperand(0), DAG);
  }

  // Beyond this point only i16 -> f16/bf16 bitcasts are custom-handled.
  if (OpVT != MVT::f16 && OpVT != MVT::bf16)
    return SDValue();

  assert(ArgVT == MVT::i16);
  SDLoc DL(Op);

  // Route through i32/f32: any-extend the i16, bitcast to f32, then extract
  // the half-precision subregister of the FP register.
  Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op.getOperand(0));
  Op = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op);
  return SDValue(
      DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, OpVT, Op,
                         DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
      0);
}
3622 | |
3623 | static EVT getExtensionTo64Bits(const EVT &OrigVT) { |
3624 | if (OrigVT.getSizeInBits() >= 64) |
3625 | return OrigVT; |
3626 | |
3627 | assert(OrigVT.isSimple() && "Expecting a simple value type"); |
3628 | |
3629 | MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy; |
3630 | switch (OrigSimpleTy) { |
3631 | default: llvm_unreachable("Unexpected Vector Type"); |
3632 | case MVT::v2i8: |
3633 | case MVT::v2i16: |
3634 | return MVT::v2i32; |
3635 | case MVT::v4i8: |
3636 | return MVT::v4i16; |
3637 | } |
3638 | } |
3639 | |
static SDValue addRequiredExtensionForVectorMULL(SDValue N, SelectionDAG &DAG,
                                                 const EVT &OrigTy,
                                                 const EVT &ExtTy,
                                                 unsigned ExtOpcode) {
  // The vector originally had a size of OrigTy. It was then extended to ExtTy.
  // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
  // 64-bits we need to insert a new extension so that it will be 64-bits.
  assert(ExtTy.is128BitVector() && "Unexpected extension size");
  if (OrigTy.getSizeInBits() >= 64)
    return N;

  // Must extend size to at least 64 bits to be used as an operand for VMULL.
  EVT NewVT = getExtensionTo64Bits(OrigTy);

  return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
}
3656 | |
3657 | static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG, |
3658 | bool isSigned) { |
3659 | EVT VT = N->getValueType(0); |
3660 | |
3661 | if (N->getOpcode() != ISD::BUILD_VECTOR) |
3662 | return false; |
3663 | |
3664 | for (const SDValue &Elt : N->op_values()) { |
3665 | if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) { |
3666 | unsigned EltSize = VT.getScalarSizeInBits(); |
3667 | unsigned HalfSize = EltSize / 2; |
3668 | if (isSigned) { |
3669 | if (!isIntN(HalfSize, C->getSExtValue())) |
3670 | return false; |
3671 | } else { |
3672 | if (!isUIntN(HalfSize, C->getZExtValue())) |
3673 | return false; |
3674 | } |
3675 | continue; |
3676 | } |
3677 | return false; |
3678 | } |
3679 | |
3680 | return true; |
3681 | } |
3682 | |
// Strip (or shrink) the extension from a (S/U)MULL operand: for an explicit
// extend node, return its (possibly re-extended-to-64-bit) input; for a
// BUILD_VECTOR of small constants, rebuild it at half the element width.
static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
  if (N->getOpcode() == ISD::SIGN_EXTEND ||
      N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND)
    return addRequiredExtensionForVectorMULL(N->getOperand(0), DAG,
                                             N->getOperand(0)->getValueType(0),
                                             N->getValueType(0),
                                             N->getOpcode());

  assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
  EVT VT = N->getValueType(0);
  SDLoc dl(N);
  unsigned EltSize = VT.getScalarSizeInBits() / 2;
  unsigned NumElts = VT.getVectorNumElements();
  MVT TruncVT = MVT::getIntegerVT(EltSize);
  SmallVector<SDValue, 8> Ops;
  for (unsigned i = 0; i != NumElts; ++i) {
    ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
    const APInt &CInt = C->getAPIntValue();
    // Element types smaller than 32 bits are not legal, so use i32 elements.
    // The values are implicitly truncated so sext vs. zext doesn't matter.
    Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
  }
  return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
}
3707 | |
3708 | static bool isSignExtended(SDNode *N, SelectionDAG &DAG) { |
3709 | return N->getOpcode() == ISD::SIGN_EXTEND || |
3710 | N->getOpcode() == ISD::ANY_EXTEND || |
3711 | isExtendedBUILD_VECTOR(N, DAG, true); |
3712 | } |
3713 | |
3714 | static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) { |
3715 | return N->getOpcode() == ISD::ZERO_EXTEND || |
3716 | N->getOpcode() == ISD::ANY_EXTEND || |
3717 | isExtendedBUILD_VECTOR(N, DAG, false); |
3718 | } |
3719 | |
3720 | static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) { |
3721 | unsigned Opcode = N->getOpcode(); |
3722 | if (Opcode == ISD::ADD || Opcode == ISD::SUB) { |
3723 | SDNode *N0 = N->getOperand(0).getNode(); |
3724 | SDNode *N1 = N->getOperand(1).getNode(); |
3725 | return N0->hasOneUse() && N1->hasOneUse() && |
3726 | isSignExtended(N0, DAG) && isSignExtended(N1, DAG); |
3727 | } |
3728 | return false; |
3729 | } |
3730 | |
3731 | static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) { |
3732 | unsigned Opcode = N->getOpcode(); |
3733 | if (Opcode == ISD::ADD || Opcode == ISD::SUB) { |
3734 | SDNode *N0 = N->getOperand(0).getNode(); |
3735 | SDNode *N1 = N->getOperand(1).getNode(); |
3736 | return N0->hasOneUse() && N1->hasOneUse() && |
3737 | isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG); |
3738 | } |
3739 | return false; |
3740 | } |
3741 | |
SDValue AArch64TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
                                                SelectionDAG &DAG) const {
  // The rounding mode is in bits 23:22 of the FPCR.
  // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
  // The formula we use to implement this is (((FPCR + 1 << 22) >> 22) & 3)
  // so that the shift + and get folded into a bitfield extract.
  SDLoc dl(Op);

  SDValue Chain = Op.getOperand(0);
  // Read the FPCR via the aarch64_get_fpcr intrinsic, threading the chain.
  SDValue FPCR_64 = DAG.getNode(
      ISD::INTRINSIC_W_CHAIN, dl, {MVT::i64, MVT::Other},
      {Chain, DAG.getConstant(Intrinsic::aarch64_get_fpcr, dl, MVT::i64)});
  Chain = FPCR_64.getValue(1);
  SDValue FPCR_32 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, FPCR_64);
  SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPCR_32,
                                  DAG.getConstant(1U << 22, dl, MVT::i32));
  SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
                              DAG.getConstant(22, dl, MVT::i32));
  SDValue AND = DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
                            DAG.getConstant(3, dl, MVT::i32));
  return DAG.getMergeValues({AND, Chain}, dl);
}
3764 | |
SDValue AArch64TargetLowering::LowerSET_ROUNDING(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Chain = Op->getOperand(0);
  SDValue RMValue = Op->getOperand(1);

  // The rounding mode is in bits 23:22 of the FPCR.
  // The llvm.set.rounding argument value to the rounding mode in FPCR mapping
  // is 0->3, 1->0, 2->1, 3->2. The formula we use to implement this is
  // ((arg - 1) & 3) << 22.
  //
  // The argument of llvm.set.rounding must be within the segment [0, 3], so
  // NearestTiesToAway (4) is not handled here. It is responsibility of the
  // code generated llvm.set.rounding to ensure this condition.

  // Calculate new value of FPCR[23:22].
  RMValue = DAG.getNode(ISD::SUB, DL, MVT::i32, RMValue,
                        DAG.getConstant(1, DL, MVT::i32));
  RMValue = DAG.getNode(ISD::AND, DL, MVT::i32, RMValue,
                        DAG.getConstant(0x3, DL, MVT::i32));
  RMValue =
      DAG.getNode(ISD::SHL, DL, MVT::i32, RMValue,
                  DAG.getConstant(AArch64::RoundingBitsPos, DL, MVT::i32));
  RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, RMValue);

  // Get current value of FPCR.
  SDValue Ops[] = {
      Chain, DAG.getTargetConstant(Intrinsic::aarch64_get_fpcr, DL, MVT::i64)};
  SDValue FPCR =
      DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i64, MVT::Other}, Ops);
  Chain = FPCR.getValue(1);
  FPCR = FPCR.getValue(0);

  // Clear the old rounding-mode bits, then OR in the new value of FPCR[23:22].
  const int RMMask = ~(AArch64::Rounding::rmMask << AArch64::RoundingBitsPos);
  FPCR = DAG.getNode(ISD::AND, DL, MVT::i64, FPCR, 
                     DAG.getConstant(RMMask, DL, MVT::i64));
  FPCR = DAG.getNode(ISD::OR, DL, MVT::i64, FPCR, RMValue);
  SDValue Ops2[] = {
      Chain, DAG.getTargetConstant(Intrinsic::aarch64_set_fpcr, DL, MVT::i64),
      FPCR};
  return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
}
3808 | |
SDValue AArch64TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();

  // If SVE is available then i64 vector multiplications can also be made
  // legal.
  bool OverrideNEON = VT == MVT::v2i64 || VT == MVT::v1i64;

  if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT, OverrideNEON))
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED, OverrideNEON);

  // Multiplications are only custom-lowered for 128-bit vectors so that
  // (S/U)MULL can be detected.  Otherwise v2i64 multiplications are not legal.
  assert(VT.is128BitVector() && VT.isInteger() &&
         "unexpected type for custom-lowering ISD::MUL");
  SDNode *N0 = Op.getOperand(0).getNode();
  SDNode *N1 = Op.getOperand(1).getNode();
  unsigned NewOpc = 0;
  bool isMLA = false;
  bool isN0SExt = isSignExtended(N0, DAG);
  bool isN1SExt = isSignExtended(N1, DAG);
  if (isN0SExt && isN1SExt)
    NewOpc = AArch64ISD::SMULL;
  else {
    bool isN0ZExt = isZeroExtended(N0, DAG);
    bool isN1ZExt = isZeroExtended(N1, DAG);
    if (isN0ZExt && isN1ZExt)
      NewOpc = AArch64ISD::UMULL;
    else if (isN1SExt || isN1ZExt) {
      // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
      // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
      if (isN1SExt && isAddSubSExt(N0, DAG)) {
        NewOpc = AArch64ISD::SMULL;
        isMLA = true;
      } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
        NewOpc = AArch64ISD::UMULL;
        isMLA = true;
      } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
        std::swap(N0, N1);
        NewOpc = AArch64ISD::UMULL;
        isMLA = true;
      }
    }

    if (!NewOpc) {
      if (VT == MVT::v2i64)
        // Fall through to expand this.  It is not legal.
        return SDValue();
      else
        // Other vector multiplications are legal.
        return Op;
    }
  }

  // Legalize to a S/UMULL instruction
  SDLoc DL(Op);
  SDValue Op0;
  SDValue Op1 = skipExtensionForVectorMULL(N1, DAG);
  if (!isMLA) {
    Op0 = skipExtensionForVectorMULL(N0, DAG);
    assert(Op0.getValueType().is64BitVector() &&
           Op1.getValueType().is64BitVector() &&
           "unexpected types for extended operands to VMULL");
    return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
  }

  // MLA case: distribute the multiply over the add/sub, i.e. rewrite
  // (ext A +/- ext B) * ext C as (mull A, C) +/- (mull B, C).
  SDValue N00 = skipExtensionForVectorMULL(N0->getOperand(0).getNode(), DAG);
  SDValue N01 = skipExtensionForVectorMULL(N0->getOperand(1).getNode(), DAG);
  EVT Op1VT = Op1.getValueType();
  return DAG.getNode(N0->getOpcode(), DL, VT,
                     DAG.getNode(NewOpc, DL, VT,
                               DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
                     DAG.getNode(NewOpc, DL, VT,
                               DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
}
3884 | |
3885 | static inline SDValue getPTrue(SelectionDAG &DAG, SDLoc DL, EVT VT, |
3886 | int Pattern) { |
3887 | return DAG.getNode(AArch64ISD::PTRUE, DL, VT, |
3888 | DAG.getTargetConstant(Pattern, DL, MVT::i32)); |
3889 | } |
3890 | |
// Lower aarch64_sve_convert_to_svbool: reinterpret a narrower predicate as
// an svbool, zeroing the newly-introduced lanes when they are not already
// known to be zero.
static SDValue lowerConvertToSVBool(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  EVT OutVT = Op.getValueType();
  SDValue InOp = Op.getOperand(1);
  EVT InVT = InOp.getValueType();

  // Return the operand if the cast isn't changing type,
  // e.g. <n x 16 x i1> -> <n x 16 x i1>
  if (InVT == OutVT)
    return InOp;

  SDValue Reinterpret =
      DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, OutVT, InOp);

  // If the input comes from a node that already defines/zeros all lanes
  // (a merge-zero compare, or an sve.ptrue), the bare reinterpret suffices.
  switch (InOp.getOpcode()) {
  case AArch64ISD::SETCC_MERGE_ZERO:
    return Reinterpret;
  case ISD::INTRINSIC_WO_CHAIN:
    if (InOp.getConstantOperandVal(0) == Intrinsic::aarch64_sve_ptrue)
      return Reinterpret;
  }

  // Otherwise, zero the newly-introduced lanes by ANDing with an all-true
  // predicate of the *input* type, reinterpreted to the output type.
  SDValue Mask = getPTrue(DAG, DL, InVT, AArch64SVEPredPattern::all);
  SDValue MaskReinterpret =
      DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, OutVT, Mask);
  return DAG.getNode(ISD::AND, DL, OutVT, Reinterpret, MaskReinterpret);
}
3921 | |
3922 | SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, |
3923 | SelectionDAG &DAG) const { |
3924 | unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); |
3925 | SDLoc dl(Op); |
3926 | switch (IntNo) { |
3927 | default: return SDValue(); |
3928 | case Intrinsic::thread_pointer: { |
3929 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); |
3930 | return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT); |
3931 | } |
3932 | case Intrinsic::aarch64_neon_abs: { |
3933 | EVT Ty = Op.getValueType(); |
3934 | if (Ty == MVT::i64) { |
3935 | SDValue Result = DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, |
3936 | Op.getOperand(1)); |
3937 | Result = DAG.getNode(ISD::ABS, dl, MVT::v1i64, Result); |
3938 | return DAG.getNode(ISD::BITCAST, dl, MVT::i64, Result); |
3939 | } else if (Ty.isVector() && Ty.isInteger() && isTypeLegal(Ty)) { |
3940 | return DAG.getNode(ISD::ABS, dl, Ty, Op.getOperand(1)); |
3941 | } else { |
3942 | report_fatal_error("Unexpected type for AArch64 NEON intrinic"); |
3943 | } |
3944 | } |
3945 | case Intrinsic::aarch64_neon_smax: |
3946 | return DAG.getNode(ISD::SMAX, dl, Op.getValueType(), |
3947 | Op.getOperand(1), Op.getOperand(2)); |
3948 | case Intrinsic::aarch64_neon_umax: |
3949 | return DAG.getNode(ISD::UMAX, dl, Op.getValueType(), |
3950 | Op.getOperand(1), Op.getOperand(2)); |
3951 | case Intrinsic::aarch64_neon_smin: |
3952 | return DAG.getNode(ISD::SMIN, dl, Op.getValueType(), |
3953 | Op.getOperand(1), Op.getOperand(2)); |
3954 | case Intrinsic::aarch64_neon_umin: |
3955 | return DAG.getNode(ISD::UMIN, dl, Op.getValueType(), |
3956 | Op.getOperand(1), Op.getOperand(2)); |
3957 | |
3958 | case Intrinsic::aarch64_sve_sunpkhi: |
3959 | return DAG.getNode(AArch64ISD::SUNPKHI, dl, Op.getValueType(), |
3960 | Op.getOperand(1)); |
3961 | case Intrinsic::aarch64_sve_sunpklo: |
3962 | return DAG.getNode(AArch64ISD::SUNPKLO, dl, Op.getValueType(), |
3963 | Op.getOperand(1)); |
3964 | case Intrinsic::aarch64_sve_uunpkhi: |
3965 | return DAG.getNode(AArch64ISD::UUNPKHI, dl, Op.getValueType(), |
3966 | Op.getOperand(1)); |
3967 | case Intrinsic::aarch64_sve_uunpklo: |
3968 | return DAG.getNode(AArch64ISD::UUNPKLO, dl, Op.getValueType(), |
3969 | Op.getOperand(1)); |
3970 | case Intrinsic::aarch64_sve_clasta_n: |
3971 | return DAG.getNode(AArch64ISD::CLASTA_N, dl, Op.getValueType(), |
3972 | Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); |
3973 | case Intrinsic::aarch64_sve_clastb_n: |
3974 | return DAG.getNode(AArch64ISD::CLASTB_N, dl, Op.getValueType(), |
3975 | Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); |
3976 | case Intrinsic::aarch64_sve_lasta: |
3977 | return DAG.getNode(AArch64ISD::LASTA, dl, Op.getValueType(), |
3978 | Op.getOperand(1), Op.getOperand(2)); |
3979 | case Intrinsic::aarch64_sve_lastb: |
3980 | return DAG.getNode(AArch64ISD::LASTB, dl, Op.getValueType(), |
3981 | Op.getOperand(1), Op.getOperand(2)); |
3982 | case Intrinsic::aarch64_sve_rev: |
3983 | return DAG.getNode(ISD::VECTOR_REVERSE, dl, Op.getValueType(), |
3984 | Op.getOperand(1)); |
3985 | case Intrinsic::aarch64_sve_tbl: |
3986 | return DAG.getNode(AArch64ISD::TBL, dl, Op.getValueType(), |
3987 | Op.getOperand(1), Op.getOperand(2)); |
3988 | case Intrinsic::aarch64_sve_trn1: |
3989 | return DAG.getNode(AArch64ISD::TRN1, dl, Op.getValueType(), |
3990 | Op.getOperand(1), Op.getOperand(2)); |
3991 | case Intrinsic::aarch64_sve_trn2: |
3992 | return DAG.getNode(AArch64ISD::TRN2, dl, Op.getValueType(), |
3993 | Op.getOperand(1), Op.getOperand(2)); |
3994 | case Intrinsic::aarch64_sve_uzp1: |
3995 | return DAG.getNode(AArch64ISD::UZP1, dl, Op.getValueType(), |
3996 | Op.getOperand(1), Op.getOperand(2)); |
3997 | case Intrinsic::aarch64_sve_uzp2: |
3998 | return DAG.getNode(AArch64ISD::UZP2, dl, Op.getValueType(), |
3999 | Op.getOperand(1), Op.getOperand(2)); |
4000 | case Intrinsic::aarch64_sve_zip1: |
4001 | return DAG.getNode(AArch64ISD::ZIP1, dl, Op.getValueType(), |
4002 | Op.getOperand(1), Op.getOperand(2)); |
4003 | case Intrinsic::aarch64_sve_zip2: |
4004 | return DAG.getNode(AArch64ISD::ZIP2, dl, Op.getValueType(), |
4005 | Op.getOperand(1), Op.getOperand(2)); |
4006 | case Intrinsic::aarch64_sve_splice: |
4007 | return DAG.getNode(AArch64ISD::SPLICE, dl, Op.getValueType(), |
4008 | Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); |
4009 | case Intrinsic::aarch64_sve_ptrue: |
4010 | return getPTrue(DAG, dl, Op.getValueType(), |
4011 | cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()); |
4012 | case Intrinsic::aarch64_sve_clz: |
4013 | return DAG.getNode(AArch64ISD::CTLZ_MERGE_PASSTHRU, dl, Op.getValueType(), |
4014 | Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); |
4015 | case Intrinsic::aarch64_sve_cnt: { |
4016 | SDValue Data = Op.getOperand(3); |
4017 | |
4018 | if (Data.getValueType().isFloatingPoint()) |
4019 | Data = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Data); |
4020 | return DAG.getNode(AArch64ISD::CTPOP_MERGE_PASSTHRU, dl, Op.getValueType(), |
4021 | Op.getOperand(2), Data, Op.getOperand(1)); |
4022 | } |
4023 | case Intrinsic::aarch64_sve_dupq_lane: |
4024 | return LowerDUPQLane(Op, DAG); |
4025 | case Intrinsic::aarch64_sve_convert_from_svbool: |
4026 | return DAG.getNode(AArch64ISD::REINTERPRET_CAST, dl, Op.getValueType(), |
4027 | Op.getOperand(1)); |
4028 | case Intrinsic::aarch64_sve_convert_to_svbool: |
4029 | return lowerConvertToSVBool(Op, DAG); |
4030 | case Intrinsic::aarch64_sve_fneg: |
4031 | return DAG.getNode(AArch64ISD::FNEG_MERGE_PASSTHRU, dl, Op.getValueType(), |
4032 | Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); |
4033 | case Intrinsic::aarch64_sve_frintp: |
4034 | return DAG.getNode(AArch64ISD::FCEIL_MERGE_PASSTHRU, dl, Op.getValueType(), |
4035 | Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); |
4036 | case Intrinsic::aarch64_sve_frintm: |
4037 | return DAG.getNode(AArch64ISD::FFLOOR_MERGE_PASSTHRU, dl, Op.getValueType(), |
4038 | Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); |
4039 | case Intrinsic::aarch64_sve_frinti: |
4040 | return DAG.getNode(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU, dl, Op.getValueType(), |
4041 | Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); |
4042 | case Intrinsic::aarch64_sve_frintx: |
4043 | return DAG.getNode(AArch64ISD::FRINT_MERGE_PASSTHRU, dl, Op.getValueType(), |
4044 | Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); |
4045 | case Intrinsic::aarch64_sve_frinta: |
4046 | return DAG.getNode(AArch64ISD::FROUND_MERGE_PASSTHRU, dl, Op.getValueType(), |
4047 | Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); |
4048 | case Intrinsic::aarch64_sve_frintn: |
4049 | return DAG.getNode(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU, dl, Op.getValueType(), |
4050 | Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); |
4051 | case Intrinsic::aarch64_sve_frintz: |
4052 | return DAG.getNode(AArch64ISD::FTRUNC_MERGE_PASSTHRU, dl, Op.getValueType(), |
4053 | Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); |
4054 | case Intrinsic::aarch64_sve_ucvtf: |
4055 | return DAG.getNode(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU, dl, |
4056 | Op.getValueType(), Op.getOperand(2), Op.getOperand(3), |
4057 | Op.getOperand(1)); |
4058 | case Intrinsic::aarch64_sve_scvtf: |
4059 | return DAG.getNode(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU, dl, |
4060 | Op.getValueType(), Op.getOperand(2), Op.getOperand(3), |
4061 | Op.getOperand(1)); |
4062 | case Intrinsic::aarch64_sve_fcvtzu: |
4063 | return DAG.getNode(AArch64ISD::FCVTZU_MERGE_PASSTHRU, dl, |
4064 | Op.getValueType(), Op.getOperand(2), Op.getOperand(3), |
4065 | Op.getOperand(1)); |
4066 | case Intrinsic::aarch64_sve_fcvtzs: |
4067 | return DAG.getNode(AArch64ISD::FCVTZS_MERGE_PASSTHRU, dl, |
4068 | Op.getValueType(), Op.getOperand(2), Op.getOperand(3), |
4069 | Op.getOperand(1)); |
4070 | case Intrinsic::aarch64_sve_fsqrt: |
4071 | return DAG.getNode(AArch64ISD::FSQRT_MERGE_PASSTHRU, dl, Op.getValueType(), |
4072 | Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); |
4073 | case Intrinsic::aarch64_sve_frecpx: |
4074 | return DAG.getNode(AArch64ISD::FRECPX_MERGE_PASSTHRU, dl, Op.getValueType(), |
4075 | Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); |
4076 | case Intrinsic::aarch64_sve_fabs: |
4077 | return DAG.getNode(AArch64ISD::FABS_MERGE_PASSTHRU, dl, Op.getValueType(), |
4078 | Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); |
4079 | case Intrinsic::aarch64_sve_abs: |
4080 | return DAG.getNode(AArch64ISD::ABS_MERGE_PASSTHRU, dl, Op.getValueType(), |
4081 | Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); |
4082 | case Intrinsic::aarch64_sve_neg: |
4083 | return DAG.getNode(AArch64ISD::NEG_MERGE_PASSTHRU, dl, Op.getValueType(), |
4084 | Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); |
4085 | case Intrinsic::aarch64_sve_insr: { |
4086 | SDValue Scalar = Op.getOperand(2); |
4087 | EVT ScalarTy = Scalar.getValueType(); |
4088 | if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16)) |
4089 | Scalar = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Scalar); |
4090 | |
4091 | return DAG.getNode(AArch64ISD::INSR, dl, Op.getValueType(), |
4092 | Op.getOperand(1), Scalar); |
4093 | } |
4094 | case Intrinsic::aarch64_sve_rbit: |
4095 | return DAG.getNode(AArch64ISD::BITREVERSE_MERGE_PASSTHRU, dl, |
4096 | Op.getValueType(), Op.getOperand(2), Op.getOperand(3), |
4097 | Op.getOperand(1)); |
4098 | case Intrinsic::aarch64_sve_revb: |
4099 | return DAG.getNode(AArch64ISD::BSWAP_MERGE_PASSTHRU, dl, Op.getValueType(), |
4100 | Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); |
4101 | case Intrinsic::aarch64_sve_sxtb: |
4102 | return DAG.getNode( |
4103 | AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(), |
4104 | Op.getOperand(2), Op.getOperand(3), |
4105 | DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)), |
4106 | Op.getOperand(1)); |
4107 | case Intrinsic::aarch64_sve_sxth: |
4108 | return DAG.getNode( |
4109 | AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(), |
4110 | Op.getOperand(2), Op.getOperand(3), |
4111 | DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)), |
4112 | Op.getOperand(1)); |
4113 | case Intrinsic::aarch64_sve_sxtw: |
4114 | return DAG.getNode( |
4115 | AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(), |
4116 | Op.getOperand(2), Op.getOperand(3), |
4117 | DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)), |
4118 | Op.getOperand(1)); |
4119 | case Intrinsic::aarch64_sve_uxtb: |
4120 | return DAG.getNode( |
4121 | AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(), |
4122 | Op.getOperand(2), Op.getOperand(3), |
4123 | DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)), |
4124 | Op.getOperand(1)); |
4125 | case Intrinsic::aarch64_sve_uxth: |
4126 | return DAG.getNode( |
4127 | AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(), |
4128 | Op.getOperand(2), Op.getOperand(3), |
4129 | DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)), |
4130 | Op.getOperand(1)); |
4131 | case Intrinsic::aarch64_sve_uxtw: |
4132 | return DAG.getNode( |
4133 | AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(), |
4134 | Op.getOperand(2), Op.getOperand(3), |
4135 | DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)), |
4136 | Op.getOperand(1)); |
4137 | |
4138 | case Intrinsic::localaddress: { |
4139 | const auto &MF = DAG.getMachineFunction(); |
4140 | const auto *RegInfo = Subtarget->getRegisterInfo(); |
4141 | unsigned Reg = RegInfo->getLocalAddressRegister(MF); |
4142 | return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, |
4143 | Op.getSimpleValueType()); |
4144 | } |
4145 | |
4146 | case Intrinsic::eh_recoverfp: { |
4147 | |
4148 | |
4149 | |
4150 | SDValue FnOp = Op.getOperand(1); |
4151 | SDValue IncomingFPOp = Op.getOperand(2); |
4152 | GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp); |
4153 | auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr); |
4154 | if (!Fn) |
4155 | report_fatal_error( |
4156 | "llvm.eh.recoverfp must take a function as the first argument"); |
4157 | return IncomingFPOp; |
4158 | } |
4159 | |
4160 | case Intrinsic::aarch64_neon_vsri: |
4161 | case Intrinsic::aarch64_neon_vsli: { |
4162 | EVT Ty = Op.getValueType(); |
4163 | |
4164 | if (!Ty.isVector()) |
4165 | report_fatal_error("Unexpected type for aarch64_neon_vsli"); |
4166 | |
4167 | assert(Op.getConstantOperandVal(3) <= Ty.getScalarSizeInBits()); |
4168 | |
4169 | bool IsShiftRight = IntNo == Intrinsic::aarch64_neon_vsri; |
4170 | unsigned Opcode = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI; |
4171 | return DAG.getNode(Opcode, dl, Ty, Op.getOperand(1), Op.getOperand(2), |
4172 | Op.getOperand(3)); |
4173 | } |
4174 | |
4175 | case Intrinsic::aarch64_neon_srhadd: |
4176 | case Intrinsic::aarch64_neon_urhadd: |
4177 | case Intrinsic::aarch64_neon_shadd: |
4178 | case Intrinsic::aarch64_neon_uhadd: { |
4179 | bool IsSignedAdd = (IntNo == Intrinsic::aarch64_neon_srhadd || |
4180 | IntNo == Intrinsic::aarch64_neon_shadd); |
4181 | bool IsRoundingAdd = (IntNo == Intrinsic::aarch64_neon_srhadd || |
4182 | IntNo == Intrinsic::aarch64_neon_urhadd); |
4183 | unsigned Opcode = |
4184 | IsSignedAdd ? (IsRoundingAdd ? AArch64ISD::SRHADD : AArch64ISD::SHADD) |
4185 | : (IsRoundingAdd ? AArch64ISD::URHADD : AArch64ISD::UHADD); |
4186 | return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1), |
4187 | Op.getOperand(2)); |
4188 | } |
4189 | case Intrinsic::aarch64_neon_sabd: |
4190 | case Intrinsic::aarch64_neon_uabd: { |
4191 | unsigned Opcode = IntNo == Intrinsic::aarch64_neon_uabd ? ISD::ABDU |
4192 | : ISD::ABDS; |
4193 | return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1), |
4194 | Op.getOperand(2)); |
4195 | } |
4196 | case Intrinsic::aarch64_neon_uaddlp: { |
4197 | unsigned Opcode = AArch64ISD::UADDLP; |
4198 | return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1)); |
4199 | } |
4200 | case Intrinsic::aarch64_neon_sdot: |
4201 | case Intrinsic::aarch64_neon_udot: |
4202 | case Intrinsic::aarch64_sve_sdot: |
4203 | case Intrinsic::aarch64_sve_udot: { |
4204 | unsigned Opcode = (IntNo == Intrinsic::aarch64_neon_udot || |
4205 | IntNo == Intrinsic::aarch64_sve_udot) |
4206 | ? AArch64ISD::UDOT |
4207 | : AArch64ISD::SDOT; |
4208 | return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1), |
4209 | Op.getOperand(2), Op.getOperand(3)); |
4210 | } |
4211 | } |
4212 | } |
4213 | |
4214 | bool AArch64TargetLowering::shouldExtendGSIndex(EVT VT, EVT &EltTy) const { |
4215 | if (VT.getVectorElementType() == MVT::i8 || |
4216 | VT.getVectorElementType() == MVT::i16) { |
4217 | EltTy = MVT::i32; |
4218 | return true; |
4219 | } |
4220 | return false; |
4221 | } |
4222 | |
4223 | bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const { |
4224 | if (VT.getVectorElementType() == MVT::i32 && |
4225 | VT.getVectorElementCount().getKnownMinValue() >= 4 && |
4226 | !VT.isFixedLengthVector()) |
4227 | return true; |
4228 | |
4229 | return false; |
4230 | } |
4231 | |
4232 | bool AArch64TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const { |
4233 | return ExtVal.getValueType().isScalableVector() || |
4234 | useSVEForFixedLengthVectorVT(ExtVal.getValueType(), |
4235 | true); |
4236 | } |
4237 | |
4238 | unsigned getGatherVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) { |
4239 | std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = { |
4240 | {std::make_tuple( false, false, false), |
4241 | AArch64ISD::GLD1_MERGE_ZERO}, |
4242 | {std::make_tuple( false, false, true), |
4243 | AArch64ISD::GLD1_UXTW_MERGE_ZERO}, |
4244 | {std::make_tuple( false, true, false), |
4245 | AArch64ISD::GLD1_MERGE_ZERO}, |
4246 | {std::make_tuple( false, true, true), |
4247 | AArch64ISD::GLD1_SXTW_MERGE_ZERO}, |
4248 | {std::make_tuple( true, false, false), |
4249 | AArch64ISD::GLD1_SCALED_MERGE_ZERO}, |
4250 | {std::make_tuple( true, false, true), |
4251 | AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO}, |
4252 | {std::make_tuple( true, true, false), |
4253 | AArch64ISD::GLD1_SCALED_MERGE_ZERO}, |
4254 | {std::make_tuple( true, true, true), |
4255 | AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO}, |
4256 | }; |
4257 | auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend); |
4258 | return AddrModes.find(Key)->second; |
4259 | } |
4260 | |
4261 | unsigned getScatterVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) { |
4262 | std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = { |
4263 | {std::make_tuple( false, false, false), |
4264 | AArch64ISD::SST1_PRED}, |
4265 | {std::make_tuple( false, false, true), |
4266 | AArch64ISD::SST1_UXTW_PRED}, |
4267 | {std::make_tuple( false, true, false), |
4268 | AArch64ISD::SST1_PRED}, |
4269 | {std::make_tuple( false, true, true), |
4270 | AArch64ISD::SST1_SXTW_PRED}, |
4271 | {std::make_tuple( true, false, false), |
4272 | AArch64ISD::SST1_SCALED_PRED}, |
4273 | {std::make_tuple( true, false, true), |
4274 | AArch64ISD::SST1_UXTW_SCALED_PRED}, |
4275 | {std::make_tuple( true, true, false), |
4276 | AArch64ISD::SST1_SCALED_PRED}, |
4277 | {std::make_tuple( true, true, true), |
4278 | AArch64ISD::SST1_SXTW_SCALED_PRED}, |
4279 | }; |
4280 | auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend); |
4281 | return AddrModes.find(Key)->second; |
4282 | } |
4283 | |
4284 | unsigned getSignExtendedGatherOpcode(unsigned Opcode) { |
4285 | switch (Opcode) { |
4286 | default: |
4287 | llvm_unreachable("unimplemented opcode"); |
4288 | return Opcode; |
4289 | case AArch64ISD::GLD1_MERGE_ZERO: |
4290 | return AArch64ISD::GLD1S_MERGE_ZERO; |
4291 | case AArch64ISD::GLD1_IMM_MERGE_ZERO: |
4292 | return AArch64ISD::GLD1S_IMM_MERGE_ZERO; |
4293 | case AArch64ISD::GLD1_UXTW_MERGE_ZERO: |
4294 | return AArch64ISD::GLD1S_UXTW_MERGE_ZERO; |
4295 | case AArch64ISD::GLD1_SXTW_MERGE_ZERO: |
4296 | return AArch64ISD::GLD1S_SXTW_MERGE_ZERO; |
4297 | case AArch64ISD::GLD1_SCALED_MERGE_ZERO: |
4298 | return AArch64ISD::GLD1S_SCALED_MERGE_ZERO; |
4299 | case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO: |
4300 | return AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO; |
4301 | case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO: |
4302 | return AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO; |
4303 | } |
4304 | } |
4305 | |
4306 | bool getGatherScatterIndexIsExtended(SDValue Index) { |
4307 | unsigned Opcode = Index.getOpcode(); |
4308 | if (Opcode == ISD::SIGN_EXTEND_INREG) |
4309 | return true; |
4310 | |
4311 | if (Opcode == ISD::AND) { |
4312 | SDValue Splat = Index.getOperand(1); |
4313 | if (Splat.getOpcode() != ISD::SPLAT_VECTOR) |
4314 | return false; |
4315 | ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Splat.getOperand(0)); |
4316 | if (!Mask || Mask->getZExtValue() != 0xFFFFFFFF) |
4317 | return false; |
4318 | return true; |
4319 | } |
4320 | |
4321 | return false; |
4322 | } |
4323 | |
4324 | |
4325 | |
4326 | |
4327 | |
4328 | |
4329 | |
4330 | |
4331 | |
4332 | |
// When the base pointer of a masked gather/scatter is the constant null, try
// to fold a splatted offset out of the vector index: either move it into the
// scalar base pointer, or, when it is a small in-range constant, switch to
// the immediate-offset opcode (GLD1_IMM / SST1_IMM). Updates BasePtr, Index
// and Opcode in place; IsGather selects between the gather and scatter
// immediate opcodes.
void selectGatherScatterAddrMode(SDValue &BasePtr, SDValue &Index, EVT MemVT,
                                 unsigned &Opcode, bool IsGather,
                                 SelectionDAG &DAG) {
  // Only rewrite when the current base pointer is null; otherwise the
  // existing addressing mode is left alone.
  if (!isNullConstant(BasePtr))
    return;

  // Look through Index = add(vector, splat(X)): a non-constant splat becomes
  // the new scalar base pointer; a constant splat is a candidate immediate
  // offset handled below.
  ConstantSDNode *Offset = nullptr;
  if (Index.getOpcode() == ISD::ADD)
    if (auto SplatVal = DAG.getSplatValue(Index.getOperand(1))) {
      if (isa<ConstantSDNode>(SplatVal))
        Offset = cast<ConstantSDNode>(SplatVal);
      else {
        // Non-constant splat: it becomes the base, the vector part stays as
        // the index.
        BasePtr = SplatVal;
        Index = Index->getOperand(0);
        return;
      }
    }

  unsigned NewOp =
      IsGather ? AArch64ISD::GLD1_IMM_MERGE_ZERO : AArch64ISD::SST1_IMM_PRED;

  // No splat offset found: use the whole vector as the address by swapping
  // it with the (null) base pointer, with an immediate offset of zero.
  if (!Offset) {
    std::swap(BasePtr, Index);
    Opcode = NewOp;
    return;
  }

  uint64_t OffsetVal = Offset->getZExtValue();
  unsigned ScalarSizeInBytes = MemVT.getScalarSizeInBits() / 8;
  auto ConstOffset = DAG.getConstant(OffsetVal, SDLoc(Index), MVT::i64);

  // The immediate form encodes offset = element-size * [0, 31]. An offset
  // that is misaligned or out of range instead becomes the base pointer.
  if (OffsetVal % ScalarSizeInBytes || OffsetVal / ScalarSizeInBytes > 31) {

    BasePtr = ConstOffset;
    Index = Index->getOperand(0);
    return;
  }

  // In-range immediate: the vector operand becomes the base and the constant
  // becomes the (immediate) index.
  Opcode = NewOp;
  BasePtr = Index->getOperand(0);
  Index = ConstOffset;
}
4382 | |
// Lower an ISD::MGATHER (masked gather load) to an AArch64 SVE GLD1* node.
// Handles scalable vectors directly and fixed-length vectors by promoting
// operands into SVE containers; floating-point results are gathered as
// integers and bitcast back. Returns an empty SDValue to bail out for bf16
// results when the subtarget lacks BF16 support.
SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(Op);
  assert(MGT && "Can only custom lower gather load nodes");

  bool IsFixedLength = MGT->getMemoryVT().isFixedLengthVector();

  SDValue Index = MGT->getIndex();
  SDValue Chain = MGT->getChain();
  SDValue PassThru = MGT->getPassThru();
  SDValue Mask = MGT->getMask();
  SDValue BasePtr = MGT->getBasePtr();
  ISD::LoadExtType ExtTy = MGT->getExtensionType();

  // Decompose the index type into the three properties that pick the GLD1
  // variant: scaled vs unscaled, signed vs unsigned, and whether the index
  // elements need extending from 32 to 64 bits.
  ISD::MemIndexType IndexType = MGT->getIndexType();
  bool IsScaled =
      IndexType == ISD::SIGNED_SCALED || IndexType == ISD::UNSIGNED_SCALED;
  bool IsSigned =
      IndexType == ISD::SIGNED_SCALED || IndexType == ISD::SIGNED_UNSCALED;
  bool IdxNeedsExtend =
      getGatherScatterIndexIsExtended(Index) ||
      Index.getSimpleValueType().getVectorElementType() == MVT::i32;
  bool ResNeedsSignExtend = ExtTy == ISD::EXTLOAD || ExtTy == ISD::SEXTLOAD;

  EVT VT = PassThru.getSimpleValueType();
  EVT IndexVT = Index.getSimpleValueType();
  EVT MemVT = MGT->getMemoryVT();
  SDValue InputVT = DAG.getValueType(MemVT);

  // Cannot lower bf16 gathers without the BF16 extension.
  if (VT.getVectorElementType() == MVT::bf16 &&
      !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
    return SDValue();

  if (IsFixedLength) {
    assert(Subtarget->useSVEForFixedLengthVectors() &&
           "Cannot lower when not using SVE for fixed vectors");
    // Promote index and memory types to a common SVE container, keyed off
    // whichever of the two has the wider scalar type.
    if (MemVT.getScalarSizeInBits() <= IndexVT.getScalarSizeInBits()) {
      IndexVT = getContainerForFixedLengthVector(DAG, IndexVT);
      MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType());
    } else {
      MemVT = getContainerForFixedLengthVector(DAG, MemVT);
      IndexVT = MemVT.changeTypeToInteger();
    }
    InputVT = DAG.getValueType(MemVT.changeTypeToInteger());
    // Widen the mask to the index element width before converting it to a
    // scalable predicate further down.
    Mask = DAG.getNode(
        ISD::ZERO_EXTEND, DL,
        VT.changeVectorElementType(IndexVT.getVectorElementType()), Mask);
  }

  // GLD1 merges zero into inactive lanes, so an undef/zero passthru needs no
  // extra select; drop it.
  if (PassThru->isUndef() || isZerosVector(PassThru.getNode()))
    PassThru = SDValue();

  if (VT.isFloatingPoint() && !IsFixedLength) {
    // Gather FP data as integers of the same element count; bitcast back at
    // the end.
    if (PassThru) {
      EVT PassThruVT = getPackedSVEVectorVT(VT.getVectorElementCount());
      PassThru = getSVESafeBitCast(PassThruVT, PassThru, DAG);
    }
    InputVT = DAG.getValueType(MemVT.changeVectorElementTypeToInteger());
  }

  SDVTList VTs = DAG.getVTList(IndexVT, MVT::Other);

  // The extension is folded into the opcode choice, so strip it from the
  // index itself.
  if (getGatherScatterIndexIsExtended(Index))
    Index = Index.getOperand(0);

  unsigned Opcode = getGatherVecOpcode(IsScaled, IsSigned, IdxNeedsExtend);
  selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode,
                              /*IsGather=*/true, DAG);

  if (ResNeedsSignExtend)
    Opcode = getSignExtendedGatherOpcode(Opcode);

  if (IsFixedLength) {
    if (Index.getSimpleValueType().isFixedLengthVector())
      Index = convertToScalableVector(DAG, IndexVT, Index);
    if (BasePtr.getSimpleValueType().isFixedLengthVector())
      BasePtr = convertToScalableVector(DAG, IndexVT, BasePtr);
    Mask = convertFixedMaskToScalableVector(Mask, DAG);
  }

  SDValue Ops[] = {Chain, Mask, BasePtr, Index, InputVT};
  SDValue Result = DAG.getNode(Opcode, DL, VTs, Ops);
  Chain = Result.getValue(1);

  if (IsFixedLength) {
    // Convert back from the SVE container to the original fixed-length type,
    // then apply the passthru (using the original, unwidened mask).
    Result = convertFromScalableVector(
        DAG, VT.changeVectorElementType(IndexVT.getVectorElementType()),
        Result);
    Result = DAG.getNode(ISD::TRUNCATE, DL, VT.changeTypeToInteger(), Result);
    Result = DAG.getNode(ISD::BITCAST, DL, VT, Result);

    if (PassThru)
      Result = DAG.getSelect(DL, VT, MGT->getMask(), Result, PassThru);
  } else {
    if (PassThru)
      Result = DAG.getSelect(DL, IndexVT, Mask, Result, PassThru);

    // Undo the FP->integer reinterpretation applied above.
    if (VT.isFloatingPoint())
      Result = getSVESafeBitCast(VT, Result, DAG);
  }

  return DAG.getMergeValues({Result, Chain}, DL);
}
4488 | |
// Lower an ISD::MSCATTER (masked scatter store) to an AArch64 SVE SST1*
// node. Mirrors LowerMGATHER: scalable vectors are handled directly,
// fixed-length vectors are promoted into SVE containers, and floating-point
// data is stored through an integer reinterpretation. Returns an empty
// SDValue to bail out for bf16 data when the subtarget lacks BF16 support.
SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(Op);
  assert(MSC && "Can only custom lower scatter store nodes");

  bool IsFixedLength = MSC->getMemoryVT().isFixedLengthVector();

  SDValue Index = MSC->getIndex();
  SDValue Chain = MSC->getChain();
  SDValue StoreVal = MSC->getValue();
  SDValue Mask = MSC->getMask();
  SDValue BasePtr = MSC->getBasePtr();

  // Decompose the index type into the properties that pick the SST1 variant:
  // scaled vs unscaled, signed vs unsigned, and whether the index elements
  // need extending from 32 to 64 bits.
  ISD::MemIndexType IndexType = MSC->getIndexType();
  bool IsScaled =
      IndexType == ISD::SIGNED_SCALED || IndexType == ISD::UNSIGNED_SCALED;
  bool IsSigned =
      IndexType == ISD::SIGNED_SCALED || IndexType == ISD::SIGNED_UNSCALED;
  bool NeedsExtend =
      getGatherScatterIndexIsExtended(Index) ||
      Index.getSimpleValueType().getVectorElementType() == MVT::i32;

  EVT VT = StoreVal.getSimpleValueType();
  EVT IndexVT = Index.getSimpleValueType();
  SDVTList VTs = DAG.getVTList(MVT::Other);
  EVT MemVT = MSC->getMemoryVT();
  SDValue InputVT = DAG.getValueType(MemVT);

  // Cannot lower bf16 scatters without the BF16 extension.
  if (VT.getVectorElementType() == MVT::bf16 &&
      !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
    return SDValue();

  if (IsFixedLength) {
    assert(Subtarget->useSVEForFixedLengthVectors() &&
           "Cannot lower when not using SVE for fixed vectors");
    // Promote index and memory types to a common SVE container, keyed off
    // whichever of the two has the wider scalar type.
    if (MemVT.getScalarSizeInBits() <= IndexVT.getScalarSizeInBits()) {
      IndexVT = getContainerForFixedLengthVector(DAG, IndexVT);
      MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType());
    } else {
      MemVT = getContainerForFixedLengthVector(DAG, MemVT);
      IndexVT = MemVT.changeTypeToInteger();
    }
    InputVT = DAG.getValueType(MemVT.changeTypeToInteger());

    // Reinterpret the value as integers, widen it to the index element
    // width, and move it into an SVE container; widen the mask likewise.
    StoreVal =
        DAG.getNode(ISD::BITCAST, DL, VT.changeTypeToInteger(), StoreVal);
    StoreVal = DAG.getNode(
        ISD::ANY_EXTEND, DL,
        VT.changeVectorElementType(IndexVT.getVectorElementType()), StoreVal);
    StoreVal = convertToScalableVector(DAG, IndexVT, StoreVal);
    Mask = DAG.getNode(
        ISD::ZERO_EXTEND, DL,
        VT.changeVectorElementType(IndexVT.getVectorElementType()), Mask);
  } else if (VT.isFloatingPoint()) {
    // Scatter FP data as integers of the same element count.
    EVT StoreValVT = getPackedSVEVectorVT(VT.getVectorElementCount());
    StoreVal = getSVESafeBitCast(StoreValVT, StoreVal, DAG);
    InputVT = DAG.getValueType(MemVT.changeVectorElementTypeToInteger());
  }

  // The extension is folded into the opcode choice, so strip it from the
  // index itself.
  if (getGatherScatterIndexIsExtended(Index))
    Index = Index.getOperand(0);

  unsigned Opcode = getScatterVecOpcode(IsScaled, IsSigned, NeedsExtend);
  selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode,
                              /*IsGather=*/false, DAG);

  if (IsFixedLength) {
    if (Index.getSimpleValueType().isFixedLengthVector())
      Index = convertToScalableVector(DAG, IndexVT, Index);
    if (BasePtr.getSimpleValueType().isFixedLengthVector())
      BasePtr = convertToScalableVector(DAG, IndexVT, BasePtr);
    Mask = convertFixedMaskToScalableVector(Mask, DAG);
  }

  SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, InputVT};
  return DAG.getNode(Opcode, DL, VTs, Ops);
}
4568 | |
4569 | SDValue AArch64TargetLowering::LowerMLOAD(SDValue Op, SelectionDAG &DAG) const { |
4570 | SDLoc DL(Op); |
4571 | MaskedLoadSDNode *LoadNode = cast<MaskedLoadSDNode>(Op); |
4572 | assert(LoadNode && "Expected custom lowering of a masked load node"); |
4573 | EVT VT = Op->getValueType(0); |
4574 | |
4575 | if (useSVEForFixedLengthVectorVT(VT, true)) |
4576 | return LowerFixedLengthVectorMLoadToSVE(Op, DAG); |
4577 | |
4578 | SDValue PassThru = LoadNode->getPassThru(); |
4579 | SDValue Mask = LoadNode->getMask(); |
4580 | |
4581 | if (PassThru->isUndef() || isZerosVector(PassThru.getNode())) |
4582 | return Op; |
4583 | |
4584 | SDValue Load = DAG.getMaskedLoad( |
4585 | VT, DL, LoadNode->getChain(), LoadNode->getBasePtr(), |
4586 | LoadNode->getOffset(), Mask, DAG.getUNDEF(VT), LoadNode->getMemoryVT(), |
4587 | LoadNode->getMemOperand(), LoadNode->getAddressingMode(), |
4588 | LoadNode->getExtensionType()); |
4589 | |
4590 | SDValue Result = DAG.getSelect(DL, VT, Mask, Load, PassThru); |
4591 | |
4592 | return DAG.getMergeValues({Result, Load.getValue(1)}, DL); |
4593 | } |
4594 | |
4595 | |
// Lower a truncating store of a v4i16 value to v4i8 memory.
static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST,
                                        EVT VT, EVT MemVT,
                                        SelectionDAG &DAG) {
  assert(VT.isVector() && "VT should be a vector type");
  // This lowering only handles the v4i16 -> v4i8 case.
  assert(MemVT == MVT::v4i8 && VT == MVT::v4i16);

  SDValue Value = ST->getValue();

  // There is no direct 4-byte vector store, so: widen the value to v8i16
  // with an undef upper half, truncate to v8i8, then reinterpret as v2i32
  // and extract element 0, which holds the four meaningful bytes packed
  // into a single i32 that can be stored normally.
  SDValue Undef = DAG.getUNDEF(MVT::i16);
  SDValue UndefVec = DAG.getBuildVector(MVT::v4i16, DL,
                                        {Undef, Undef, Undef, Undef});

  SDValue TruncExt = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16,
                                 Value, UndefVec);
  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i8, TruncExt);

  Trunc = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Trunc);
  SDValue ExtractTrunc = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
                                     Trunc, DAG.getConstant(0, DL, MVT::i64));

  // Store the packed i32 through the original chain/pointer/mem-operand.
  return DAG.getStore(ST->getChain(), DL, ExtractTrunc,
                      ST->getBasePtr(), ST->getMemOperand());
}
4626 | |
4627 | |
4628 | |
4629 | |
// Custom lowering for ISD::STORE. Handles: fixed-length vector stores via
// SVE, under-aligned vector stores (scalarized), v4i16->v4i8 truncating
// stores, 256-bit non-temporal vector stores (as STNP), volatile i128
// stores (as STP), and i64x8 (LS64) stores (as eight i64 stores). Returns
// an empty SDValue for anything else.
SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDLoc Dl(Op);
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  assert (StoreNode && "Can only custom lower store nodes");

  SDValue Value = StoreNode->getValue();

  EVT VT = Value.getValueType();
  EVT MemVT = StoreNode->getMemoryVT();

  if (VT.isVector()) {
    if (useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true))
      return LowerFixedLengthVectorStoreToSVE(Op, DAG);

    // Scalarize stores the target cannot perform at this misalignment.
    unsigned AS = StoreNode->getAddressSpace();
    Align Alignment = StoreNode->getAlign();
    if (Alignment < MemVT.getStoreSize() &&
        !allowsMisalignedMemoryAccesses(MemVT, AS, Alignment,
                                        StoreNode->getMemOperand()->getFlags(),
                                        nullptr)) {
      return scalarizeVectorStore(StoreNode, DAG);
    }

    if (StoreNode->isTruncatingStore() && VT == MVT::v4i16 &&
        MemVT == MVT::v4i8) {
      return LowerTruncateVectorStore(Dl, StoreNode, VT, MemVT, DAG);
    }

    // 256-bit non-temporal stores with power-of-two element sizes: split the
    // value into two 128-bit halves and emit a single STNP (non-temporal
    // store pair) node.
    ElementCount EC = MemVT.getVectorElementCount();
    if (StoreNode->isNonTemporal() && MemVT.getSizeInBits() == 256u &&
        EC.isKnownEven() &&
        ((MemVT.getScalarSizeInBits() == 8u ||
          MemVT.getScalarSizeInBits() == 16u ||
          MemVT.getScalarSizeInBits() == 32u ||
          MemVT.getScalarSizeInBits() == 64u))) {
      SDValue Lo =
          DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl,
                      MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
                      StoreNode->getValue(), DAG.getConstant(0, Dl, MVT::i64));
      SDValue Hi =
          DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl,
                      MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
                      StoreNode->getValue(),
                      DAG.getConstant(EC.getKnownMinValue() / 2, Dl, MVT::i64));
      SDValue Result = DAG.getMemIntrinsicNode(
          AArch64ISD::STNP, Dl, DAG.getVTList(MVT::Other),
          {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
          StoreNode->getMemoryVT(), StoreNode->getMemOperand());
      return Result;
    }
  } else if (MemVT == MVT::i128 && StoreNode->isVolatile()) {
    // Volatile i128 store: split into two i64 halves and emit STP.
    assert(StoreNode->getValue()->getValueType(0) == MVT::i128);
    SDValue Lo =
        DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i64, StoreNode->getValue(),
                    DAG.getConstant(0, Dl, MVT::i64));
    SDValue Hi =
        DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i64, StoreNode->getValue(),
                    DAG.getConstant(1, Dl, MVT::i64));
    SDValue Result = DAG.getMemIntrinsicNode(
        AArch64ISD::STP, Dl, DAG.getVTList(MVT::Other),
        {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
        StoreNode->getMemoryVT(), StoreNode->getMemOperand());
    return Result;
  } else if (MemVT == MVT::i64x8) {
    // i64x8 (LS64 tuple) store: extract each of the eight i64 parts and
    // store them at consecutive 8-byte offsets, chaining the stores.
    SDValue Value = StoreNode->getValue();
    assert(Value->getValueType(0) == MVT::i64x8);
    SDValue Chain = StoreNode->getChain();
    SDValue Base = StoreNode->getBasePtr();
    EVT PtrVT = Base.getValueType();
    for (unsigned i = 0; i < 8; i++) {
      SDValue Part = DAG.getNode(AArch64ISD::LS64_EXTRACT, Dl, MVT::i64,
                                 Value, DAG.getConstant(i, Dl, MVT::i32));
      SDValue Ptr = DAG.getNode(ISD::ADD, Dl, PtrVT, Base,
                                DAG.getConstant(i * 8, Dl, PtrVT));
      Chain = DAG.getStore(Chain, Dl, Part, Ptr, StoreNode->getPointerInfo(),
                           StoreNode->getOriginalAlign());
    }
    return Chain;
  }

  return SDValue();
}
4715 | |
// Custom lowering for ISD::LOAD. Handles i64x8 (LS64 tuple) loads as eight
// chained i64 loads, and extending loads from v4i8 memory to v4i16/v4i32 via
// a single 32-bit (f32) load plus vector extends. Returns an empty SDValue
// for unsupported extension kinds or memory types.
SDValue AArch64TargetLowering::LowerLOAD(SDValue Op,
                                         SelectionDAG &DAG) const {
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  assert(LoadNode && "Expected custom lowering of a load node");

  if (LoadNode->getMemoryVT() == MVT::i64x8) {
    // Load the eight i64 parts from consecutive 8-byte offsets, chaining
    // each load, then rebuild the tuple with LS64_BUILD.
    SmallVector<SDValue, 8> Ops;
    SDValue Base = LoadNode->getBasePtr();
    SDValue Chain = LoadNode->getChain();
    EVT PtrVT = Base.getValueType();
    for (unsigned i = 0; i < 8; i++) {
      SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Base,
                                DAG.getConstant(i * 8, DL, PtrVT));
      SDValue Part = DAG.getLoad(MVT::i64, DL, Chain, Ptr,
                                 LoadNode->getPointerInfo(),
                                 LoadNode->getOriginalAlign());
      Ops.push_back(Part);
      Chain = SDValue(Part.getNode(), 1);
    }
    SDValue Loaded = DAG.getNode(AArch64ISD::LS64_BUILD, DL, MVT::i64x8, Ops);
    return DAG.getMergeValues({Loaded, Chain}, DL);
  }

  // Remaining cases are extending loads from v4i8 memory.
  EVT VT = Op->getValueType(0);
  assert((VT == MVT::v4i16 || VT == MVT::v4i32) && "Expected v4i16 or v4i32");

  if (LoadNode->getMemoryVT() != MVT::v4i8)
    return SDValue();

  // EXTLOAD is lowered as zero-extension; any other kind bails out.
  unsigned ExtType;
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD)
    ExtType = ISD::SIGN_EXTEND;
  else if (LoadNode->getExtensionType() == ISD::ZEXTLOAD ||
           LoadNode->getExtensionType() == ISD::EXTLOAD)
    ExtType = ISD::ZERO_EXTEND;
  else
    return SDValue();

  // Load the four bytes as one 32-bit (f32) scalar, place it in a v2f32,
  // reinterpret as v8i8 and extend; the low v4i16 of the result holds the
  // extended elements. For v4i32, extend once more.
  SDValue Load = DAG.getLoad(MVT::f32, DL, LoadNode->getChain(),
                             LoadNode->getBasePtr(), MachinePointerInfo());
  SDValue Chain = Load.getValue(1);
  SDValue Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f32, Load);
  SDValue BC = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Vec);
  SDValue Ext = DAG.getNode(ExtType, DL, MVT::v8i16, BC);
  Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Ext,
                    DAG.getConstant(0, DL, MVT::i64));
  if (VT == MVT::v4i32)
    Ext = DAG.getNode(ExtType, DL, MVT::v4i32, Ext);
  return DAG.getMergeValues({Ext, Chain}, DL);
}
4768 | |
4769 | |
4770 | SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const { |
4771 | MVT VT = Op.getSimpleValueType(); |
4772 | |
4773 | if (VT.isVector()) |
4774 | return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABS_MERGE_PASSTHRU); |
4775 | |
4776 | SDLoc DL(Op); |
4777 | SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), |
4778 | Op.getOperand(0)); |
4779 | |
4780 | SDValue Cmp = |
4781 | DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32), |
4782 | Op.getOperand(0), DAG.getConstant(0, DL, VT)); |
4783 | return DAG.getNode(AArch64ISD::CSEL, DL, VT, Op.getOperand(0), Neg, |
4784 | DAG.getConstant(AArch64CC::PL, DL, MVT::i32), |
4785 | Cmp.getValue(1)); |
4786 | } |
4787 | |
/// Central dispatch for operations marked Custom during type/op legalization.
/// Each opcode is forwarded to its dedicated lowering routine; hitting the
/// default case means an opcode was marked Custom without a handler here.
SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
                                              SelectionDAG &DAG) const {
  LLVM_DEBUG(dbgs() << "Custom lowering: ");
  LLVM_DEBUG(Op.dump());

  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("unimplemented operand");
    return SDValue();
  case ISD::BITCAST:
    return LowerBITCAST(Op, DAG);
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:
    return LowerGlobalTLSAddress(Op, DAG);
  case ISD::SETCC:
  case ISD::STRICT_FSETCC:
  case ISD::STRICT_FSETCCS:
    return LowerSETCC(Op, DAG);
  case ISD::BR_CC:
    return LowerBR_CC(Op, DAG);
  case ISD::SELECT:
    return LowerSELECT(Op, DAG);
  case ISD::SELECT_CC:
    return LowerSELECT_CC(Op, DAG);
  case ISD::JumpTable:
    return LowerJumpTable(Op, DAG);
  case ISD::BR_JT:
    return LowerBR_JT(Op, DAG);
  case ISD::ConstantPool:
    return LowerConstantPool(Op, DAG);
  case ISD::BlockAddress:
    return LowerBlockAddress(Op, DAG);
  case ISD::VASTART:
    return LowerVASTART(Op, DAG);
  case ISD::VACOPY:
    return LowerVACOPY(Op, DAG);
  case ISD::VAARG:
    return LowerVAARG(Op, DAG);
  case ISD::ADDC:
  case ISD::ADDE:
  case ISD::SUBC:
  case ISD::SUBE:
    return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
  case ISD::SADDO:
  case ISD::UADDO:
  case ISD::SSUBO:
  case ISD::USUBO:
  case ISD::SMULO:
  case ISD::UMULO:
    return LowerXALUO(Op, DAG);
  // Floating-point arithmetic is lowered to the SVE predicated forms.
  case ISD::FADD:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::FADD_PRED);
  case ISD::FSUB:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSUB_PRED);
  case ISD::FMUL:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMUL_PRED);
  case ISD::FMA:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMA_PRED);
  case ISD::FDIV:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::FDIV_PRED);
  // Unary FP ops use the merge-passthru predicated node variants.
  case ISD::FNEG:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEG_MERGE_PASSTHRU);
  case ISD::FCEIL:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::FCEIL_MERGE_PASSTHRU);
  case ISD::FFLOOR:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::FFLOOR_MERGE_PASSTHRU);
  case ISD::FNEARBYINT:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEARBYINT_MERGE_PASSTHRU);
  case ISD::FRINT:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::FRINT_MERGE_PASSTHRU);
  case ISD::FROUND:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUND_MERGE_PASSTHRU);
  case ISD::FROUNDEVEN:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU);
  case ISD::FTRUNC:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::FTRUNC_MERGE_PASSTHRU);
  case ISD::FSQRT:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSQRT_MERGE_PASSTHRU);
  case ISD::FABS:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::FABS_MERGE_PASSTHRU);
  case ISD::FP_ROUND:
  case ISD::STRICT_FP_ROUND:
    return LowerFP_ROUND(Op, DAG);
  case ISD::FP_EXTEND:
    return LowerFP_EXTEND(Op, DAG);
  case ISD::FRAMEADDR:
    return LowerFRAMEADDR(Op, DAG);
  case ISD::SPONENTRY:
    return LowerSPONENTRY(Op, DAG);
  case ISD::RETURNADDR:
    return LowerRETURNADDR(Op, DAG);
  case ISD::ADDROFRETURNADDR:
    return LowerADDROFRETURNADDR(Op, DAG);
  case ISD::CONCAT_VECTORS:
    return LowerCONCAT_VECTORS(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::BUILD_VECTOR:
    return LowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::SPLAT_VECTOR:
    return LowerSPLAT_VECTOR(Op, DAG);
  case ISD::EXTRACT_SUBVECTOR:
    return LowerEXTRACT_SUBVECTOR(Op, DAG);
  case ISD::INSERT_SUBVECTOR:
    return LowerINSERT_SUBVECTOR(Op, DAG);
  case ISD::SDIV:
  case ISD::UDIV:
    return LowerDIV(Op, DAG);
  case ISD::SMIN:
  case ISD::UMIN:
  case ISD::SMAX:
  case ISD::UMAX:
    return LowerMinMax(Op, DAG);
  case ISD::SRA:
  case ISD::SRL:
  case ISD::SHL:
    return LowerVectorSRA_SRL_SHL(Op, DAG);
  case ISD::SHL_PARTS:
  case ISD::SRL_PARTS:
  case ISD::SRA_PARTS:
    return LowerShiftParts(Op, DAG);
  case ISD::CTPOP:
    return LowerCTPOP(Op, DAG);
  case ISD::FCOPYSIGN:
    return LowerFCOPYSIGN(Op, DAG);
  case ISD::OR:
    return LowerVectorOR(Op, DAG);
  case ISD::XOR:
    return LowerXOR(Op, DAG);
  case ISD::PREFETCH:
    return LowerPREFETCH(Op, DAG);
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
  case ISD::STRICT_SINT_TO_FP:
  case ISD::STRICT_UINT_TO_FP:
    return LowerINT_TO_FP(Op, DAG);
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
  case ISD::STRICT_FP_TO_SINT:
  case ISD::STRICT_FP_TO_UINT:
    return LowerFP_TO_INT(Op, DAG);
  case ISD::FP_TO_SINT_SAT:
  case ISD::FP_TO_UINT_SAT:
    return LowerFP_TO_INT_SAT(Op, DAG);
  case ISD::FSINCOS:
    return LowerFSINCOS(Op, DAG);
  case ISD::FLT_ROUNDS_:
    return LowerFLT_ROUNDS_(Op, DAG);
  case ISD::SET_ROUNDING:
    return LowerSET_ROUNDING(Op, DAG);
  case ISD::MUL:
    return LowerMUL(Op, DAG);
  case ISD::MULHS:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHS_PRED,
                               /*OverrideNEON=*/true);
  case ISD::MULHU:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHU_PRED,
                               /*OverrideNEON=*/true);
  case ISD::INTRINSIC_WO_CHAIN:
    return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::STORE:
    return LowerSTORE(Op, DAG);
  case ISD::MSTORE:
    return LowerFixedLengthVectorMStoreToSVE(Op, DAG);
  case ISD::MGATHER:
    return LowerMGATHER(Op, DAG);
  case ISD::MSCATTER:
    return LowerMSCATTER(Op, DAG);
  case ISD::VECREDUCE_SEQ_FADD:
    return LowerVECREDUCE_SEQ_FADD(Op, DAG);
  case ISD::VECREDUCE_ADD:
  case ISD::VECREDUCE_AND:
  case ISD::VECREDUCE_OR:
  case ISD::VECREDUCE_XOR:
  case ISD::VECREDUCE_SMAX:
  case ISD::VECREDUCE_SMIN:
  case ISD::VECREDUCE_UMAX:
  case ISD::VECREDUCE_UMIN:
  case ISD::VECREDUCE_FADD:
  case ISD::VECREDUCE_FMAX:
  case ISD::VECREDUCE_FMIN:
    return LowerVECREDUCE(Op, DAG);
  case ISD::ATOMIC_LOAD_SUB:
    return LowerATOMIC_LOAD_SUB(Op, DAG);
  case ISD::ATOMIC_LOAD_AND:
    return LowerATOMIC_LOAD_AND(Op, DAG);
  case ISD::DYNAMIC_STACKALLOC:
    return LowerDYNAMIC_STACKALLOC(Op, DAG);
  case ISD::VSCALE:
    return LowerVSCALE(Op, DAG);
  case ISD::ANY_EXTEND:
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
    return LowerFixedLengthVectorIntExtendToSVE(Op, DAG);
  case ISD::SIGN_EXTEND_INREG: {
    // Only custom-lower when the extension's element type is a whole byte
    // multiple that SVE can handle; otherwise fall back to default expansion.
    EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    EVT ExtraEltVT = ExtraVT.getVectorElementType();
    if ((ExtraEltVT != MVT::i8) && (ExtraEltVT != MVT::i16) &&
        (ExtraEltVT != MVT::i32) && (ExtraEltVT != MVT::i64))
      return SDValue();

    return LowerToPredicatedOp(Op, DAG,
                               AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU);
  }
  case ISD::TRUNCATE:
    return LowerTRUNCATE(Op, DAG);
  case ISD::MLOAD:
    return LowerMLOAD(Op, DAG);
  case ISD::LOAD:
    // Fixed-length vector loads may need to be widened to SVE containers.
    if (useSVEForFixedLengthVectorVT(Op.getValueType()))
      return LowerFixedLengthVectorLoadToSVE(Op, DAG);
    return LowerLOAD(Op, DAG);
  case ISD::ADD:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::ADD_PRED);
  case ISD::AND:
    return LowerToScalableOp(Op, DAG);
  case ISD::SUB:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::SUB_PRED);
  case ISD::FMAXIMUM:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMAX_PRED);
  case ISD::FMAXNUM:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMAXNM_PRED);
  case ISD::FMINIMUM:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMIN_PRED);
  case ISD::FMINNUM:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMINNM_PRED);
  case ISD::VSELECT:
    return LowerFixedLengthVectorSelectToSVE(Op, DAG);
  case ISD::ABS:
    return LowerABS(Op, DAG);
  case ISD::BITREVERSE:
    return LowerBitreverse(Op, DAG);
  case ISD::BSWAP:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::BSWAP_MERGE_PASSTHRU);
  case ISD::CTLZ:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTLZ_MERGE_PASSTHRU,
                               /*OverrideNEON=*/true);
  case ISD::CTTZ:
    return LowerCTTZ(Op, DAG);
  case ISD::VECTOR_SPLICE:
    return LowerVECTOR_SPLICE(Op, DAG);
  }
}
5037 | |
5038 | bool AArch64TargetLowering::mergeStoresAfterLegalization(EVT VT) const { |
5039 | return !Subtarget->useSVEForFixedLengthVectors(); |
5040 | } |
5041 | |
5042 | bool AArch64TargetLowering::useSVEForFixedLengthVectorVT( |
5043 | EVT VT, bool OverrideNEON) const { |
5044 | if (!Subtarget->useSVEForFixedLengthVectors()) |
5045 | return false; |
5046 | |
5047 | if (!VT.isFixedLengthVector()) |
5048 | return false; |
5049 | |
5050 | |
5051 | switch (VT.getVectorElementType().getSimpleVT().SimpleTy) { |
5052 | |
5053 | |
5054 | case MVT::i1: |
5055 | default: |
5056 | return false; |
5057 | case MVT::i8: |
5058 | case MVT::i16: |
5059 | case MVT::i32: |
5060 | case MVT::i64: |
5061 | case MVT::f16: |
5062 | case MVT::f32: |
5063 | case MVT::f64: |
5064 | break; |
5065 | } |
5066 | |
5067 | |
5068 | if (OverrideNEON && (VT.is128BitVector() || VT.is64BitVector())) |
5069 | return true; |
5070 | |
5071 | |
5072 | if (VT.getFixedSizeInBits() <= 128) |
5073 | return false; |
5074 | |
5075 | |
5076 | if (VT.getFixedSizeInBits() > Subtarget->getMinSVEVectorSizeInBits()) |
5077 | return false; |
5078 | |
5079 | |
5080 | |
5081 | if (!VT.isPow2VectorType()) |
5082 | return false; |
5083 | |
5084 | return true; |
5085 | } |
5086 | |
5087 | |
5088 | |
5089 | |
5090 | |
5091 | |
/// Select the CCAssignFn used to assign argument locations for an outgoing
/// or incoming call with calling convention \p CC.
/// \p IsVarArg distinguishes the variadic variants on Windows and Darwin,
/// where variadic arguments follow different rules than fixed ones.
CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
                                                     bool IsVarArg) const {
  switch (CC) {
  default:
    report_fatal_error("Unsupported calling convention.");
  case CallingConv::WebKit_JS:
    return CC_AArch64_WebKit_JS;
  case CallingConv::GHC:
    return CC_AArch64_GHC;
  // The "ordinary" conventions share one OS-dependent selection.
  case CallingConv::C:
  case CallingConv::Fast:
  case CallingConv::PreserveMost:
  case CallingConv::CXX_FAST_TLS:
  case CallingConv::Swift:
  case CallingConv::SwiftTail:
  case CallingConv::Tail:
    if (Subtarget->isTargetWindows() && IsVarArg)
      return CC_AArch64_Win64_VarArg;
    if (!Subtarget->isTargetDarwin())
      return CC_AArch64_AAPCS;
    if (!IsVarArg)
      return CC_AArch64_DarwinPCS;
    // Darwin variadic: ILP32 (arm64_32) has its own variant.
    return Subtarget->isTargetILP32() ? CC_AArch64_DarwinPCS_ILP32_VarArg
                                      : CC_AArch64_DarwinPCS_VarArg;
  case CallingConv::Win64:
    return IsVarArg ? CC_AArch64_Win64_VarArg : CC_AArch64_AAPCS;
  case CallingConv::CFGuard_Check:
    return CC_AArch64_Win64_CFGuard_Check;
  case CallingConv::AArch64_VectorCall:
  case CallingConv::AArch64_SVE_VectorCall:
    return CC_AArch64_AAPCS;
  }
}
5125 | |
5126 | CCAssignFn * |
5127 | AArch64TargetLowering::CCAssignFnForReturn(CallingConv::ID CC) const { |
5128 | return CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS |
5129 | : RetCC_AArch64_AAPCS; |
5130 | } |
5131 | |
/// Lower the incoming (formal) arguments of the current function, producing
/// one SDValue per incoming value in \p InVals and returning the updated
/// \p Chain.  Handles register and stack arguments, byval aggregates,
/// indirectly-passed scalable vectors, and the varargs save areas.
SDValue AArch64TargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  bool IsWin64 =
      Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  DenseMap<unsigned, SDValue> CopiedRegs;
  CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());

  // Run each argument through the CC assignment function one at a time,
  // first shrinking the value type back down to i8/i16 when the IR-level
  // argument was a small integer (the ABI wants them assigned at their
  // original width, not the promoted one).
  unsigned NumArgs = Ins.size();
  Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
  unsigned CurArgIdx = 0;
  for (unsigned i = 0; i != NumArgs; ++i) {
    MVT ValVT = Ins[i].VT;
    if (Ins[i].isOrigArg()) {
      std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
      CurArgIdx = Ins[i].getOrigArgIndex();

      // Recover the original IR type of this argument.
      EVT ActualVT = getValueType(DAG.getDataLayout(), CurOrigArg->getType(),
                                  /*AllowUnknown=*/true);
      MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
      // Undo promotion for small integers so assignment sees i8/i16.
      if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
        ValVT = MVT::i8;
      else if (ActualMVT == MVT::i16)
        ValVT = MVT::i16;
    }
    bool UseVarArgCC = false;
    if (IsWin64)
      UseVarArgCC = isVarArg;
    CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, UseVarArgCC);
    bool Res =
        AssignFn(i, ValVT, ValVT, CCValAssign::Full, Ins[i].Flags, CCInfo);
    assert(!Res && "Call operand has unhandled type");
    (void)Res;
  }
  SmallVector<SDValue, 16> ArgValues;
  // ExtraArgLocs counts the extra Ins entries consumed by multi-part
  // indirect (scalable) arguments, keeping Ins[i] and ArgLocs in sync.
  unsigned ExtraArgLocs = 0;
  for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i - ExtraArgLocs];

    if (Ins[i].Flags.isByVal()) {
      // Byval aggregates are passed by pointer to a stack copy; hand the
      // callee a frame index covering the rounded-up copy.
      EVT PtrVT = getPointerTy(DAG.getDataLayout());
      int Size = Ins[i].Flags.getByValSize();
      unsigned NumRegs = (Size + 7) / 8;

      unsigned FrameIdx =
          MFI.CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
      SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrVT);
      InVals.push_back(FrameIdxN);

      continue;
    }

    if (Ins[i].Flags.isSwiftAsync())
      MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);

    SDValue ArgValue;
    if (VA.isRegLoc()) {
      // Arguments stored in registers: pick the register class matching the
      // location type, then copy the physreg into a vreg.
      EVT RegVT = VA.getLocVT();
      const TargetRegisterClass *RC;

      if (RegVT == MVT::i32)
        RC = &AArch64::GPR32RegClass;
      else if (RegVT == MVT::i64)
        RC = &AArch64::GPR64RegClass;
      else if (RegVT == MVT::f16 || RegVT == MVT::bf16)
        RC = &AArch64::FPR16RegClass;
      else if (RegVT == MVT::f32)
        RC = &AArch64::FPR32RegClass;
      else if (RegVT == MVT::f64 || RegVT.is64BitVector())
        RC = &AArch64::FPR64RegClass;
      else if (RegVT == MVT::f128 || RegVT.is128BitVector())
        RC = &AArch64::FPR128RegClass;
      else if (RegVT.isScalableVector() &&
               RegVT.getVectorElementType() == MVT::i1)
        RC = &AArch64::PPRRegClass;
      else if (RegVT.isScalableVector())
        RC = &AArch64::ZPRRegClass;
      else
        llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");

      // Transform the arguments in physical registers into virtual ones.
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
      ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);

      // Adjust the copied value to the type the caller actually passed,
      // depending on how the CC assignment located it.
      switch (VA.getLocInfo()) {
      default:
        llvm_unreachable("Unknown loc info!");
      case CCValAssign::Full:
        break;
      case CCValAssign::Indirect:
        assert(VA.getValVT().isScalableVector() &&
               "Only scalable vectors can be passed indirectly");
        break;
      case CCValAssign::BCvt:
        ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue);
        break;
      case CCValAssign::AExt:
      case CCValAssign::SExt:
      case CCValAssign::ZExt:
        break;
      case CCValAssign::AExtUpper:
        // Value lives in the upper 32 bits of the register.
        ArgValue = DAG.getNode(ISD::SRL, DL, RegVT, ArgValue,
                               DAG.getConstant(32, DL, RegVT));
        ArgValue = DAG.getZExtOrTrunc(ArgValue, DL, VA.getValVT());
        break;
      }
    } else { // VA.isMemLoc()
      assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem");
      unsigned ArgOffset = VA.getLocMemOffset();
      unsigned ArgSize = (VA.getLocInfo() == CCValAssign::Indirect
                              ? VA.getLocVT().getSizeInBits()
                              : VA.getValVT().getSizeInBits()) / 8;

      // Big-endian: small scalars sit at the high end of their 8-byte slot.
      uint32_t BEAlign = 0;
      if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
          !Ins[i].Flags.isInConsecutiveRegs())
        BEAlign = 8 - ArgSize;

      int FI = MFI.CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);

      // Create a load from this slot.
      SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));

      // Choose extension kind and in-memory type from the location info.
      ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
      MVT MemVT = VA.getValVT();

      switch (VA.getLocInfo()) {
      default:
        break;
      case CCValAssign::Trunc:
      case CCValAssign::BCvt:
        MemVT = VA.getLocVT();
        break;
      case CCValAssign::Indirect:
        assert(VA.getValVT().isScalableVector() &&
               "Only scalable vectors can be passed indirectly");
        MemVT = VA.getLocVT();
        break;
      case CCValAssign::SExt:
        ExtType = ISD::SEXTLOAD;
        break;
      case CCValAssign::ZExt:
        ExtType = ISD::ZEXTLOAD;
        break;
      case CCValAssign::AExt:
        ExtType = ISD::EXTLOAD;
        break;
      }

      ArgValue =
          DAG.getExtLoad(ExtType, DL, VA.getLocVT(), Chain, FIN,
                         MachinePointerInfo::getFixedStack(MF, FI), MemVT);
    }

    if (VA.getLocInfo() == CCValAssign::Indirect) {
      assert(VA.getValVT().isScalableVector() &&
             "Only scalable vectors can be passed indirectly");

      uint64_t PartSize = VA.getValVT().getStoreSize().getKnownMinSize();
      unsigned NumParts = 1;
      if (Ins[i].Flags.isInConsecutiveRegs()) {
        assert(!Ins[i].Flags.isInConsecutiveRegsLast());
        while (!Ins[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
          ++NumParts;
      }

      MVT PartLoad = VA.getValVT();
      SDValue Ptr = ArgValue;

      // ArgValue here is the pointer; load each scalable-vector part from
      // successive vscale-scaled offsets, advancing i/ExtraArgLocs so the
      // extra Ins entries are accounted for.
      while (NumParts > 0) {
        ArgValue = DAG.getLoad(PartLoad, DL, Chain, Ptr, MachinePointerInfo());
        InVals.push_back(ArgValue);
        NumParts--;
        if (NumParts > 0) {
          SDValue BytesIncrement = DAG.getVScale(
              DL, Ptr.getValueType(),
              APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize));
          SDNodeFlags Flags;
          Flags.setNoUnsignedWrap(true);
          Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
                            BytesIncrement, Flags);
          ExtraArgLocs++;
          i++;
        }
      }
    } else {
      // ILP32: pointers arrive zero-extended in 64-bit registers.
      if (Subtarget->isTargetILP32() && Ins[i].Flags.isPointer())
        ArgValue = DAG.getNode(ISD::AssertZext, DL, ArgValue.getValueType(),
                               ArgValue, DAG.getValueType(MVT::i32));
      InVals.push_back(ArgValue);
    }
  }
  assert((ArgLocs.size() + ExtraArgLocs) == Ins.size());

  // Varargs: spill the unnamed register arguments and record where the
  // on-stack portion of the va_list begins.
  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  if (isVarArg) {
    if (!Subtarget->isTargetDarwin() || IsWin64) {
      // The AAPCS (and Win64) variadic convention requires the remaining
      // argument registers to be saved contiguously; Darwin instead keeps
      // everything on the stack past the named arguments.
      saveVarArgRegisters(CCInfo, DAG, DL, Chain);
    }

    // Record the start of the stack-passed varargs area.
    unsigned StackOffset = CCInfo.getNextStackOffset();
    // Round up to the pointer size (4 bytes on ILP32, 8 otherwise).
    StackOffset = alignTo(StackOffset, Subtarget->isTargetILP32() ? 4 : 8);
    FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));

    if (MFI.hasMustTailInVarArgFunc()) {
      SmallVector<MVT, 2> RegParmTypes;
      RegParmTypes.push_back(MVT::i64);
      RegParmTypes.push_back(MVT::f128);

      // Compute the set of forwarded registers for a musttail call chain.
      SmallVectorImpl<ForwardedRegister> &Forwards =
          FuncInfo->getForwardedMustTailRegParms();
      CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes,
                                               CC_AArch64_AAPCS);

      // Also forward X8 (sret register) when it was not used as an argument.
      if (!CCInfo.isAllocated(AArch64::X8)) {
        unsigned X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
        Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
      }
    }
  }

  // On Win64 the first 'inreg' argument is the sret pointer, which must be
  // returned in X0 — remember it in a virtual register.
  if (IsWin64) {
    for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
      if (Ins[I].Flags.isInReg()) {
        assert(!FuncInfo->getSRetReturnReg());

        MVT PtrTy = getPointerTy(DAG.getDataLayout());
        Register Reg =
            MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
        FuncInfo->setSRetReturnReg(Reg);

        SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[I]);
        Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Copy, Chain);
        break;
      }
    }
  }

  unsigned StackArgSize = CCInfo.getNextStackOffset();
  bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
  if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
    // Conventions where the callee pops its arguments: keep the area
    // 16-byte aligned so pushes/pops stay SP-aligned, and record how much
    // this function must restore on return.
    StackArgSize = alignTo(StackArgSize, 16);
    FuncInfo->setArgumentStackToRestore(StackArgSize);
  }

  // Remember the incoming stack-argument area size for tail-call checks.
  FuncInfo->setBytesInStackArgArea(StackArgSize);

  if (Subtarget->hasCustomCallingConv())
    Subtarget->getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);

  return Chain;
}
5431 | |
/// Spill the unallocated ("variadic") GPR and FPR argument registers to the
/// stack so va_arg can find them, recording the save-area frame indices and
/// sizes in AArch64FunctionInfo.  Updates \p Chain with the stores.
void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
                                                SelectionDAG &DAG,
                                                const SDLoc &DL,
                                                SDValue &Chain) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  auto PtrVT = getPointerTy(DAG.getDataLayout());
  bool IsWin64 =
      Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());

  SmallVector<SDValue, 8> MemOps;

  static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2,
                                          AArch64::X3, AArch64::X4, AArch64::X5,
                                          AArch64::X6, AArch64::X7 };
  static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs);
  unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs);

  unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
  int GPRIdx = 0;
  if (GPRSaveSize != 0) {
    if (IsWin64) {
      // Win64: the GPR save area lives at a fixed position just below the
      // incoming stack arguments.
      GPRIdx = MFI.CreateFixedObject(GPRSaveSize, -(int)GPRSaveSize, false);
      if (GPRSaveSize & 15)
        // The extra size here keeps the 16-byte alignment of the stack area.
        MFI.CreateFixedObject(16 - (GPRSaveSize & 15),
                              -(int)alignTo(GPRSaveSize, 16), false);
    } else
      GPRIdx = MFI.CreateStackObject(GPRSaveSize, Align(8), false);

    SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT);

    // Store each remaining X register into consecutive 8-byte slots.
    for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
      unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass);
      SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
      SDValue Store =
          DAG.getStore(Val.getValue(1), DL, Val, FIN,
                       IsWin64 ? MachinePointerInfo::getFixedStack(
                                     MF, GPRIdx, (i - FirstVariadicGPR) * 8)
                               : MachinePointerInfo::getStack(MF, i * 8));
      MemOps.push_back(Store);
      FIN =
          DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT));
    }
  }
  FuncInfo->setVarArgsGPRIndex(GPRIdx);
  FuncInfo->setVarArgsGPRSize(GPRSaveSize);

  // FPR save area — only when FP/SIMD is available and not on Win64
  // (Win64 varargs never use the Q registers).
  if (Subtarget->hasFPARMv8() && !IsWin64) {
    static const MCPhysReg FPRArgRegs[] = {
        AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
        AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};
    static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs);
    unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs);

    unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
    int FPRIdx = 0;
    if (FPRSaveSize != 0) {
      FPRIdx = MFI.CreateStackObject(FPRSaveSize, Align(16), false);

      SDValue FIN = DAG.getFrameIndex(FPRIdx, PtrVT);

      // Store each remaining Q register into consecutive 16-byte slots.
      for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
        unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass);
        SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);

        SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN,
                                     MachinePointerInfo::getStack(MF, i * 16));
        MemOps.push_back(Store);
        FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
                          DAG.getConstant(16, DL, PtrVT));
      }
    }
    FuncInfo->setVarArgsFPRIndex(FPRIdx);
    FuncInfo->setVarArgsFPRSize(FPRSaveSize);
  }

  // Tie all the spill stores into the chain.
  if (!MemOps.empty()) {
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
  }
}
5512 | |
5513 | |
5514 | |
/// Lower the result values of a call into the appropriate copies out of
/// physical registers, appending one SDValue per result to \p InVals.
/// \p isThisReturn/\p ThisVal handle the "returned this" optimization where
/// the first result is known to be the already-available 'this' pointer.
SDValue AArch64TargetLowering::LowerCallResult(
    SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
    SDValue ThisVal) const {
  CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  DenseMap<unsigned, SDValue> CopiedRegs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());
  CCInfo.AnalyzeCallResult(Ins, RetCC);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign VA = RVLocs[i];

    // Pass 'this' value directly from the argument to the return value,
    // to avoid the copy.
    if (i == 0 && isThisReturn) {
      assert(!VA.needsCustom() && VA.getLocVT() == MVT::i64 &&
             "unexpected return calling convention register assignment");
      InVals.push_back(ThisVal);
      continue;
    }

    // Avoid copying the same physreg twice; reuse an earlier CopyFromReg.
    // Note that the chain/glue (InFlag) threading makes the order of these
    // copies significant.
    SDValue Val = CopiedRegs.lookup(VA.getLocReg());
    if (!Val) {
      Val =
          DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag);
      Chain = Val.getValue(1);
      InFlag = Val.getValue(2);
      CopiedRegs[VA.getLocReg()] = Val;
    }

    switch (VA.getLocInfo()) {
    default:
      llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full:
      break;
    case CCValAssign::BCvt:
      Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
      break;
    case CCValAssign::AExtUpper:
      // Value lives in the upper 32 bits — shift it down first.
      Val = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Val,
                        DAG.getConstant(32, DL, VA.getLocVT()));
      LLVM_FALLTHROUGH;
    case CCValAssign::AExt:
      LLVM_FALLTHROUGH;
    case CCValAssign::ZExt:
      Val = DAG.getZExtOrTrunc(Val, DL, VA.getValVT());
      break;
    }

    InVals.push_back(Val);
  }

  return Chain;
}
5576 | |
5577 | |
5578 | static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) { |
5579 | return (CC == CallingConv::Fast && GuaranteeTailCalls) || |
5580 | CC == CallingConv::Tail || CC == CallingConv::SwiftTail; |
5581 | } |
5582 | |
5583 | |
5584 | static bool mayTailCallThisCC(CallingConv::ID CC) { |
5585 | switch (CC) { |
5586 | case CallingConv::C: |
5587 | case CallingConv::AArch64_SVE_VectorCall: |
5588 | case CallingConv::PreserveMost: |
5589 | case CallingConv::Swift: |
5590 | case CallingConv::SwiftTail: |
5591 | case CallingConv::Tail: |
5592 | case CallingConv::Fast: |
5593 | return true; |
5594 | default: |
5595 | return false; |
5596 | } |
5597 | } |
5598 | |
/// Decide whether this call site may legally be turned into a tail call:
/// the calling conventions, preserved-register masks, argument placement
/// and stack usage of caller and callee must all be compatible.
bool AArch64TargetLowering::isEligibleForTailCallOptimization(
    SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
  if (!mayTailCallThisCC(CalleeCC))
    return false;

  MachineFunction &MF = DAG.getMachineFunction();
  const Function &CallerF = MF.getFunction();
  CallingConv::ID CallerCC = CallerF.getCallingConv();

  // Functions using the C or Fast calling convention that have an SVE
  // signature preserve more registers and are treated as having the SVE
  // vector-call convention for compatibility purposes.
  if ((CallerCC == CallingConv::C || CallerCC == CallingConv::Fast) &&
      AArch64RegisterInfo::hasSVEArgsOrReturn(&MF))
    CallerCC = CallingConv::AArch64_SVE_VectorCall;

  bool CCMatch = CallerCC == CalleeCC;

  // When using the Windows calling convention on a non-Windows OS, we want
  // to back up and restore X18 in such functions; we can't do a tail call
  // from those functions to non-Win64 callees.
  if (CallerCC == CallingConv::Win64 && !Subtarget->isTargetWindows() &&
      CalleeCC != CallingConv::Win64)
    return false;

  // Byval and inreg (sret) parameters in the caller complicate the stack
  // layout in ways we don't handle for tail calls — bail out.
  for (Function::const_arg_iterator i = CallerF.arg_begin(),
                                    e = CallerF.arg_end();
       i != e; ++i) {
    if (i->hasByValAttr())
      return false;

    // On Windows, "inreg" attributes signify non-aggregate indirect returns;
    // see the comment at the corresponding LowerFormalArguments handling.
    // NOTE(review): exact inreg/sret interaction not visible here — the
    // conservative rejection keeps tail calls safe in that case.
    if (i->hasInRegAttr())
      return false;
  }

  if (canGuaranteeTCO(CalleeCC,
                      getTargetMachine().Options.GuaranteedTailCallOpt))
    return CCMatch;

  // Externally-defined functions with weak linkage should not be tail-called
  // where the symbol may resolve to null: a null target would branch through
  // a null pointer. (The Windows/ELF/MachO triple test below encodes where
  // this is a concern.)
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = G->getGlobal();
    const Triple &TT = getTargetMachine().getTargetTriple();
    if (GV->hasExternalWeakLinkage() &&
        (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
      return false;
  }

  // Now we search for cases where we can use a tail call without changing
  // the ABI behaviour of the incoming and outgoing arguments.
  assert((!isVarArg || CalleeCC == CallingConv::C) &&
         "Unexpected variadic calling convention");

  LLVMContext &C = *DAG.getContext();
  if (isVarArg && !Outs.empty()) {
    // At least two cases here: if caller is fastcc then we can't have any
    // memory arguments (we'd be expected to clean up the stack afterwards);
    // a variadic callee also stores its registers around the stack area.
    // Conservatively require every variadic argument to live in a register.
    SmallVector<CCValAssign, 16> ArgLocs;
    CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);

    CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, true));
    for (const CCValAssign &ArgLoc : ArgLocs)
      if (!ArgLoc.isRegLoc())
        return false;
  }

  // Check that the return-value locations of caller and callee agree.
  if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
                                  CCAssignFnForCall(CalleeCC, isVarArg),
                                  CCAssignFnForCall(CallerCC, isVarArg)))
    return false;

  // The callee must preserve at least the registers the caller's convention
  // preserves, or the caller's caller could observe clobbers.
  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
  if (!CCMatch) {
    const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
    if (Subtarget->hasCustomCallingConv()) {
      TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
      TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
    }
    if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
      return false;
  }

  // Nothing more to check if the callee takes no arguments.
  if (Outs.empty())
    return true;

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);

  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));

  const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();

  // Indirectly-passed (scalable-vector) arguments would need temporaries
  // whose lifetime outlives this frame — not possible in a tail call.
  if (llvm::any_of(ArgLocs, [](CCValAssign &A) {
        assert((A.getLocInfo() != CCValAssign::Indirect ||
                A.getValVT().isScalableVector()) &&
               "Expected value to be scalable");
        return A.getLocInfo() == CCValAssign::Indirect;
      }))
    return false;

  // If the stack arguments for this call do not fit into our own save area
  // then the call cannot be made tail.
  if (CCInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea())
    return false;

  const MachineRegisterInfo &MRI = MF.getRegInfo();
  if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
    return false;

  return true;
}
5743 | |
5744 | SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain, |
5745 | SelectionDAG &DAG, |
5746 | MachineFrameInfo &MFI, |
5747 | int ClobberedFI) const { |
5748 | SmallVector<SDValue, 8> ArgChains; |
5749 | int64_t FirstByte = MFI.getObjectOffset(ClobberedFI); |
5750 | int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1; |
5751 | |
5752 | |
5753 | |
5754 | |
5755 | ArgChains.push_back(Chain); |
5756 | |
5757 | |
5758 | for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(), |
5759 | UE = DAG.getEntryNode().getNode()->use_end(); |
5760 | U != UE; ++U) |
5761 | if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U)) |
5762 | if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr())) |
5763 | if (FI->getIndex() < 0) { |
5764 | int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex()); |
5765 | int64_t InLastByte = InFirstByte; |
5766 | InLastByte += MFI.getObjectSize(FI->getIndex()) - 1; |
5767 | |
5768 | if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) || |
5769 | (FirstByte <= InFirstByte && InFirstByte <= LastByte)) |
5770 | ArgChains.push_back(SDValue(L, 1)); |
5771 | } |
5772 | |
5773 | |
5774 | return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains); |
5775 | } |
5776 | |
5777 | bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC, |
5778 | bool TailCallOpt) const { |
5779 | return (CallCC == CallingConv::Fast && TailCallOpt) || |
5780 | CallCC == CallingConv::Tail || CallCC == CallingConv::SwiftTail; |
5781 | } |
5782 | |
5783 | |
5784 | |
/// Lower a call into a callseq_start <- CALL <- callseq_end chain, adding the
/// input and output parameter nodes. Handles sibling calls, guaranteed tail
/// calls, varargs (including the Win64 vararg convention) and indirectly
/// passed SVE values.
SDValue
AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
                                 SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &DL = CLI.DL;
  SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
  SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
  SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &IsTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFunction::CallSiteInfo CSInfo;
  bool IsThisReturn = false;

  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
  bool IsSibCall = false;
  bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CallConv);

  // Check callee args/returns for SVE registers and set the calling convention
  // accordingly: any scalable-vector argument or return forces the SVE vector
  // calling convention.
  if (CallConv == CallingConv::C || CallConv == CallingConv::Fast) {
    bool CalleeOutSVE = any_of(Outs, [](ISD::OutputArg &Out){
      return Out.VT.isScalableVector();
    });
    bool CalleeInSVE = any_of(Ins, [](ISD::InputArg &In){
      return In.VT.isScalableVector();
    });

    if (CalleeInSVE || CalleeOutSVE)
      CallConv = CallingConv::AArch64_SVE_VectorCall;
  }

  if (IsTailCall) {
    // Check if it's really possible to do a tail call.
    IsTailCall = isEligibleForTailCallOptimization(
        Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG);

    // A sibling call is one where we're under the usual C ABI and not planning
    // to change that but can still do a tail call.
    if (!TailCallOpt && IsTailCall && CallConv != CallingConv::Tail &&
        CallConv != CallingConv::SwiftTail)
      IsSibCall = true;

    if (IsTailCall)
      ++NumTailCalls;
  }

  if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall())
    report_fatal_error("failed to perform tail call elimination on a call "
                       "site marked musttail");

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (IsVarArg) {
    // Handle fixed and variable arguments differently: the variable ones use
    // the vararg assignment function (everything in memory/GPRs).
    unsigned NumArgs = Outs.size();

    for (unsigned i = 0; i != NumArgs; ++i) {
      MVT ArgVT = Outs[i].VT;
      if (!Outs[i].IsFixed && ArgVT.isScalableVector())
        report_fatal_error("Passing SVE types to variadic functions is "
                           "currently not supported");

      ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
      bool UseVarArgCC = !Outs[i].IsFixed;
      // On Windows, even the fixed arguments of a vararg call are assigned by
      // the vararg convention.
      if (IsCalleeWin64)
        UseVarArgCC = true;
      CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, UseVarArgCC);
      bool Res = AssignFn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
      assert(!Res && "Call operand has unhandled type");
      (void)Res;
    }
  } else {
    // At this point Outs[].VT may already be promoted to i32. To correctly
    // pass i1/i8/i16 on the stack as their original width, recover the small
    // type from the IR type of the argument and assign with that instead.
    unsigned NumArgs = Outs.size();
    for (unsigned i = 0; i != NumArgs; ++i) {
      MVT ValVT = Outs[i].VT;
      // Get the type of the original (pre-promotion) argument.
      EVT ActualVT = getValueType(DAG.getDataLayout(),
                                  CLI.getArgs()[Outs[i].OrigArgIndex].Ty,
                                  /*AllowUnknown=*/ true);
      MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ValVT;
      ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
      // If ActualMVT is i1/i8/i16, pass as i8/i8/i16 respectively.
      if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
        ValVT = MVT::i8;
      else if (ActualMVT == MVT::i16)
        ValVT = MVT::i16;

      CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/ false);
      bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full, ArgFlags, CCInfo);
      assert(!Res && "Call operand has unhandled type");
      (void)Res;
    }
  }

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();

  if (IsSibCall) {
    // Since we're not changing the ABI to make this a tail call, the memory
    // operands are already available in the caller's incoming argument space.
    NumBytes = 0;
  }

  // FPDiff is the byte offset of the call's argument area from the callee's.
  // Stores to callee stack arguments will be placed in fixed stack slots
  // offset by this amount for a tail call. In a sibcall it must be 0 because
  // the caller's argument area is reused as-is. Unused for non-tail calls.
  int FPDiff = 0;

  if (IsTailCall && !IsSibCall) {
    unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();

    // Since the callee pops the argument stack on a tail call, keep the popped
    // size 16-byte aligned.
    NumBytes = alignTo(NumBytes, 16);

    // FPDiff is negative if this tail call needs more stack argument space
    // than our incoming argument area provides; positive if it needs less.
    FPDiff = NumReusableBytes - NumBytes;

    // Record the largest extra space any tail call in this function requires,
    // so the prologue can reserve it.
    if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (unsigned)-FPDiff)
      FuncInfo->setTailCallReservedStack(-FPDiff);

    // SP must stay 16-byte aligned across the call boundary; since both
    // argument areas are 16-byte aligned, so is their difference.
    assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
  }

  // Adjust the stack pointer for the new arguments. These operations are
  // eliminated/folded by the prolog/epilog inserter.
  if (!IsSibCall)
    Chain = DAG.getCALLSEQ_START(Chain, IsTailCall ? 0 : NumBytes, 0, DL);

  SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP,
                                        getPointerTy(DAG.getDataLayout()));

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallSet<unsigned, 8> RegsUsed;
  SmallVector<SDValue, 8> MemOpChains;
  auto PtrVT = getPointerTy(DAG.getDataLayout());

  // musttail vararg calls forward the caller's registers verbatim.
  if (IsVarArg && CLI.CB && CLI.CB->isMustTailCall()) {
    const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
    for (const auto &F : Forwards) {
      SDValue Val = DAG.getCopyFromReg(Chain, DL, F.VReg, F.VT);
      RegsToPass.emplace_back(F.PReg, Val);
    }
  }

  // Walk the register/memloc assignments, inserting copies/stores.
  // ExtraArgLocs accounts for Outs entries consumed by multi-part indirect
  // (SVE tuple) arguments that share a single ArgLoc.
  unsigned ExtraArgLocs = 0;
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
    SDValue Arg = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default:
      llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      if (Outs[i].ArgVT == MVT::i1) {
        // AAPCS requires i1 to be zero-extended to 8 bits by the caller.
        Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
        Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i8, Arg);
      }
      Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExtUpper:
      // The i32 value goes in the upper half of a 64-bit register.
      assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
      Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
      Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg,
                        DAG.getConstant(32, DL, VA.getLocVT()));
      break;
    case CCValAssign::BCvt:
      Arg = DAG.getBitcast(VA.getLocVT(), Arg);
      break;
    case CCValAssign::Trunc:
      Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
      break;
    case CCValAssign::FPExt:
      Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::Indirect:
      // Scalable vectors are spilled to a stack slot and the slot address is
      // passed instead of the value.
      assert(VA.getValVT().isScalableVector() &&
             "Only scalable vectors can be passed indirectly");

      uint64_t StoreSize = VA.getValVT().getStoreSize().getKnownMinSize();
      uint64_t PartSize = StoreSize;
      unsigned NumParts = 1;
      if (Outs[i].Flags.isInConsecutiveRegs()) {
        // An SVE tuple: count its parts and size the slot for all of them.
        assert(!Outs[i].Flags.isInConsecutiveRegsLast());
        while (!Outs[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
          ++NumParts;
        StoreSize *= NumParts;
      }

      MachineFrameInfo &MFI = MF.getFrameInfo();
      Type *Ty = EVT(VA.getValVT()).getTypeForEVT(*DAG.getContext());
      Align Alignment = DAG.getDataLayout().getPrefTypeAlign(Ty);
      int FI = MFI.CreateStackObject(StoreSize, Alignment, false);
      MFI.setStackID(FI, TargetStackID::ScalableVector);

      MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
      SDValue Ptr = DAG.getFrameIndex(
          FI, DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()));
      SDValue SpillSlot = Ptr;

      // Store every tuple part, stepping the pointer by vscale * PartSize
      // between parts; each extra part consumes one Outs entry (i++) while
      // reusing the same ArgLoc (ExtraArgLocs++).
      while (NumParts) {
        Chain = DAG.getStore(Chain, DL, OutVals[i], Ptr, MPI);
        NumParts--;
        if (NumParts > 0) {
          SDValue BytesIncrement = DAG.getVScale(
              DL, Ptr.getValueType(),
              APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize));
          SDNodeFlags Flags;
          Flags.setNoUnsignedWrap(true);

          MPI = MachinePointerInfo(MPI.getAddrSpace());
          Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
                            BytesIncrement, Flags);
          ExtraArgLocs++;
          i++;
        }
      }

      Arg = SpillSlot;
      break;
    }

    if (VA.isRegLoc()) {
      // Detect the 'returned' attribute on the first i64 argument so the
      // call result copy can be elided ('this'-return optimization).
      if (i == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
          Outs[0].VT == MVT::i64) {
        assert(VA.getLocVT() == MVT::i64 &&
               "unexpected calling convention register assignment");
        assert(!Ins.empty() && Ins[0].VT == MVT::i64 &&
               "unexpected use of 'returned'");
        IsThisReturn = true;
      }
      if (RegsUsed.count(VA.getLocReg())) {
        // This register was already used: we're packing parts of an
        // [N x i32] into one X register. The extensions above already put
        // each half in place, so just OR them together.
        SDValue &Bits =
            llvm::find_if(RegsToPass,
                          [=](const std::pair<unsigned, SDValue> &Elt) {
                            return Elt.first == VA.getLocReg();
                          })
                ->second;
        Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg);
        // Call-site info (for parameter entry-value tracking) only handles
        // whole-register transfers; drop the entry for a packed register.
        llvm::erase_if(CSInfo, [&VA](MachineFunction::ArgRegPair ArgReg) {
          return ArgReg.Reg == VA.getLocReg();
        });
      } else {
        RegsToPass.emplace_back(VA.getLocReg(), Arg);
        RegsUsed.insert(VA.getLocReg());
        const TargetOptions &Options = DAG.getTarget().Options;
        if (Options.EmitCallSiteInfo)
          CSInfo.emplace_back(VA.getLocReg(), i);
      }
    } else {
      assert(VA.isMemLoc());

      SDValue DstAddr;
      MachinePointerInfo DstInfo;

      // On big-endian, small values occupy the high bytes of their 8-byte
      // stack slot, so pad the offset accordingly (non-byval,
      // non-consecutive-regs only).
      uint32_t BEAlign = 0;
      unsigned OpSize;
      if (VA.getLocInfo() == CCValAssign::Indirect ||
          VA.getLocInfo() == CCValAssign::Trunc)
        OpSize = VA.getLocVT().getFixedSizeInBits();
      else
        OpSize = Flags.isByVal() ? Flags.getByValSize() * 8
                                 : VA.getValVT().getSizeInBits();
      OpSize = (OpSize + 7) / 8;
      if (!Subtarget->isLittleEndian() && !Flags.isByVal() &&
          !Flags.isInConsecutiveRegs()) {
        if (OpSize < 8)
          BEAlign = 8 - OpSize;
      }
      unsigned LocMemOffset = VA.getLocMemOffset();
      int32_t Offset = LocMemOffset + BEAlign;
      SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
      PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);

      if (IsTailCall) {
        // Tail calls store into the callee's argument area, addressed as a
        // fixed stack object offset by FPDiff.
        Offset = Offset + FPDiff;
        int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);

        DstAddr = DAG.getFrameIndex(FI, PtrVT);
        DstInfo = MachinePointerInfo::getFixedStack(MF, FI);

        // Make sure any stack arguments overlapping with where we're storing
        // are loaded before this store, otherwise they'd be clobbered.
        Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
      } else {
        SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);

        DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
        DstInfo = MachinePointerInfo::getStack(MF, LocMemOffset);
      }

      if (Outs[i].Flags.isByVal()) {
        // byval arguments are copied wholesale into the outgoing area.
        SDValue SizeNode =
            DAG.getConstant(Outs[i].Flags.getByValSize(), DL, MVT::i64);
        SDValue Cpy = DAG.getMemcpy(
            Chain, DL, DstAddr, Arg, SizeNode,
            Outs[i].Flags.getNonZeroByValAlign(),
            /*isVol=*/ false, /*AlwaysInline=*/ false,
            /*isTailCall=*/ false, DstInfo, MachinePointerInfo());

        MemOpChains.push_back(Cpy);
      } else {
        // i1/i8/i16 are passed on the stack at their original width, but Arg
        // was promoted to i32 — truncate it back before storing.
        if (VA.getValVT() == MVT::i1 || VA.getValVT() == MVT::i8 ||
            VA.getValVT() == MVT::i16)
          Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);

        SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo);
        MemOpChains.push_back(Store);
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

  // Build a sequence of CopyToReg nodes glued together which copy the outgoing
  // arguments into the appropriate registers.
  SDValue InFlag;
  for (auto &RegToPass : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, DL, RegToPass.first,
                             RegToPass.second, InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
  // Target node (optionally via the GOT) so legalize doesn't hack it.
  if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    auto GV = G->getGlobal();
    unsigned OpFlags =
        Subtarget->classifyGlobalFunctionReference(GV, getTargetMachine());
    if (OpFlags & AArch64II::MO_GOT) {
      Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
      Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
    } else {
      const GlobalValue *GV = G->getGlobal();
      Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
    }
  } else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    if (getTargetMachine().getCodeModel() == CodeModel::Large &&
        Subtarget->isTargetMachO()) {
      const char *Sym = S->getSymbol();
      Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT);
      Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
    } else {
      const char *Sym = S->getSymbol();
      Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0);
    }
  }

  // For an ABI-changing tail call, end the call sequence here (before the
  // jump): the parameters are laid out so they are correct once SP is reset.
  if (IsTailCall && !IsSibCall) {
    Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, DL, true),
                               DAG.getIntPtrConstant(0, DL, true), InFlag, DL);
    InFlag = Chain.getValue(1);
  }

  std::vector<SDValue> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  if (IsTailCall) {
    // Each tail call may have to adjust the stack by a different amount, so
    // the FPDiff must travel with the operation for eventual consumption by
    // emitEpilogue.
    Ops.push_back(DAG.getTargetConstant(FPDiff, DL, MVT::i32));
  }

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (auto &RegToPass : RegsToPass)
    Ops.push_back(DAG.getRegister(RegToPass.first,
                                  RegToPass.second.getValueType()));

  // Add a register mask operand representing the call-preserved registers.
  const uint32_t *Mask;
  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
  if (IsThisReturn) {
    // For 'this' returns, use a mask that also preserves X0 when available.
    Mask = TRI->getThisReturnPreservedMask(MF, CallConv);
    if (!Mask) {
      IsThisReturn = false;
      Mask = TRI->getCallPreservedMask(MF, CallConv);
    }
  } else
    Mask = TRI->getCallPreservedMask(MF, CallConv);

  if (Subtarget->hasCustomCallingConv())
    TRI->UpdateCustomCallPreservedMask(MF, &Mask);

  if (TRI->isAnyArgRegReserved(MF))
    TRI->emitReservedArgRegCallError(MF);

  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  if (InFlag.getNode())
    Ops.push_back(InFlag);

  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);

  // For a tail call, emit a TC_RETURN instead of an actual call instruction.
  if (IsTailCall) {
    MF.getFrameInfo().setHasTailCall();
    SDValue Ret = DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops);
    DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
    return Ret;
  }

  unsigned CallOpc = AArch64ISD::CALL;
  // Calls carrying the "clang.arc.attachedcall" operand bundle must be
  // followed directly by a special marker sequence; CALL_RVMARKER emits it.
  if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
    assert(!IsTailCall &&
           "tail calls cannot be marked with clang.arc.attachedcall");
    CallOpc = AArch64ISD::CALL_RVMARKER;
  }

  // Returns a chain and a glue for the retval copies to use.
  Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops);
  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
  InFlag = Chain.getValue(1);
  DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));

  uint64_t CalleePopBytes =
      DoesCalleeRestoreStack(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : 0;

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
                             DAG.getIntPtrConstant(CalleePopBytes, DL, true),
                             InFlag, DL);
  if (!Ins.empty())
    InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG,
                         InVals, IsThisReturn,
                         IsThisReturn ? OutVals[0] : SDValue());
}
6284 | |
6285 | bool AArch64TargetLowering::CanLowerReturn( |
6286 | CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, |
6287 | const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { |
6288 | CCAssignFn *RetCC = CCAssignFnForReturn(CallConv); |
6289 | SmallVector<CCValAssign, 16> RVLocs; |
6290 | CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); |
6291 | return CCInfo.CheckReturn(Outs, RetCC); |
6292 | } |
6293 | |
/// Lower a return: copy the result values into the convention's return
/// registers (packing [N x i32] parts where needed), forward the sret pointer
/// in X0 where the ABI requires it, and emit the RET_FLAG node.
SDValue
AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                   bool isVarArg,
                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
                                   const SmallVectorImpl<SDValue> &OutVals,
                                   const SDLoc &DL, SelectionDAG &DAG) const {
  auto &MF = DAG.getMachineFunction();
  auto *FuncInfo = MF.getInfo<AArch64FunctionInfo>();

  CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
  CCInfo.AnalyzeReturn(Outs, RetCC);

  // Copy the result values into the output registers.
  SDValue Flag;
  SmallVector<std::pair<unsigned, SDValue>, 4> RetVals;
  SmallSet<unsigned, 4> RegsUsed;
  for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size();
       ++i, ++realRVLocIdx) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    SDValue Arg = OutVals[realRVLocIdx];

    switch (VA.getLocInfo()) {
    default:
      llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full:
      if (Outs[i].ArgVT == MVT::i1) {
        // AAPCS requires i1 to be zero-extended to i8 by the producer of the
        // value; make the truncate + zero-extend explicit here.
        Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
        Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
      }
      break;
    case CCValAssign::BCvt:
      Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
    case CCValAssign::ZExt:
      Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
      break;
    case CCValAssign::AExtUpper:
      // The i32 value goes into the upper half of a 64-bit register.
      assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
      Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
      Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg,
                        DAG.getConstant(32, DL, VA.getLocVT()));
      break;
    }

    if (RegsUsed.count(VA.getLocReg())) {
      // Two values packed into the same register (parts of an [N x i32]):
      // the extensions above placed each half, so OR them together.
      SDValue &Bits =
          llvm::find_if(RetVals, [=](const std::pair<unsigned, SDValue> &Elt) {
            return Elt.first == VA.getLocReg();
          })->second;
      Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg);
    } else {
      RetVals.emplace_back(VA.getLocReg(), Arg);
      RegsUsed.insert(VA.getLocReg());
    }
  }

  // Emit glued CopyToReg nodes for every return register.
  SmallVector<SDValue, 4> RetOps(1, Chain);
  for (auto &RetVal : RetVals) {
    Chain = DAG.getCopyToReg(Chain, DL, RetVal.first, RetVal.second, Flag);
    Flag = Chain.getValue(1);
    RetOps.push_back(
        DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
  }

  // If the function has an sret return register recorded (the sret pointer was
  // saved into a virtual register in the entry block), copy it back out into
  // X0 for the return, as required e.g. by the Windows ABI.
  if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
    SDValue Val = DAG.getCopyFromReg(RetOps[0], DL, SRetReg,
                                     getPointerTy(MF.getDataLayout()));

    unsigned RetValReg = AArch64::X0;
    Chain = DAG.getCopyToReg(Chain, DL, RetValReg, Val, Flag);
    Flag = Chain.getValue(1);

    RetOps.push_back(
        DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
  }

  // Add any callee-saved-via-copy registers as extra return operands so they
  // are treated as live across the function.
  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
  const MCPhysReg *I = TRI->getCalleeSavedRegsViaCopy(&MF);
  if (I) {
    for (; *I; ++I) {
      if (AArch64::GPR64RegClass.contains(*I))
        RetOps.push_back(DAG.getRegister(*I, MVT::i64));
      else if (AArch64::FPR64RegClass.contains(*I))
        RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
      else
        llvm_unreachable("Unexpected register class in CSRsViaCopy!");
    }
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue if we have it.
  if (Flag.getNode())
    RetOps.push_back(Flag);

  return DAG.getNode(AArch64ISD::RET_FLAG, DL, MVT::Other, RetOps);
}
6402 | |
6403 | |
6404 | |
6405 | |
6406 | |
6407 | SDValue AArch64TargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty, |
6408 | SelectionDAG &DAG, |
6409 | unsigned Flag) const { |
6410 | return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty, |
6411 | N->getOffset(), Flag); |
6412 | } |
6413 | |
6414 | SDValue AArch64TargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty, |
6415 | SelectionDAG &DAG, |
6416 | unsigned Flag) const { |
6417 | return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag); |
6418 | } |
6419 | |
6420 | SDValue AArch64TargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty, |
6421 | SelectionDAG &DAG, |
6422 | unsigned Flag) const { |
6423 | return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(), |
6424 | N->getOffset(), Flag); |
6425 | } |
6426 | |
6427 | SDValue AArch64TargetLowering::getTargetNode(BlockAddressSDNode* N, EVT Ty, |
6428 | SelectionDAG &DAG, |
6429 | unsigned Flag) const { |
6430 | return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag); |
6431 | } |
6432 | |
6433 | |
6434 | template <class NodeTy> |
6435 | SDValue AArch64TargetLowering::getGOT(NodeTy *N, SelectionDAG &DAG, |
6436 | unsigned Flags) const { |
6437 | LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getGOT\n"); |
6438 | SDLoc DL(N); |
6439 | EVT Ty = getPointerTy(DAG.getDataLayout()); |
6440 | SDValue GotAddr = getTargetNode(N, Ty, DAG, AArch64II::MO_GOT | Flags); |
6441 | |
6442 | |
6443 | return DAG.getNode(AArch64ISD::LOADgot, DL, Ty, GotAddr); |
6444 | } |
6445 | |
6446 | |
6447 | template <class NodeTy> |
6448 | SDValue AArch64TargetLowering::getAddrLarge(NodeTy *N, SelectionDAG &DAG, |
6449 | unsigned Flags) const { |
6450 | LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrLarge\n"); |
6451 | SDLoc DL(N); |
6452 | EVT Ty = getPointerTy(DAG.getDataLayout()); |
6453 | const unsigned char MO_NC = AArch64II::MO_NC; |
6454 | return DAG.getNode( |
6455 | AArch64ISD::WrapperLarge, DL, Ty, |
6456 | getTargetNode(N, Ty, DAG, AArch64II::MO_G3 | Flags), |
6457 | getTargetNode(N, Ty, DAG, AArch64II::MO_G2 | MO_NC | Flags), |
6458 | getTargetNode(N, Ty, DAG, AArch64II::MO_G1 | MO_NC | Flags), |
6459 | getTargetNode(N, Ty, DAG, AArch64II::MO_G0 | MO_NC | Flags)); |
6460 | } |
6461 | |
6462 | |
6463 | template <class NodeTy> |
6464 | SDValue AArch64TargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG, |
6465 | unsigned Flags) const { |
6466 | LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddr\n"); |
6467 | SDLoc DL(N); |
6468 | EVT Ty = getPointerTy(DAG.getDataLayout()); |
6469 | SDValue Hi = getTargetNode(N, Ty, DAG, AArch64II::MO_PAGE | Flags); |
6470 | SDValue Lo = getTargetNode(N, Ty, DAG, |
6471 | AArch64II::MO_PAGEOFF | AArch64II::MO_NC | Flags); |
6472 | SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, Ty, Hi); |
6473 | return DAG.getNode(AArch64ISD::ADDlow, DL, Ty, ADRP, Lo); |
6474 | } |
6475 | |
6476 | |
6477 | template <class NodeTy> |
6478 | SDValue AArch64TargetLowering::getAddrTiny(NodeTy *N, SelectionDAG &DAG, |
6479 | unsigned Flags) const { |
6480 | LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrTiny\n"); |
6481 | SDLoc DL(N); |
6482 | EVT Ty = getPointerTy(DAG.getDataLayout()); |
6483 | SDValue Sym = getTargetNode(N, Ty, DAG, Flags); |
6484 | return DAG.getNode(AArch64ISD::ADR, DL, Ty, Sym); |
6485 | } |
6486 | |
6487 | SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op, |
6488 | SelectionDAG &DAG) const { |
6489 | GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op); |
6490 | const GlobalValue *GV = GN->getGlobal(); |
6491 | unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, getTargetMachine()); |
6492 | |
6493 | if (OpFlags != AArch64II::MO_NO_FLAG) |
6494 | assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 && |
6495 | "unexpected offset in global node"); |
6496 | |
6497 | |
6498 | |
6499 | if ((OpFlags & AArch64II::MO_GOT) != 0) { |
6500 | return getGOT(GN, DAG, OpFlags); |
6501 | } |
6502 | |
6503 | SDValue Result; |
6504 | if (getTargetMachine().getCodeModel() == CodeModel::Large) { |
6505 | Result = getAddrLarge(GN, DAG, OpFlags); |
6506 | } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) { |
6507 | Result = getAddrTiny(GN, DAG, OpFlags); |
6508 | } else { |
6509 | Result = getAddr(GN, DAG, OpFlags); |
6510 | } |
6511 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); |
6512 | SDLoc DL(GN); |
6513 | if (OpFlags & (AArch64II::MO_DLLIMPORT | AArch64II::MO_COFFSTUB)) |
6514 | Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result, |
6515 | MachinePointerInfo::getGOT(DAG.getMachineFunction())); |
6516 | return Result; |
6517 | } |
6518 | |
6519 | |
6520 | |
6521 | |
6522 | |
6523 | |
6524 | |
6525 | |
6526 | |
6527 | |
6528 | |
6529 | |
6530 | |
6531 | |
6532 | |
6533 | |
6534 | |
6535 | |
6536 | |
6537 | |
6538 | |
6539 | |
6540 | |
6541 | |
6542 | |
6543 | |
6544 | |
6545 | |
6546 | |
/// Lower a TLS global address on Darwin: load the TLV descriptor via the GOT,
/// load the resolver function pointer from it, and call the resolver with the
/// descriptor address in X0; the variable's address comes back in X0.
SDValue
AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
                                                   SelectionDAG &DAG) const {
  assert(Subtarget->isTargetDarwin() &&
         "This function expects a Darwin target");

  SDLoc DL(Op);
  MVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT PtrMemVT = getPointerMemTy(DAG.getDataLayout());
  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();

  // Address of the TLV descriptor, reached through the GOT.
  SDValue TLVPAddr =
      DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
  SDValue DescAddr = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TLVPAddr);

  // The first entry in the descriptor is a function pointer that we must call
  // to obtain the address of the variable.
  SDValue Chain = DAG.getEntryNode();
  SDValue FuncTLVGet = DAG.getLoad(
      PtrMemVT, DL, Chain, DescAddr,
      MachinePointerInfo::getGOT(DAG.getMachineFunction()),
      Align(PtrMemVT.getSizeInBits() / 8),
      MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable);
  Chain = FuncTLVGet.getValue(1);

  // Extend the loaded pointer to the DAG pointer width if they differ
  // (e.g. ILP32).
  FuncTLVGet = DAG.getZExtOrTrunc(FuncTLVGet, DL, PtrVT);

  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  MFI.setAdjustsStack(true);

  // TLS calls use a special preserved mask: the resolver clobbers almost
  // nothing beyond X0/LR/NZCV.
  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
  const uint32_t *Mask = TRI->getTLSCallPreservedMask();
  if (Subtarget->hasCustomCallingConv())
    TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask);

  // Make the call: a degenerate AArch64 call node where X0 carries the
  // descriptor address in and the variable's address out.
  Chain = DAG.getCopyToReg(Chain, DL, AArch64::X0, DescAddr, SDValue());
  Chain =
      DAG.getNode(AArch64ISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
                  Chain, FuncTLVGet, DAG.getRegister(AArch64::X0, MVT::i64),
                  DAG.getRegisterMask(Mask), Chain.getValue(1));
  return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Chain.getValue(1));
}
6596 | |
6597 | |
6598 | |
6599 | |
/// Lower a local-exec TLS reference on ELF: compute the variable's address as
/// thread pointer + TPREL offset. The instruction sequence used depends on the
/// configured maximum TLS area size (TLSSize).
SDValue AArch64TargetLowering::LowerELFTLSLocalExec(const GlobalValue *GV,
                                                    SDValue ThreadBase,
                                                    const SDLoc &DL,
                                                    SelectionDAG &DAG) const {
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue TPOff, Addr;

  switch (DAG.getTarget().Options.TLSSize) {
  default:
    llvm_unreachable("Unexpected TLS size");

  case 12: {
    // Offset fits in 12 bits — one ADD:
    //   add x0, <thread base>, :tprel_lo12:a
    SDValue Var = DAG.getTargetGlobalAddress(
        GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_PAGEOFF);
    return SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
                                      Var,
                                      DAG.getTargetConstant(0, DL, MVT::i32)),
                   0);
  }

  case 24: {
    // Offset fits in 24 bits — two ADDs:
    //   add x0, <thread base>, :tprel_hi12:a
    //   add x0, x0, :tprel_lo12_nc:a
    SDValue HiVar = DAG.getTargetGlobalAddress(
        GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
    SDValue LoVar = DAG.getTargetGlobalAddress(
        GV, DL, PtrVT, 0,
        AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
    Addr = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
                                      HiVar,
                                      DAG.getTargetConstant(0, DL, MVT::i32)),
                   0);
    return SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, Addr,
                                      LoVar,
                                      DAG.getTargetConstant(0, DL, MVT::i32)),
                   0);
  }

  case 32: {
    // Offset fits in 32 bits — build it with MOVZ/MOVK, then add:
    //   movz x0, #:tprel_g1:a
    //   movk x0, #:tprel_g0_nc:a
    //   add  x0, <thread base>, x0
    SDValue HiVar = DAG.getTargetGlobalAddress(
        GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G1);
    SDValue LoVar = DAG.getTargetGlobalAddress(
        GV, DL, PtrVT, 0,
        AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
    TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
                                       DAG.getTargetConstant(16, DL, MVT::i32)),
                    0);
    TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar,
                                       DAG.getTargetConstant(0, DL, MVT::i32)),
                    0);
    return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
  }

  case 48: {
    // Offset fits in 48 bits — MOVZ plus two MOVKs, then add:
    //   movz x0, #:tprel_g2:a
    //   movk x0, #:tprel_g1_nc:a
    //   movk x0, #:tprel_g0_nc:a
    //   add  x0, <thread base>, x0
    SDValue HiVar = DAG.getTargetGlobalAddress(
        GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G2);
    SDValue MiVar = DAG.getTargetGlobalAddress(
        GV, DL, PtrVT, 0,
        AArch64II::MO_TLS | AArch64II::MO_G1 | AArch64II::MO_NC);
    SDValue LoVar = DAG.getTargetGlobalAddress(
        GV, DL, PtrVT, 0,
        AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
    TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
                                       DAG.getTargetConstant(32, DL, MVT::i32)),
                    0);
    TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, MiVar,
                                       DAG.getTargetConstant(16, DL, MVT::i32)),
                    0);
    TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar,
                                       DAG.getTargetConstant(0, DL, MVT::i32)),
                    0);
    return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
  }
  }
}
6687 | |
6688 | |
6689 | |
6690 | |
6691 | |
6692 | |
6693 | |
6694 | |
6695 | |
6696 | |
6697 | |
6698 | |
6699 | |
6700 | |
6701 | |
6702 | |
6703 | |
6704 | |
6705 | |
6706 | SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(SDValue SymAddr, |
6707 | const SDLoc &DL, |
6708 | SelectionDAG &DAG) const { |
6709 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); |
6710 | |
6711 | SDValue Chain = DAG.getEntryNode(); |
6712 | SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); |
6713 | |
6714 | Chain = |
6715 | DAG.getNode(AArch64ISD::TLSDESC_CALLSEQ, DL, NodeTys, {Chain, SymAddr}); |
6716 | SDValue Glue = Chain.getValue(1); |
6717 | |
6718 | return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue); |
6719 | } |
6720 | |
/// Lower a thread-local global address for ELF targets. Selects among the
/// local-exec, initial-exec, local-dynamic and general-dynamic TLS access
/// models; all models ultimately compute ThreadBase + TPOff.
SDValue
AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
                                                SelectionDAG &DAG) const {
  assert(Subtarget->isTargetELF() && "This function expects an ELF target");

  const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);

  TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());

  // Unless dedicated local-dynamic generation is enabled, degrade
  // local-dynamic accesses to the general-dynamic sequence.
  if (!EnableAArch64ELFLocalDynamicTLSGeneration) {
    if (Model == TLSModel::LocalDynamic)
      Model = TLSModel::GeneralDynamic;
  }

  // Only the local-exec model is supported under the large code model.
  if (getTargetMachine().getCodeModel() == CodeModel::Large &&
      Model != TLSModel::LocalExec)
    report_fatal_error("ELF TLS only supported in small memory model or "
                       "in local exec TLS model");

  // TPOff is the variable's offset from the thread pointer; the final
  // address is always ThreadBase + TPOff.
  SDValue TPOff;
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDLoc DL(Op);
  const GlobalValue *GV = GA->getGlobal();

  SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT);

  if (Model == TLSModel::LocalExec) {
    return LowerELFTLSLocalExec(GV, ThreadBase, DL, DAG);
  } else if (Model == TLSModel::InitialExec) {
    // Initial-exec: the TP-relative offset lives in the GOT; load it.
    TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
    TPOff = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TPOff);
  } else if (Model == TLSModel::LocalDynamic) {
    // Local-dynamic: resolve _TLS_MODULE_BASE_ (this module's TLS block)
    // with one TLSDESC call, then address the variable with :dtprel:
    // offsets from that base. Record the access so later passes can
    // combine multiple local-dynamic accesses onto a single call.
    AArch64FunctionInfo *MFI =
        DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
    MFI->incNumLocalDynamicTLSAccesses();

    // The call operand needs the MO_TLS relocation flag so the linker can
    // relax the descriptor sequence.
    SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
                                                  AArch64II::MO_TLS);

    // Calculate the offset from TPIDR_EL0 to this module's TLS block.
    TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);

    // Add the variable's :dtprel_hi12:/:dtprel_lo12: offset within the
    // module's thread-storage area (two 12-bit immediate adds).
    SDValue HiVar = DAG.getTargetGlobalAddress(
        GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
    SDValue LoVar = DAG.getTargetGlobalAddress(
        GV, DL, MVT::i64, 0,
        AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);

    TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff,
                                       HiVar,
                                       DAG.getTargetConstant(0, DL, MVT::i32)),
                    0);
    TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff,
                                       LoVar,
                                       DAG.getTargetConstant(0, DL, MVT::i32)),
                    0);
  } else if (Model == TLSModel::GeneralDynamic) {
    // General-dynamic: one TLSDESC call per variable; the operand carries
    // the MO_TLS relocation for linker relaxation.
    SDValue SymAddr =
        DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);

    // The call yields the variable's offset from tpidr_el0 directly.
    TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
  } else
    llvm_unreachable("Unsupported ELF TLS access model");

  return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
}
6807 | |
/// Lower a thread-local global address for Windows on AArch64.
/// Computes: TEB(x18)->ThreadLocalStoragePointer[_tls_index] + offset(GV).
SDValue
AArch64TargetLowering::LowerWindowsGlobalTLSAddress(SDValue Op,
                                                    SelectionDAG &DAG) const {
  assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");

  SDValue Chain = DAG.getEntryNode();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDLoc DL(Op);

  // On Windows/AArch64, x18 holds the TEB pointer.
  SDValue TEB = DAG.getRegister(AArch64::X18, MVT::i64);

  // Load the ThreadLocalStoragePointer: the TLS array pointer sits at
  // offset 0x58 within the TEB.
  SDValue TLSArray =
      DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x58, DL));
  TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
  Chain = TLSArray.getValue(1);

  // Load this module's TLS slot index from the global _tls_index,
  // materializing its address with an ADRP + ADDlow page/pageoff pair.
  SDValue TLSIndexHi =
      DAG.getTargetExternalSymbol("_tls_index", PtrVT, AArch64II::MO_PAGE);
  SDValue TLSIndexLo = DAG.getTargetExternalSymbol(
      "_tls_index", PtrVT, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
  SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, TLSIndexHi);
  SDValue TLSIndex =
      DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, TLSIndexLo);
  TLSIndex = DAG.getLoad(MVT::i32, DL, Chain, TLSIndex, MachinePointerInfo());
  Chain = TLSIndex.getValue(1);

  // Index into the TLS array: each slot is pointer-sized (8 bytes), hence
  // the shift by 3. The load yields this module's TLS block base.
  TLSIndex = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TLSIndex);
  SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
                             DAG.getConstant(3, DL, PtrVT));
  SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
                            DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
                            MachinePointerInfo());
  Chain = TLS.getValue(1);

  const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
  const GlobalValue *GV = GA->getGlobal();
  SDValue TGAHi = DAG.getTargetGlobalAddress(
      GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
  SDValue TGALo = DAG.getTargetGlobalAddress(
      GV, DL, PtrVT, 0,
      AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);

  // Add the variable's offset within the TLS block: hi12 bits via an
  // immediate ADDXri, then the low 12 bits via ADDlow.
  SDValue Addr =
      SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TLS, TGAHi,
                                 DAG.getTargetConstant(0, DL, MVT::i32)),
              0);
  Addr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, Addr, TGALo);
  return Addr;
}
6866 | |
6867 | SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op, |
6868 | SelectionDAG &DAG) const { |
6869 | const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); |
6870 | if (DAG.getTarget().useEmulatedTLS()) |
6871 | return LowerToTLSEmulatedModel(GA, DAG); |
6872 | |
6873 | if (Subtarget->isTargetDarwin()) |
6874 | return LowerDarwinGlobalTLSAddress(Op, DAG); |
6875 | if (Subtarget->isTargetELF()) |
6876 | return LowerELFGlobalTLSAddress(Op, DAG); |
6877 | if (Subtarget->isTargetWindows()) |
6878 | return LowerWindowsGlobalTLSAddress(Op, DAG); |
6879 | |
6880 | llvm_unreachable("Unexpected platform trying to use TLS"); |
6881 | } |
6882 | |
6883 | |
6884 | |
6885 | |
6886 | std::pair<SDValue, uint64_t> lookThroughSignExtension(SDValue Val) { |
6887 | if (Val.getOpcode() == ISD::SIGN_EXTEND_INREG) |
6888 | return {Val.getOperand(0), |
6889 | cast<VTSDNode>(Val.getOperand(1))->getVT().getFixedSizeInBits() - |
6890 | 1}; |
6891 | |
6892 | if (Val.getOpcode() == ISD::SIGN_EXTEND) |
6893 | return {Val.getOperand(0), |
6894 | Val.getOperand(0)->getValueType(0).getFixedSizeInBits() - 1}; |
6895 | |
6896 | return {Val, Val.getValueSizeInBits() - 1}; |
6897 | } |
6898 | |
/// Lower ISD::BR_CC, preferring the compact flag-free branches (CBZ/CBNZ,
/// TBZ/TBNZ) where legal, and falling back to CMP/FCMP + conditional branch.
SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue Dest = Op.getOperand(4);
  SDLoc dl(Op);

  MachineFunction &MF = DAG.getMachineFunction();

  // Speculative-load-hardening requires conditional branches that set flags,
  // so suppress the CB(N)Z/TB(N)Z optimizations when it is enabled.
  bool ProduceNonFlagSettingCondBr =
      !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);

  // Handle f128 first: softening turns it into a libcall whose integer
  // result is compared, which the rest of this function handles normally.
  if (LHS.getValueType() == MVT::f128) {
    softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS);

    // If softenSetCCOperands returned a scalar, compare it against zero to
    // obtain the branch condition.
    if (!RHS.getNode()) {
      RHS = DAG.getConstant(0, dl, LHS.getValueType());
      CC = ISD::SETNE;
    }
  }

  // Optimize {s|u}{add|sub|mul}.with.overflow feeding a branch: branch
  // directly on the overflow flag instead of materializing the bit.
  if (ISD::isOverflowIntrOpRes(LHS) && isOneConstant(RHS) &&
      (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    // Only lower legal XALUO ops.
    if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
      return SDValue();

    // The actual operation with overflow check.
    AArch64CC::CondCode OFCC;
    SDValue Value, Overflow;
    std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, LHS.getValue(0), DAG);

    // SETNE against 1 means "no overflow", so invert the condition.
    if (CC == ISD::SETNE)
      OFCC = getInvertedCondCode(OFCC);
    SDValue CCVal = DAG.getConstant(OFCC, dl, MVT::i32);

    return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
                       Overflow);
  }

  if (LHS.getValueType().isInteger()) {
    assert((LHS.getValueType() == RHS.getValueType()) &&
           (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));

    // If the RHS of the comparison is zero, we can potentially fold this
    // to a specialized branch.
    const ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
    if (RHSC && RHSC->getZExtValue() == 0 && ProduceNonFlagSettingCondBr) {
      if (CC == ISD::SETEQ) {
        // (and x, 2^k) == 0  -->  TBZ x, k; this also folds away the AND.
        if (LHS.getOpcode() == ISD::AND &&
            isa<ConstantSDNode>(LHS.getOperand(1)) &&
            isPowerOf2_64(LHS.getConstantOperandVal(1))) {
          SDValue Test = LHS.getOperand(0);
          uint64_t Mask = LHS.getConstantOperandVal(1);
          return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, Test,
                             DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
                             Dest);
        }

        // x == 0  -->  CBZ x.
        return DAG.getNode(AArch64ISD::CBZ, dl, MVT::Other, Chain, LHS, Dest);
      } else if (CC == ISD::SETNE) {
        // (and x, 2^k) != 0  -->  TBNZ x, k.
        if (LHS.getOpcode() == ISD::AND &&
            isa<ConstantSDNode>(LHS.getOperand(1)) &&
            isPowerOf2_64(LHS.getConstantOperandVal(1))) {
          SDValue Test = LHS.getOperand(0);
          uint64_t Mask = LHS.getConstantOperandVal(1);
          return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, Test,
                             DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
                             Dest);
        }

        // x != 0  -->  CBNZ x.
        return DAG.getNode(AArch64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest);
      } else if (CC == ISD::SETLT && LHS.getOpcode() != ISD::AND) {
        // x < 0  -->  TBNZ on the sign bit (looking through sign extends).
        // Skip AND operands since emitComparison would turn the AND into
        // ANDS (TST), making the test-bit-and-branch redundant.
        uint64_t SignBitPos;
        std::tie(LHS, SignBitPos) = lookThroughSignExtension(LHS);
        return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, LHS,
                           DAG.getConstant(SignBitPos, dl, MVT::i64), Dest);
      }
    }
    if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT &&
        LHS.getOpcode() != ISD::AND && ProduceNonFlagSettingCondBr) {
      // x > -1  (i.e. x >= 0)  -->  TBZ on the sign bit; same AND caveat
      // as above.
      uint64_t SignBitPos;
      std::tie(LHS, SignBitPos) = lookThroughSignExtension(LHS);
      return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, LHS,
                         DAG.getConstant(SignBitPos, dl, MVT::i64), Dest);
    }

    // General integer case: CMP followed by a conditional branch.
    SDValue CCVal;
    SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
    return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
                       Cmp);
  }

  assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::bf16 ||
         LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);

  // The mapping of LLVM FP condition codes onto AArch64 CCs isn't totally
  // clean; some require two branches to implement.
  SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
  AArch64CC::CondCode CC1, CC2;
  changeFPCCToAArch64CC(CC, CC1, CC2);
  SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
  SDValue BR1 =
      DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CC1Val, Cmp);
  if (CC2 != AArch64CC::AL) {
    // Second branch on the same FCMP; the two conditions are effectively
    // OR'ed together.
    SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
    return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, BR1, Dest, CC2Val,
                       Cmp);
  }

  return BR1;
}
7035 | |
/// Lower ISD::FCOPYSIGN. For SVE types this is plain integer bit arithmetic;
/// for scalar/NEON types it becomes a BIT (bitwise insert if true) on vector
/// registers using a sign-bit mask.
SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
                                              SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc DL(Op);

  SDValue In1 = Op.getOperand(0); // magnitude source
  SDValue In2 = Op.getOperand(1); // sign source
  EVT SrcVT = In2.getValueType();

  if (VT.isScalableVector()) {
    // Only same-typed operands are handled for scalable vectors.
    if (VT != SrcVT)
      return SDValue();

    // copysign(x,y) -> (y & SIGN) | (x & ~SIGN), performed on the
    // equivalently-packed integer vector type.
    EVT IntVT =
        getPackedSVEVectorVT(VT.getVectorElementType().changeTypeToInteger());
    unsigned NumBits = VT.getScalarSizeInBits();
    SDValue SignMask = DAG.getConstant(APInt::getSignMask(NumBits), DL, IntVT);
    SDValue InvSignMask = DAG.getNOT(DL, SignMask, IntVT);
    SDValue Sign = DAG.getNode(ISD::AND, DL, IntVT, SignMask,
                               getSVESafeBitCast(IntVT, In2, DAG));
    SDValue Magnitude = DAG.getNode(ISD::AND, DL, IntVT, InvSignMask,
                                    getSVESafeBitCast(IntVT, In1, DAG));
    SDValue IntResult = DAG.getNode(ISD::OR, DL, IntVT, Sign, Magnitude);
    return getSVESafeBitCast(VT, IntResult, DAG);
  }

  // Bring the sign operand to the result's FP type if it differs.
  if (SrcVT.bitsLT(VT))
    In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2);
  else if (SrcVT.bitsGT(VT))
    In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, DAG.getIntPtrConstant(0, DL));

  EVT VecVT;           // integer vector type the BIT operates on
  uint64_t EltMask;    // per-element sign-bit mask
  SDValue VecVal1, VecVal2;

  // Move scalar operands into vector registers via subregister inserts;
  // vector operands are just bitcast.
  auto setVecVal = [&] (int Idx) {
    if (!VT.isVector()) {
      VecVal1 = DAG.getTargetInsertSubreg(Idx, DL, VecVT,
                                          DAG.getUNDEF(VecVT), In1);
      VecVal2 = DAG.getTargetInsertSubreg(Idx, DL, VecVT,
                                          DAG.getUNDEF(VecVT), In2);
    } else {
      VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1);
      VecVal2 = DAG.getNode(ISD::BITCAST, DL, VecVT, In2);
    }
  };

  if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) {
    VecVT = (VT == MVT::v2f32 ? MVT::v2i32 : MVT::v4i32);
    EltMask = 0x80000000ULL;
    setVecVal(AArch64::ssub);
  } else if (VT == MVT::f64 || VT == MVT::v2f64) {
    VecVT = MVT::v2i64;

    // The 64-bit sign mask 0x8000000000000000 can't be built as a cheap
    // vector immediate, so start with 0 here; it is turned into the real
    // mask below by FNEG (0.0 -> -0.0 sets exactly the sign bit).
    EltMask = 0;

    setVecVal(AArch64::dsub);
  } else if (VT == MVT::f16 || VT == MVT::v4f16 || VT == MVT::v8f16) {
    VecVT = (VT == MVT::v4f16 ? MVT::v4i16 : MVT::v8i16);
    EltMask = 0x8000ULL;
    setVecVal(AArch64::hsub);
  } else {
    llvm_unreachable("Invalid type for copysign!");
  }

  SDValue BuildVec = DAG.getConstant(EltMask, DL, VecVT);

  // For f64, materialize the sign mask as FNEG of +0.0 (see EltMask above).
  if (VT == MVT::f64 || VT == MVT::v2f64) {
    BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, BuildVec);
    BuildVec = DAG.getNode(ISD::FNEG, DL, MVT::v2f64, BuildVec);
    BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, BuildVec);
  }

  // BIT: copy the masked (sign) bits of VecVal2 into VecVal1.
  SDValue Sel =
      DAG.getNode(AArch64ISD::BIT, DL, VecVT, VecVal1, VecVal2, BuildVec);

  // Extract the scalar result back out, or bitcast for vector results.
  if (VT == MVT::f16)
    return DAG.getTargetExtractSubreg(AArch64::hsub, DL, VT, Sel);
  if (VT == MVT::f32)
    return DAG.getTargetExtractSubreg(AArch64::ssub, DL, VT, Sel);
  else if (VT == MVT::f64)
    return DAG.getTargetExtractSubreg(AArch64::dsub, DL, VT, Sel);
  else
    return DAG.getNode(ISD::BITCAST, DL, VT, Sel);
}
7133 | |
/// Lower ISD::CTPOP. There is no integer popcount instruction, but the NEON
/// byte-wise CNT plus horizontal adds (UADDLV / pairwise UADDLP) give an
/// efficient sequence, e.g. for i64:
///   FMOV D0, X0; CNT V0.8B, V0.8B; UADDLV H0, V0.8B; FMOV W0, S0
SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
  // NoImplicitFloat forbids routing integers through SIMD registers.
  if (DAG.getMachineFunction().getFunction().hasFnAttribute(
          Attribute::NoImplicitFloat))
    return SDValue();

  if (!Subtarget->hasNEON())
    return SDValue();

  SDValue Val = Op.getOperand(0);
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  if (VT == MVT::i32 || VT == MVT::i64) {
    // Widen i32 to i64 so the value fits a 64-bit D register as 8 bytes.
    if (VT == MVT::i32)
      Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val);

    // Per-byte popcounts, then sum all lanes with UADDLV (result in i32).
    SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, Val);
    SDValue UaddLV = DAG.getNode(
        ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
        DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);

    if (VT == MVT::i64)
      UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV);
    return UaddLV;
  } else if (VT == MVT::i128) {
    // Same idea with a full 128-bit Q register (16 bytes).
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Val);

    SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v16i8, Val);
    SDValue UaddLV = DAG.getNode(
        ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
        DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);

    return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, UaddLV);
  }

  // SVE and SVE-backed fixed-length vectors use the predicated CNT.
  if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT))
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTPOP_MERGE_PASSTHRU);

  assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
          VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
         "Unexpected type for custom ctpop lowering");

  // NEON vectors: byte-wise CNT on the reinterpreted vector...
  EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
  Val = DAG.getBitcast(VT8Bit, Val);
  Val = DAG.getNode(ISD::CTPOP, DL, VT8Bit, Val);

  // ...then widen back to the requested element size with pairwise adds
  // (UADDLP doubles the element width and halves the count each step).
  unsigned EltSize = 8;
  unsigned NumElts = VT.is64BitVector() ? 8 : 16;
  while (EltSize != VT.getScalarSizeInBits()) {
    EltSize *= 2;
    NumElts /= 2;
    MVT WidenVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize), NumElts);
    Val = DAG.getNode(
        ISD::INTRINSIC_WO_CHAIN, DL, WidenVT,
        DAG.getConstant(Intrinsic::aarch64_neon_uaddlp, DL, MVT::i32), Val);
  }

  return Val;
}
7203 | |
7204 | SDValue AArch64TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const { |
7205 | EVT VT = Op.getValueType(); |
7206 | assert(VT.isScalableVector() || |
7207 | useSVEForFixedLengthVectorVT(VT, true)); |
7208 | |
7209 | SDLoc DL(Op); |
7210 | SDValue RBIT = DAG.getNode(ISD::BITREVERSE, DL, VT, Op.getOperand(0)); |
7211 | return DAG.getNode(ISD::CTLZ, DL, VT, RBIT); |
7212 | } |
7213 | |
7214 | SDValue AArch64TargetLowering::LowerMinMax(SDValue Op, |
7215 | SelectionDAG &DAG) const { |
7216 | |
7217 | EVT VT = Op.getValueType(); |
7218 | SDLoc DL(Op); |
7219 | unsigned Opcode = Op.getOpcode(); |
7220 | ISD::CondCode CC; |
7221 | switch (Opcode) { |
7222 | default: |
7223 | llvm_unreachable("Wrong instruction"); |
7224 | case ISD::SMAX: |
7225 | CC = ISD::SETGT; |
7226 | break; |
7227 | case ISD::SMIN: |
7228 | CC = ISD::SETLT; |
7229 | break; |
7230 | case ISD::UMAX: |
7231 | CC = ISD::SETUGT; |
7232 | break; |
7233 | case ISD::UMIN: |
7234 | CC = ISD::SETULT; |
7235 | break; |
7236 | } |
7237 | |
7238 | if (VT.isScalableVector() || |
7239 | useSVEForFixedLengthVectorVT(VT, true)) { |
7240 | switch (Opcode) { |
7241 | default: |
7242 | llvm_unreachable("Wrong instruction"); |
7243 | case ISD::SMAX: |
7244 | return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED, |
7245 | true); |
7246 | case ISD::SMIN: |
7247 | return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED, |
7248 | true); |
7249 | case ISD::UMAX: |
7250 | return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_PRED, |
7251 | true); |
7252 | case ISD::UMIN: |
7253 | return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED, |
7254 | true); |
7255 | } |
7256 | } |
7257 | |
7258 | SDValue Op0 = Op.getOperand(0); |
7259 | SDValue Op1 = Op.getOperand(1); |
7260 | SDValue Cond = DAG.getSetCC(DL, VT, Op0, Op1, CC); |
7261 | return DAG.getSelect(DL, VT, Cond, Op0, Op1); |
7262 | } |
7263 | |
7264 | SDValue AArch64TargetLowering::LowerBitreverse(SDValue Op, |
7265 | SelectionDAG &DAG) const { |
7266 | EVT VT = Op.getValueType(); |
7267 | |
7268 | if (VT.isScalableVector() || |
7269 | useSVEForFixedLengthVectorVT(VT, true)) |
7270 | return LowerToPredicatedOp(Op, DAG, AArch64ISD::BITREVERSE_MERGE_PASSTHRU, |
7271 | true); |
7272 | |
7273 | SDLoc DL(Op); |
7274 | SDValue REVB; |
7275 | MVT VST; |
7276 | |
7277 | switch (VT.getSimpleVT().SimpleTy) { |
7278 | default: |
7279 | llvm_unreachable("Invalid type for bitreverse!"); |
7280 | |
7281 | case MVT::v2i32: { |
7282 | VST = MVT::v8i8; |
7283 | REVB = DAG.getNode(AArch64ISD::REV32, DL, VST, Op.getOperand(0)); |
7284 | |
7285 | break; |
7286 | } |
7287 | |
7288 | case MVT::v4i32: { |
7289 | VST = MVT::v16i8; |
7290 | REVB = DAG.getNode(AArch64ISD::REV32, DL, VST, Op.getOperand(0)); |
7291 | |
7292 | break; |
7293 | } |
7294 | |
7295 | case MVT::v1i64: { |
7296 | VST = MVT::v8i8; |
7297 | REVB = DAG.getNode(AArch64ISD::REV64, DL, VST, Op.getOperand(0)); |
7298 | |
7299 | break; |
7300 | } |
7301 | |
7302 | case MVT::v2i64: { |
7303 | VST = MVT::v16i8; |
7304 | REVB = DAG.getNode(AArch64ISD::REV64, DL, VST, Op.getOperand(0)); |
7305 | |
7306 | break; |
7307 | } |
7308 | } |
7309 | |
7310 | return DAG.getNode(AArch64ISD::NVCAST, DL, VT, |
7311 | DAG.getNode(ISD::BITREVERSE, DL, VST, REVB)); |
7312 | } |
7313 | |
/// Lower ISD::SETCC / STRICT_FSETCC(S) for scalar types to CMP/FCMP + CSEL.
/// Conditions are inverted where it lets a CSEL with swapped 0/1 operands
/// match a single CSINC.
SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
  // Vector comparisons have their own lowering.
  if (Op.getValueType().isVector())
    return LowerVSETCC(Op, DAG);

  bool IsStrict = Op->isStrictFPOpcode();
  bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS;
  unsigned OpNo = IsStrict ? 1 : 0; // strict nodes carry the chain first
  SDValue Chain;
  if (IsStrict)
    Chain = Op.getOperand(0);
  SDValue LHS = Op.getOperand(OpNo + 0);
  SDValue RHS = Op.getOperand(OpNo + 1);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(OpNo + 2))->get();
  SDLoc dl(Op);

  // We chose ZeroOrOneBooleanContents, so the select values are 1 and 0.
  EVT VT = Op.getValueType();
  SDValue TVal = DAG.getConstant(1, dl, VT);
  SDValue FVal = DAG.getConstant(0, dl, VT);

  // Handle f128 first: softening may already yield the boolean result, or
  // leave an integer comparison for the code below.
  if (LHS.getValueType() == MVT::f128) {
    softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS, Chain,
                        IsSignaling);

    // If softenSetCCOperands returned a scalar, it is the final result.
    if (!RHS.getNode()) {
      assert(LHS.getValueType() == Op.getValueType() &&
             "Unexpected setcc expansion!");
      return IsStrict ? DAG.getMergeValues({LHS, Chain}, dl) : LHS;
    }
  }

  if (LHS.getValueType().isInteger()) {
    SDValue CCVal;
    // Compare with the inverted condition...
    SDValue Cmp = getAArch64Cmp(
        LHS, RHS, ISD::getSetCCInverse(CC, LHS.getValueType()), CCVal, DAG, dl);

    // ...and select (0, 1) in that inverted order, so the CSEL can be
    // matched to a single CSINC instruction.
    SDValue Res = DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CCVal, Cmp);
    return IsStrict ? DAG.getMergeValues({Res, Chain}, dl) : Res;
  }

  // Now we know we're dealing with FP values.
  assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
         LHS.getValueType() == MVT::f64);

  // Perform the FP comparison (strict variants thread the chain and may
  // signal on quiet NaNs).
  SDValue Cmp;
  if (IsStrict)
    Cmp = emitStrictFPComparison(LHS, RHS, dl, DAG, Chain, IsSignaling);
  else
    Cmp = emitComparison(LHS, RHS, CC, dl, DAG);

  AArch64CC::CondCode CC1, CC2;
  changeFPCCToAArch64CC(CC, CC1, CC2);
  SDValue Res;
  if (CC2 == AArch64CC::AL) {
    // Single-condition case: invert and swap operands, again to allow a
    // CSINC match.
    changeFPCCToAArch64CC(ISD::getSetCCInverse(CC, LHS.getValueType()), CC1,
                          CC2);
    SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);

    Res = DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CC1Val, Cmp);
  } else {
    // Some FP conditions need two CSELs: emit the first, then a second
    // using the first's output as its false value — effectively OR'ing
    // the two conditions together.
    SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
    SDValue CS1 =
        DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);

    SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
    Res = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
  }
  return IsStrict ? DAG.getMergeValues({Res, Cmp.getValue(1)}, dl) : Res;
}
7401 | |
/// Core SELECT_CC lowering: pick between TVal and FVal based on comparing
/// LHS and RHS under CC. Integer selects are mapped onto CSEL and, where the
/// operands permit, onto the cheaper CSINV/CSNEG/CSINC forms; FP selects use
/// FCMP + one or two CSELs.
SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
                                              SDValue RHS, SDValue TVal,
                                              SDValue FVal, const SDLoc &dl,
                                              SelectionDAG &DAG) const {
  // Handle f128 first: softening turns it into a libcall whose result is
  // compared as an integer.
  if (LHS.getValueType() == MVT::f128) {
    softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS);

    // If softenSetCCOperands returned a scalar, compare it against zero to
    // select between the true and false values.
    if (!RHS.getNode()) {
      RHS = DAG.getConstant(0, dl, LHS.getValueType());
      CC = ISD::SETNE;
    }
  }

  // Without full FP16, compare f16 operands as f32.
  if (LHS.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
    LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
    RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
  }

  // Next, handle integers.
  if (LHS.getValueType().isInteger()) {
    assert((LHS.getValueType() == RHS.getValueType()) &&
           (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));

    ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
    ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
    ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);

    // Sign pattern (select_cc setgt, lhs, -1, 1, -1) becomes
    // (or (sra lhs, N-1), 1), which needs fewer instructions.
    if (CC == ISD::SETGT && RHSC && RHSC->isAllOnesValue() && CTVal && CFVal &&
        CTVal->isOne() && CFVal->isAllOnesValue() &&
        LHS.getValueType() == TVal.getValueType()) {
      EVT VT = LHS.getValueType();
      SDValue Shift =
          DAG.getNode(ISD::SRA, dl, VT, LHS,
                      DAG.getConstant(VT.getSizeInBits() - 1, dl, VT));
      return DAG.getNode(ISD::OR, dl, VT, Shift, DAG.getConstant(1, dl, VT));
    }

    unsigned Opcode = AArch64ISD::CSEL;

    // If TVal/FVal have special shapes, swap them (inverting the condition)
    // so that a CSINV/CSNEG can be formed instead of a plain CSEL.
    if (CTVal && CFVal && CTVal->isAllOnesValue() && CFVal->isNullValue()) {
      // (cc ? -1 : 0) -> inverted (cc ? 0 : -1), matches CSINV with xzr.
      std::swap(TVal, FVal);
      std::swap(CTVal, CFVal);
      CC = ISD::getSetCCInverse(CC, LHS.getValueType());
    } else if (CTVal && CFVal && CTVal->isOne() && CFVal->isNullValue()) {
      // (cc ? 1 : 0) -> inverted (cc ? 0 : 1), matches CSINC with wzr/xzr.
      std::swap(TVal, FVal);
      std::swap(CTVal, CFVal);
      CC = ISD::getSetCCInverse(CC, LHS.getValueType());
    } else if (TVal.getOpcode() == ISD::XOR) {
      // If TVal is a NOT (xor with all-ones), swap so CSINV can absorb it.
      if (isAllOnesConstant(TVal.getOperand(1))) {
        std::swap(TVal, FVal);
        std::swap(CTVal, CFVal);
        CC = ISD::getSetCCInverse(CC, LHS.getValueType());
      }
    } else if (TVal.getOpcode() == ISD::SUB) {
      // If TVal is a negation (0 - x), swap so CSNEG can absorb it.
      if (isNullConstant(TVal.getOperand(0))) {
        std::swap(TVal, FVal);
        std::swap(CTVal, CFVal);
        CC = ISD::getSetCCInverse(CC, LHS.getValueType());
      }
    } else if (CTVal && CFVal) {
      const int64_t TrueVal = CTVal->getSExtValue();
      const int64_t FalseVal = CFVal->getSExtValue();
      bool Swap = false;

      // Both operands constant: check whether FVal is the inverse,
      // negation, or increment of TVal and pick CSINV/CSNEG/CSINC.
      if (TrueVal == ~FalseVal) {
        Opcode = AArch64ISD::CSINV;
      } else if (FalseVal > std::numeric_limits<int64_t>::min() &&
                 TrueVal == -FalseVal) {
        // Guard against INT64_MIN, whose negation overflows.
        Opcode = AArch64ISD::CSNEG;
      } else if (TVal.getValueType() == MVT::i32) {
        // Use 32-bit arithmetic for the +1 check so that wrap-around is
        // handled correctly for 32-bit operands (64-bit arithmetic would
        // miss the overflowing case).
        const uint32_t TrueVal32 = CTVal->getZExtValue();
        const uint32_t FalseVal32 = CFVal->getZExtValue();

        if ((TrueVal32 == FalseVal32 + 1) || (TrueVal32 + 1 == FalseVal32)) {
          Opcode = AArch64ISD::CSINC;

          // CSINC increments the false operand, so put the smaller value
          // in the true slot.
          if (TrueVal32 > FalseVal32) {
            Swap = true;
          }
        }

      } else if ((TrueVal == FalseVal + 1) || (TrueVal + 1 == FalseVal)) {
        Opcode = AArch64ISD::CSINC;

        if (TrueVal > FalseVal) {
          Swap = true;
        }
      }

      // Swap TVal and FVal if necessary, inverting the condition.
      if (Swap) {
        std::swap(TVal, FVal);
        std::swap(CTVal, CFVal);
        CC = ISD::getSetCCInverse(CC, LHS.getValueType());
      }

      if (Opcode != AArch64ISD::CSEL) {
        // The false value is obtained by inverting/negating/incrementing
        // TVal, so only TVal needs to be materialized.
        FVal = TVal;
      }
    }

    // Avoid materializing a constant by reusing a value already known to be
    // in a register — but not for 1, 0 or -1 under CSEL, since those come
    // for free via CSINC/CSEL/CSINV against wzr/xzr.
    ConstantSDNode *RHSVal = dyn_cast<ConstantSDNode>(RHS);
    if (Opcode == AArch64ISD::CSEL && RHSVal && !RHSVal->isOne() &&
        !RHSVal->isNullValue() && !RHSVal->isAllOnesValue()) {
      AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);

      // "a == C ? C : x" -> "a == C ? a : x" and
      // "a != C ? x : C" -> "a != C ? x : a", reusing a instead of C.
      if (CTVal && CTVal == RHSVal && AArch64CC == AArch64CC::EQ)
        TVal = LHS;
      else if (CFVal && CFVal == RHSVal && AArch64CC == AArch64CC::NE)
        FVal = LHS;
    } else if (Opcode == AArch64ISD::CSNEG && RHSVal && RHSVal->isOne()) {
      assert (CTVal && CFVal && "Expected constant operands for CSNEG.");

      // "a == 1 ? 1 : -1" -> CSINV "a == 1 ? a : ~0", avoiding the
      // materialization of 1.
      AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
      if (CTVal == RHSVal && AArch64CC == AArch64CC::EQ) {
        Opcode = AArch64ISD::CSINV;
        TVal = LHS;
        FVal = DAG.getConstant(0, dl, FVal.getValueType());
      }
    }

    SDValue CCVal;
    SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
    EVT VT = TVal.getValueType();
    return DAG.getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp);
  }

  // Now we know we're dealing with FP values.
  assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
         LHS.getValueType() == MVT::f64);
  assert(LHS.getValueType() == RHS.getValueType());
  EVT VT = TVal.getValueType();
  SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);

  // The mapping of LLVM FP condition codes onto AArch64 CCs isn't totally
  // clean; some of them require two CSELs to implement.
  AArch64CC::CondCode CC1, CC2;
  changeFPCCToAArch64CC(CC, CC1, CC2);

  if (DAG.getTarget().Options.UnsafeFPMath) {
    // "a == 0.0 ? 0.0 : x" -> "a == 0.0 ? a : x" and
    // "a != 0.0 ? x : 0.0" -> "a != 0.0 ? x : a", avoiding a 0.0 load.
    // (Only valid under UnsafeFPMath: it conflates -0.0 with +0.0.)
    ConstantFPSDNode *RHSVal = dyn_cast<ConstantFPSDNode>(RHS);
    if (RHSVal && RHSVal->isZero()) {
      ConstantFPSDNode *CFVal = dyn_cast<ConstantFPSDNode>(FVal);
      ConstantFPSDNode *CTVal = dyn_cast<ConstantFPSDNode>(TVal);

      if ((CC == ISD::SETEQ || CC == ISD::SETOEQ || CC == ISD::SETUEQ) &&
          CTVal && CTVal->isZero() && TVal.getValueType() == LHS.getValueType())
        TVal = LHS;
      else if ((CC == ISD::SETNE || CC == ISD::SETONE || CC == ISD::SETUNE) &&
               CFVal && CFVal->isZero() &&
               FVal.getValueType() == LHS.getValueType())
        FVal = LHS;
    }
  }

  // Emit the first, and possibly only, CSEL.
  SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
  SDValue CS1 = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);

  // If a second CSEL is needed, feed the first's output in as its false
  // value — effectively OR'ing the two conditions together.
  if (CC2 != AArch64CC::AL) {
    SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
    return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
  }

  // Otherwise, return the output of the first CSEL.
  return CS1;
}
7603 | |
7604 | SDValue AArch64TargetLowering::LowerVECTOR_SPLICE(SDValue Op, |
7605 | SelectionDAG &DAG) const { |
7606 | |
7607 | EVT Ty = Op.getValueType(); |
7608 | auto Idx = Op.getConstantOperandAPInt(2); |
7609 | if (Idx.sge(-1) && Idx.slt(Ty.getVectorMinNumElements())) |
7610 | return Op; |
7611 | return SDValue(); |
7612 | } |
7613 | |
7614 | SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op, |
7615 | SelectionDAG &DAG) const { |
7616 | ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); |
7617 | SDValue LHS = Op.getOperand(0); |
7618 | SDValue RHS = Op.getOperand(1); |
7619 | SDValue TVal = Op.getOperand(2); |
7620 | SDValue FVal = Op.getOperand(3); |
7621 | SDLoc DL(Op); |
7622 | return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG); |
7623 | } |
7624 | |
// Lower an ISD::SELECT node: select(cc, tval, fval).
SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDValue CCVal = Op->getOperand(0); // Scalar condition value.
  SDValue TVal = Op->getOperand(1);  // Result when the condition is true.
  SDValue FVal = Op->getOperand(2);  // Result when the condition is false.
  SDLoc DL(Op);

  EVT Ty = Op.getValueType();
  if (Ty.isScalableVector()) {
    // Scalable result: truncate the scalar condition to i1, splat it into a
    // predicate vector, and select with VSELECT.
    SDValue TruncCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, CCVal);
    MVT PredVT = MVT::getVectorVT(MVT::i1, Ty.getVectorElementCount());
    SDValue SplatPred = DAG.getNode(ISD::SPLAT_VECTOR, DL, PredVT, TruncCC);
    return DAG.getNode(ISD::VSELECT, DL, Ty, SplatPred, TVal, FVal);
  }

  if (useSVEForFixedLengthVectorVT(Ty)) {
    // Fixed-length vector handled via SVE: sign-extend the condition to the
    // element width, splat it, and select with VSELECT.
    MVT SplatValVT = MVT::getIntegerVT(Ty.getScalarSizeInBits());
    MVT PredVT = MVT::getVectorVT(SplatValVT, Ty.getVectorElementCount());
    SDValue SplatVal = DAG.getSExtOrTrunc(CCVal, DL, SplatValVT);
    SDValue SplatPred = DAG.getNode(ISD::SPLAT_VECTOR, DL, PredVT, SplatVal);
    return DAG.getNode(ISD::VSELECT, DL, Ty, SplatPred, TVal, FVal);
  }

  // If the condition is the overflow flag of an {s,u}*.with.overflow
  // intrinsic, reuse the flags that operation already produces in a CSEL.
  if (ISD::isOverflowIntrOpRes(CCVal)) {
    // Only lower this if the type of the overflow op is legal.
    if (!DAG.getTargetLoweringInfo().isTypeLegal(CCVal->getValueType(0)))
      return SDValue();

    AArch64CC::CondCode OFCC;
    SDValue Value, Overflow;
    std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CCVal.getValue(0), DAG);
    // NOTE: this shadows the outer CCVal; from here it is the AArch64
    // condition-code constant, not the original select condition.
    SDValue CCVal = DAG.getConstant(OFCC, DL, MVT::i32);

    return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal,
                       CCVal, Overflow);
  }

  // Otherwise recover a compare from the condition, or synthesize one.
  ISD::CondCode CC;
  SDValue LHS, RHS;
  if (CCVal.getOpcode() == ISD::SETCC) {
    LHS = CCVal.getOperand(0);
    RHS = CCVal.getOperand(1);
    CC = cast<CondCodeSDNode>(CCVal.getOperand(2))->get();
  } else {
    // No visible compare: test the condition value against zero.
    LHS = CCVal;
    RHS = DAG.getConstant(0, DL, CCVal.getValueType());
    CC = ISD::SETNE;
  }
  return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
}
7681 | |
7682 | SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op, |
7683 | SelectionDAG &DAG) const { |
7684 | |
7685 | |
7686 | JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); |
7687 | |
7688 | if (getTargetMachine().getCodeModel() == CodeModel::Large && |
7689 | !Subtarget->isTargetMachO()) { |
7690 | return getAddrLarge(JT, DAG); |
7691 | } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) { |
7692 | return getAddrTiny(JT, DAG); |
7693 | } |
7694 | return getAddr(JT, DAG); |
7695 | } |
7696 | |
7697 | SDValue AArch64TargetLowering::LowerBR_JT(SDValue Op, |
7698 | SelectionDAG &DAG) const { |
7699 | |
7700 | |
7701 | SDLoc DL(Op); |
7702 | SDValue JT = Op.getOperand(1); |
7703 | SDValue Entry = Op.getOperand(2); |
7704 | int JTI = cast<JumpTableSDNode>(JT.getNode())->getIndex(); |
7705 | |
7706 | auto *AFI = DAG.getMachineFunction().getInfo<AArch64FunctionInfo>(); |
7707 | AFI->setJumpTableEntryInfo(JTI, 4, nullptr); |
7708 | |
7709 | SDNode *Dest = |
7710 | DAG.getMachineNode(AArch64::JumpTableDest32, DL, MVT::i64, MVT::i64, JT, |
7711 | Entry, DAG.getTargetJumpTable(JTI, MVT::i32)); |
7712 | return DAG.getNode(ISD::BRIND, DL, MVT::Other, Op.getOperand(0), |
7713 | SDValue(Dest, 0)); |
7714 | } |
7715 | |
7716 | SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op, |
7717 | SelectionDAG &DAG) const { |
7718 | ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); |
7719 | |
7720 | if (getTargetMachine().getCodeModel() == CodeModel::Large) { |
7721 | |
7722 | if (Subtarget->isTargetMachO()) { |
7723 | return getGOT(CP, DAG); |
7724 | } |
7725 | return getAddrLarge(CP, DAG); |
7726 | } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) { |
7727 | return getAddrTiny(CP, DAG); |
7728 | } else { |
7729 | return getAddr(CP, DAG); |
7730 | } |
7731 | } |
7732 | |
7733 | SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op, |
7734 | SelectionDAG &DAG) const { |
7735 | BlockAddressSDNode *BA = cast<BlockAddressSDNode>(Op); |
7736 | if (getTargetMachine().getCodeModel() == CodeModel::Large && |
7737 | !Subtarget->isTargetMachO()) { |
7738 | return getAddrLarge(BA, DAG); |
7739 | } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) { |
7740 | return getAddrTiny(BA, DAG); |
7741 | } |
7742 | return getAddr(BA, DAG); |
7743 | } |
7744 | |
// Lower va_start on Darwin, where a va_list is a single pointer to the
// first variadic stack slot.
SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op,
                                                   SelectionDAG &DAG) const {
  AArch64FunctionInfo *FuncInfo =
      DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();

  SDLoc DL(Op);
  // Address of the first variadic argument on the stack.
  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(),
                                 getPointerTy(DAG.getDataLayout()));
  // Pointers stored to memory may be narrower than the native pointer
  // type (ILP32), so adjust the width for the store.
  FR = DAG.getZExtOrTrunc(FR, DL, getPointerMemTy(DAG.getDataLayout()));
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  // Store that address through the va_list pointer (operand 1).
  return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
                      MachinePointerInfo(SV));
}
7758 | |
7759 | SDValue AArch64TargetLowering::LowerWin64_VASTART(SDValue Op, |
7760 | SelectionDAG &DAG) const { |
7761 | AArch64FunctionInfo *FuncInfo = |
7762 | DAG.getMachineFunction().getInfo<AArch64FunctionInfo>(); |
7763 | |
7764 | SDLoc DL(Op); |
7765 | SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsGPRSize() > 0 |
7766 | ? FuncInfo->getVarArgsGPRIndex() |
7767 | : FuncInfo->getVarArgsStackIndex(), |
7768 | getPointerTy(DAG.getDataLayout())); |
7769 | const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); |
7770 | return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1), |
7771 | MachinePointerInfo(SV)); |
7772 | } |
7773 | |
// Lower va_start for the AAPCS ABI, where the va_list is a struct:
//   { void *__stack; void *__gr_top; void *__vr_top;
//     int __gr_offs; int __vr_offs; }
// Each field is initialized with a separate store; the stores are chained
// with a TokenFactor.
SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
                                                  SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
  auto PtrMemVT = getPointerMemTy(DAG.getDataLayout());
  auto PtrVT = getPointerTy(DAG.getDataLayout());
  SDLoc DL(Op);

  SDValue Chain = Op.getOperand(0);
  SDValue VAList = Op.getOperand(1); // Pointer to the va_list struct.
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  SmallVector<SDValue, 4> MemOps;

  // __stack: address of the first variadic stack slot.
  unsigned Offset = 0;
  SDValue Stack = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), PtrVT);
  Stack = DAG.getZExtOrTrunc(Stack, DL, PtrMemVT);
  MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList,
                                MachinePointerInfo(SV), Align(PtrSize)));

  // __gr_top: one past the end of the GPR save area, written only when GPRs
  // were actually saved.
  Offset += PtrSize;
  int GPRSize = FuncInfo->getVarArgsGPRSize();
  if (GPRSize > 0) {
    SDValue GRTop, GRTopAddr;

    GRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
                            DAG.getConstant(Offset, DL, PtrVT));

    GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), PtrVT);
    GRTop = DAG.getNode(ISD::ADD, DL, PtrVT, GRTop,
                        DAG.getConstant(GPRSize, DL, PtrVT));
    GRTop = DAG.getZExtOrTrunc(GRTop, DL, PtrMemVT);

    MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr,
                                  MachinePointerInfo(SV, Offset),
                                  Align(PtrSize)));
  }

  // __vr_top: one past the end of the FP/SIMD save area, written only when
  // FPRs were actually saved.
  Offset += PtrSize;
  int FPRSize = FuncInfo->getVarArgsFPRSize();
  if (FPRSize > 0) {
    SDValue VRTop, VRTopAddr;
    VRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
                            DAG.getConstant(Offset, DL, PtrVT));

    VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), PtrVT);
    VRTop = DAG.getNode(ISD::ADD, DL, PtrVT, VRTop,
                        DAG.getConstant(FPRSize, DL, PtrVT));
    VRTop = DAG.getZExtOrTrunc(VRTop, DL, PtrMemVT);

    MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr,
                                  MachinePointerInfo(SV, Offset),
                                  Align(PtrSize)));
  }

  // __gr_offs: negated size of the GPR save area (a 32-bit int).
  Offset += PtrSize;
  SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
                                   DAG.getConstant(Offset, DL, PtrVT));
  MemOps.push_back(
      DAG.getStore(Chain, DL, DAG.getConstant(-GPRSize, DL, MVT::i32),
                   GROffsAddr, MachinePointerInfo(SV, Offset), Align(4)));

  // __vr_offs: negated size of the FP/SIMD save area (a 32-bit int).
  Offset += 4;
  SDValue VROffsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
                                   DAG.getConstant(Offset, DL, PtrVT));
  MemOps.push_back(
      DAG.getStore(Chain, DL, DAG.getConstant(-FPRSize, DL, MVT::i32),
                   VROffsAddr, MachinePointerInfo(SV, Offset), Align(4)));

  // Chain all of the field stores together.
  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
}
7852 | |
7853 | SDValue AArch64TargetLowering::LowerVASTART(SDValue Op, |
7854 | SelectionDAG &DAG) const { |
7855 | MachineFunction &MF = DAG.getMachineFunction(); |
7856 | |
7857 | if (Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv())) |
7858 | return LowerWin64_VASTART(Op, DAG); |
7859 | else if (Subtarget->isTargetDarwin()) |
7860 | return LowerDarwin_VASTART(Op, DAG); |
7861 | else |
7862 | return LowerAAPCS_VASTART(Op, DAG); |
7863 | } |
7864 | |
7865 | SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op, |
7866 | SelectionDAG &DAG) const { |
7867 | |
7868 | |
7869 | SDLoc DL(Op); |
7870 | unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8; |
7871 | unsigned VaListSize = |
7872 | (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows()) |
7873 | ? PtrSize |
7874 | : Subtarget->isTargetILP32() ? 20 : 32; |
7875 | const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue(); |
7876 | const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); |
7877 | |
7878 | return DAG.getMemcpy(Op.getOperand(0), DL, Op.getOperand(1), Op.getOperand(2), |
7879 | DAG.getConstant(VaListSize, DL, MVT::i32), |
7880 | Align(PtrSize), false, false, false, |
7881 | MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV)); |
7882 | } |
7883 | |
// Lower va_arg for Darwin, where a va_list is a single pointer that is
// bumped past each consumed argument slot.
SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
  assert(Subtarget->isTargetDarwin() &&
         "automatic va_arg instruction only works on Darwin");

  const Value *V = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Addr = Op.getOperand(1); // Address of the va_list pointer.
  MaybeAlign Align(Op.getConstantOperandVal(3));
  unsigned MinSlotSize = Subtarget->isTargetILP32() ? 4 : 8;
  auto PtrVT = getPointerTy(DAG.getDataLayout());
  auto PtrMemVT = getPointerMemTy(DAG.getDataLayout());
  // Load the current argument pointer out of the va_list.
  SDValue VAList =
      DAG.getLoad(PtrMemVT, DL, Chain, Addr, MachinePointerInfo(V));
  Chain = VAList.getValue(1);
  VAList = DAG.getZExtOrTrunc(VAList, DL, PtrVT);

  if (VT.isScalableVector())
    report_fatal_error("Passing SVE types to variadic functions is "
                       "currently not supported");

  // Round the argument pointer up when the requested alignment exceeds the
  // minimum slot size: p = (p + align - 1) & -align.
  if (Align && *Align > MinSlotSize) {
    VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
                         DAG.getConstant(Align->value() - 1, DL, PtrVT));
    VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList,
                         DAG.getConstant(-(int64_t)Align->value(), DL, PtrVT));
  }

  Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
  unsigned ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy);

  // Scalar integers consume at least one full slot.
  if (VT.isInteger() && !VT.isVector())
    ArgSize = std::max(ArgSize, MinSlotSize);
  // Scalar FP narrower than f64 occupies an 8-byte slot; load it as f64 and
  // round it to the requested type afterwards.
  bool NeedFPTrunc = false;
  if (VT.isFloatingPoint() && !VT.isVector() && VT != MVT::f64) {
    ArgSize = 8;
    NeedFPTrunc = true;
  }

  // Advance the pointer past this argument...
  SDValue VANext = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
                               DAG.getConstant(ArgSize, DL, PtrVT));
  VANext = DAG.getZExtOrTrunc(VANext, DL, PtrMemVT);

  // ...and store the updated pointer back into the va_list.
  SDValue APStore =
      DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V));

  if (NeedFPTrunc) {
    // Load the slot as f64.
    SDValue WideFP =
        DAG.getLoad(MVT::f64, DL, APStore, VAList, MachinePointerInfo());
    // Round down to the requested FP type; the trailing constant 1 tells
    // FP_ROUND the conversion loses no information.
    SDValue NarrowFP = DAG.getNode(ISD::FP_ROUND, DL, VT, WideFP.getValue(0),
                                   DAG.getIntPtrConstant(1, DL));
    SDValue Ops[] = { NarrowFP, WideFP.getValue(1) };
    // Merge the rounded value with the load's chain.
    return DAG.getMergeValues(Ops, DL);
  }

  return DAG.getLoad(VT, DL, APStore, VAList, MachinePointerInfo());
}
7952 | |
// Lower llvm.frameaddress: walk Depth saved frame pointers starting at FP.
SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op,
                                              SelectionDAG &DAG) const {
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  MFI.setFrameAddressIsTaken(true); // Record that the frame address escapes.

  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  // Depth 0 is the current frame pointer itself.
  SDValue FrameAddr =
      DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, MVT::i64);
  // Each additional level loads the caller's saved FP from offset 0.
  while (Depth--)
    FrameAddr = DAG.getLoad(VT, DL, DAG.getEntryNode(), FrameAddr,
                            MachinePointerInfo());

  // Under ILP32, assert that the value is zero-extended in the 64-bit reg.
  if (Subtarget->isTargetILP32())
    FrameAddr = DAG.getNode(ISD::AssertZext, DL, MVT::i64, FrameAddr,
                            DAG.getValueType(VT));

  return FrameAddr;
}
7973 | |
7974 | SDValue AArch64TargetLowering::LowerSPONENTRY(SDValue Op, |
7975 | SelectionDAG &DAG) const { |
7976 | MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); |
7977 | |
7978 | EVT VT = getPointerTy(DAG.getDataLayout()); |
7979 | SDLoc DL(Op); |
7980 | int FI = MFI.CreateFixedObject(4, 0, false); |
7981 | return DAG.getFrameIndex(FI, VT); |
7982 | } |
7983 | |
7984 | #define GET_REGISTER_MATCHER |
7985 | #include "AArch64GenAsmMatcher.inc" |
7986 | |
7987 | |
7988 | |
// Resolve a register name (e.g. from llvm.read_register) to a physical
// register. X1..X28 are only accepted when the corresponding register is
// reserved; any other name is a fatal error.
Register AArch64TargetLowering::
getRegisterByName(const char* RegName, LLT VT, const MachineFunction &MF) const {
  Register Reg = MatchRegisterName(RegName);
  if (AArch64::X1 <= Reg && Reg <= AArch64::X28) {
    const MCRegisterInfo *MRI = Subtarget->getRegisterInfo();
    // NOTE(review): the reservation check is keyed by the Dwarf register
    // number, not the encoded register id — confirm isXRegisterReserved
    // expects that indexing.
    unsigned DwarfRegNum = MRI->getDwarfRegNum(Reg, false);
    if (!Subtarget->isXRegisterReserved(DwarfRegNum))
      Reg = 0; // Not reserved: reject the name below.
  }
  if (Reg)
    return Reg;
  report_fatal_error(Twine("Invalid register name \""
                           + StringRef(RegName) + "\"."));
}
8003 | |
8004 | SDValue AArch64TargetLowering::LowerADDROFRETURNADDR(SDValue Op, |
8005 | SelectionDAG &DAG) const { |
8006 | DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true); |
8007 | |
8008 | EVT VT = Op.getValueType(); |
8009 | SDLoc DL(Op); |
8010 | |
8011 | SDValue FrameAddr = |
8012 | DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, VT); |
8013 | SDValue Offset = DAG.getConstant(8, DL, getPointerTy(DAG.getDataLayout())); |
8014 | |
8015 | return DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset); |
8016 | } |
8017 | |
// Lower llvm.returnaddress: fetch the return address for the requested
// frame depth, then strip any pointer-authentication signature from it.
SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op,
                                               SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setReturnAddressIsTaken(true);

  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  SDValue ReturnAddress;
  if (Depth) {
    // Outer frame: the return address is loaded from frameaddr(Depth) + 8.
    SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
    SDValue Offset = DAG.getConstant(8, DL, getPointerTy(DAG.getDataLayout()));
    ReturnAddress = DAG.getLoad(
        VT, DL, DAG.getEntryNode(),
        DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset), MachinePointerInfo());
  } else {
    // Current frame: the return address arrives live-in in LR.
    unsigned Reg = MF.addLiveIn(AArch64::LR, &AArch64::GPR64RegClass);
    ReturnAddress = DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT);
  }

  // The value may carry a PAC signature; remove it with an XPAC variant
  // before handing it back to the caller.
  SDNode *St;
  if (Subtarget->hasPAuth()) {
    // XPACI takes the value in an arbitrary register.
    St = DAG.getMachineNode(AArch64::XPACI, DL, VT, ReturnAddress);
  } else {
    // XPACLRI implicitly operates on LR, so route the value through LR.
    SDValue Chain =
        DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::LR, ReturnAddress);
    St = DAG.getMachineNode(AArch64::XPACLRI, DL, VT, Chain);
  }
  return SDValue(St, 0);
}
8056 | |
8057 | |
8058 | |
8059 | SDValue AArch64TargetLowering::LowerShiftParts(SDValue Op, |
8060 | SelectionDAG &DAG) const { |
8061 | SDValue Lo, Hi; |
8062 | expandShiftParts(Op.getNode(), Lo, Hi, DAG); |
8063 | return DAG.getMergeValues({Lo, Hi}, SDLoc(Op)); |
8064 | } |
8065 | |
bool AArch64TargetLowering::isOffsetFoldingLegal(
    const GlobalAddressSDNode *GA) const {
  // Offsets are never folded into global-address nodes on AArch64.
  return false;
}
8072 | |
// Return true if Imm is cheap to materialize for type VT: either as an
// FMOV-encodable immediate (or +0.0), or via a short integer MOV sequence.
bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                         bool OptForSize) const {
  bool IsLegal = false;
  // First try the FMOV immediate encodings (f16 requires full fp16 support).
  // +0.0 is also accepted — it is available via the zero register.
  const APInt ImmInt = Imm.bitcastToAPInt();
  if (VT == MVT::f64)
    IsLegal = AArch64_AM::getFP64Imm(ImmInt) != -1 || Imm.isPosZero();
  else if (VT == MVT::f32)
    IsLegal = AArch64_AM::getFP32Imm(ImmInt) != -1 || Imm.isPosZero();
  else if (VT == MVT::f16 && Subtarget->hasFullFP16())
    IsLegal = AArch64_AM::getFP16Imm(ImmInt) != -1 || Imm.isPosZero();

  // Otherwise count how many integer MOV instructions the raw bit pattern
  // would take, and accept it when within budget: 1 when optimizing for
  // size, otherwise 2 (or 5 on cores that fuse literal pairs).
  if (!IsLegal && (VT == MVT::f64 || VT == MVT::f32)) {
    SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
    AArch64_IMM::expandMOVImm(ImmInt.getZExtValue(), VT.getSizeInBits(),
                              Insn);
    unsigned Limit = (OptForSize ? 1 : (Subtarget->hasFuseLiterals() ? 5 : 2));
    IsLegal = Insn.size() <= Limit;
  }

  LLVM_DEBUG(dbgs() << (IsLegal ? "Legal " : "Illegal ") << VT.getEVTString()
                    << " imm value: "; Imm.dump(););
  return IsLegal;
}
8109 | |
8110 | |
8111 | |
8112 | |
8113 | |
// Emit an AArch64 estimate node (e.g. FRECPE/FRSQRTE) for Operand when its
// type is one NEON supports, picking a default refinement-step count when
// the caller left it unspecified. Returns a null SDValue otherwise.
static SDValue getEstimate(const AArch64Subtarget *ST, unsigned Opcode,
                           SDValue Operand, SelectionDAG &DAG,
                           int &ExtraSteps) {
  EVT VT = Operand.getValueType();
  if (ST->hasNEON() &&
      (VT == MVT::f64 || VT == MVT::v1f64 || VT == MVT::v2f64 ||
       VT == MVT::f32 || VT == MVT::v1f32 ||
       VT == MVT::v2f32 || VT == MVT::v4f32)) {
    if (ExtraSteps == TargetLoweringBase::ReciprocalEstimate::Unspecified)
      // Doubles need one more refinement step than singles to reach the
      // required precision.
      ExtraSteps = VT.getScalarType() == MVT::f64 ? 3 : 2;

    return DAG.getNode(Opcode, SDLoc(Operand), VT, Operand);
  }

  return SDValue();
}
8135 | |
8136 | SDValue |
8137 | AArch64TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG, |
8138 | const DenormalMode &Mode) const { |
8139 | SDLoc DL(Op); |
8140 | EVT VT = Op.getValueType(); |
8141 | EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); |
8142 | SDValue FPZero = DAG.getConstantFP(0.0, DL, VT); |
8143 | return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ); |
8144 | } |
8145 | |
SDValue
AArch64TargetLowering::getSqrtResultForDenormInput(SDValue Op,
                                                   SelectionDAG &DAG) const {
  // For inputs caught by the guard test, the result is the input itself.
  return Op;
}
8151 | |
// Produce a (reciprocal) square-root estimate, refined with FRSQRTS steps.
// On success ExtraSteps is reset to 0 since the refinement is emitted here.
SDValue AArch64TargetLowering::getSqrtEstimate(SDValue Operand,
                                               SelectionDAG &DAG, int Enabled,
                                               int &ExtraSteps,
                                               bool &UseOneConst,
                                               bool Reciprocal) const {
  if (Enabled == ReciprocalEstimate::Enabled ||
      (Enabled == ReciprocalEstimate::Unspecified && Subtarget->useRSqrt()))
    if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRSQRTE, Operand,
                                       DAG, ExtraSteps)) {
      SDLoc DL(Operand);
      EVT VT = Operand.getValueType();

      // Allow reassociation so the refinement FMULs can be rearranged.
      SDNodeFlags Flags;
      Flags.setAllowReassociation(true);

      // Refine the estimate: e = e * frsqrts(x, e*e) for each step.
      for (int i = ExtraSteps; i > 0; --i) {
        SDValue Step = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Estimate,
                                   Flags);
        Step = DAG.getNode(AArch64ISD::FRSQRTS, DL, VT, Operand, Step, Flags);
        Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags);
      }
      // sqrt(x) = x * (1/sqrt(x)) when the non-reciprocal form is wanted.
      if (!Reciprocal)
        Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate, Flags);

      ExtraSteps = 0; // All refinement already emitted.
      return Estimate;
    }

  return SDValue();
}
8184 | |
// Produce a reciprocal estimate, refined with FRECPS steps. On success
// ExtraSteps is reset to 0 since the refinement is emitted here.
SDValue AArch64TargetLowering::getRecipEstimate(SDValue Operand,
                                                SelectionDAG &DAG, int Enabled,
                                                int &ExtraSteps) const {
  if (Enabled == ReciprocalEstimate::Enabled)
    if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRECPE, Operand,
                                       DAG, ExtraSteps)) {
      SDLoc DL(Operand);
      EVT VT = Operand.getValueType();

      // Allow reassociation so the refinement FMULs can be rearranged.
      SDNodeFlags Flags;
      Flags.setAllowReassociation(true);

      // Refine the estimate: e = e * frecps(x, e) for each step.
      for (int i = ExtraSteps; i > 0; --i) {
        SDValue Step = DAG.getNode(AArch64ISD::FRECPS, DL, VT, Operand,
                                   Estimate, Flags);
        Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags);
      }

      ExtraSteps = 0; // All refinement already emitted.
      return Estimate;
    }

  return SDValue();
}
8211 | |
8212 | |
8213 | |
8214 | |
8215 | |
8216 | |
8217 | |
8218 | |
8219 | |
8220 | |
8221 | |
8222 | |
8223 | |
8224 | |
8225 | |
8226 | |
8227 | |
8228 | |
8229 | |
8230 | |
8231 | |
8232 | |
8233 | |
8234 | |
8235 | |
8236 | |
8237 | |
8238 | |
8239 | const char *AArch64TargetLowering::LowerXConstraint(EVT ConstraintVT) const { |
8240 | |
8241 | |
8242 | |
8243 | |
8244 | |
8245 | |
8246 | |
8247 | if (!Subtarget->hasFPARMv8()) |
8248 | return "r"; |
8249 | |
8250 | if (ConstraintVT.isFloatingPoint()) |
8251 | return "w"; |
8252 | |
8253 | if (ConstraintVT.isVector() && |
8254 | (ConstraintVT.getSizeInBits() == 64 || |
8255 | ConstraintVT.getSizeInBits() == 128)) |
8256 | return "w"; |
8257 | |
8258 | return "r"; |
8259 | } |
8260 | |
// SVE predicate-register inline-asm constraint kinds ("Upl"/"Upa").
enum PredicateConstraint {
  Upl, // Restricted to the low predicate registers (PPR_3b).
  Upa, // Any predicate register (PPR).
  Invalid
};
8266 | |
8267 | static PredicateConstraint parsePredicateConstraint(StringRef Constraint) { |
8268 | PredicateConstraint P = PredicateConstraint::Invalid; |
8269 | if (Constraint == "Upa") |
8270 | P = PredicateConstraint::Upa; |
8271 | if (Constraint == "Upl") |
8272 | P = PredicateConstraint::Upl; |
8273 | return P; |
8274 | } |
8275 | |
8276 | |
8277 | |
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
AArch64TargetLowering::ConstraintType
AArch64TargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default:
      break;
    case 'x': // FP/SIMD register-class constraints.
    case 'w':
    case 'y':
      return C_RegisterClass;
    // 'Q' is a memory constraint. NOTE(review): presumably "single register
    // address, no offset" per GCC semantics — confirm against GCC docs.
    case 'Q':
      return C_Memory;
    case 'I': // Immediate constraints; the specific ranges are validated in
    case 'J': // LowerAsmOperandForConstraint.
    case 'K':
    case 'L':
    case 'M':
    case 'N':
    case 'Y':
    case 'Z':
      return C_Immediate;
    case 'z': // Zero register ('z') and symbolic address ('S') operands.
    case 'S':
      return C_Other;
    }
  } else if (parsePredicateConstraint(Constraint) !=
             PredicateConstraint::Invalid)
    return C_RegisterClass; // SVE predicate constraints ("Upa"/"Upl").
  return TargetLowering::getConstraintType(Constraint);
}
8310 | |
8311 | |
8312 | |
8313 | |
/// Examine constraint string and operand type and determine a weight value,
/// used to rank alternatives when an operand matches several constraints.
TargetLowering::ConstraintWeight
AArch64TargetLowering::getSingleConstraintMatchWeight(
    AsmOperandInfo &info, const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value we can't do a match, but allow it at the
  // lowest weight.
  if (!CallOperandVal)
    return CW_Default;
  Type *type = CallOperandVal->getType();
  // Look at the constraint type.
  switch (*constraint) {
  default:
    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
    break;
  case 'x': // FP/SIMD registers are only attractive for FP/vector operands.
  case 'w':
  case 'y':
    if (type->isFloatingPointTy() || type->isVectorTy())
      weight = CW_Register;
    break;
  case 'z': // Zero register: a constant operand.
    weight = CW_Constant;
    break;
  case 'U': // SVE predicate constraints all begin with 'U'.
    if (parsePredicateConstraint(constraint) != PredicateConstraint::Invalid)
      weight = CW_Register;
    break;
  }
  return weight;
}
8345 | |
// Map an inline-asm register constraint (plus the operand's type) to a
// physical register and/or register class.
std::pair<unsigned, const TargetRegisterClass *>
AArch64TargetLowering::getRegForInlineAsmConstraint(
    const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'r':
      // GPRs cannot hold scalable vectors.
      if (VT.isScalableVector())
        return std::make_pair(0U, nullptr);
      // 512-bit LS64 operands use the x8-register-tuple class.
      if (Subtarget->hasLS64() && VT.getSizeInBits() == 512)
        return std::make_pair(0U, &AArch64::GPR64x8ClassRegClass);
      if (VT.getFixedSizeInBits() == 64)
        return std::make_pair(0U, &AArch64::GPR64commonRegClass);
      return std::make_pair(0U, &AArch64::GPR32commonRegClass);
    case 'w': {
      if (!Subtarget->hasFPARMv8())
        break;
      if (VT.isScalableVector()) {
        // Scalable data vectors live in Z registers; predicate (i1) vectors
        // are rejected for 'w'.
        if (VT.getVectorElementType() != MVT::i1)
          return std::make_pair(0U, &AArch64::ZPRRegClass);
        return std::make_pair(0U, nullptr);
      }
      // Pick the FPR class matching the value's width.
      uint64_t VTSize = VT.getFixedSizeInBits();
      if (VTSize == 16)
        return std::make_pair(0U, &AArch64::FPR16RegClass);
      if (VTSize == 32)
        return std::make_pair(0U, &AArch64::FPR32RegClass);
      if (VTSize == 64)
        return std::make_pair(0U, &AArch64::FPR64RegClass);
      if (VTSize == 128)
        return std::make_pair(0U, &AArch64::FPR128RegClass);
      break;
    }
    // The instructions that this constraint is designed for work only with
    // the bottom part of the register bank (v0-v15 / z0-z15).
    case 'x':
      if (!Subtarget->hasFPARMv8())
        break;
      if (VT.isScalableVector())
        return std::make_pair(0U, &AArch64::ZPR_4bRegClass);
      if (VT.getSizeInBits() == 128)
        return std::make_pair(0U, &AArch64::FPR128_loRegClass);
      break;
    case 'y': // Low Z registers only (z0-z7).
      if (!Subtarget->hasFPARMv8())
        break;
      if (VT.isScalableVector())
        return std::make_pair(0U, &AArch64::ZPR_3bRegClass);
      break;
    }
  } else {
    // Multi-character constraints: SVE predicates ("Upa"/"Upl").
    PredicateConstraint PC = parsePredicateConstraint(Constraint);
    if (PC != PredicateConstraint::Invalid) {
      // Only scalable i1 vectors can live in predicate registers.
      if (!VT.isScalableVector() || VT.getVectorElementType() != MVT::i1)
        return std::make_pair(0U, nullptr);
      bool restricted = (PC == PredicateConstraint::Upl);
      return restricted ? std::make_pair(0U, &AArch64::PPR_3bRegClass)
                        : std::make_pair(0U, &AArch64::PPRRegClass);
    }
  }
  // The condition-flags register is addressable as "{cc}".
  if (StringRef("{cc}").equals_insensitive(Constraint))
    return std::make_pair(unsigned(AArch64::NZCV), &AArch64::CCRRegClass);

  // Use the default implementation in TargetLowering to convert the
  // register constraint into a member of a register class.
  std::pair<unsigned, const TargetRegisterClass *> Res;
  Res = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);

  // Not found as a standard register?
  if (!Res.second) {
    // Accept explicit "{vN}" names and map them to the FPR class matching
    // the requested width (128-bit when the type doesn't say otherwise).
    unsigned Size = Constraint.size();
    if ((Size == 4 || Size == 5) && Constraint[0] == '{' &&
        tolower(Constraint[1]) == 'v' && Constraint[Size - 1] == '}') {
      int RegNo;
      bool Failed = Constraint.slice(2, Size - 1).getAsInteger(10, RegNo);
      if (!Failed && RegNo >= 0 && RegNo <= 31) {
        if (VT != MVT::Other && VT.getSizeInBits() == 64) {
          Res.first = AArch64::FPR64RegClass.getRegister(RegNo);
          Res.second = &AArch64::FPR64RegClass;
        } else {
          Res.first = AArch64::FPR128RegClass.getRegister(RegNo);
          Res.second = &AArch64::FPR128RegClass;
        }
      }
    }
  }

  // Without FP/SIMD, only the GPR classes are usable.
  if (Res.second && !Subtarget->hasFPARMv8() &&
      !AArch64::GPR32allRegClass.hasSubClassEq(Res.second) &&
      !AArch64::GPR64allRegClass.hasSubClassEq(Res.second))
    return std::make_pair(0U, nullptr);

  return Res;
}
8442 | |
8443 | EVT AArch64TargetLowering::getAsmOperandValueType(const DataLayout &DL, |
8444 | llvm::Type *Ty, |
8445 | bool AllowUnknown) const { |
8446 | if (Subtarget->hasLS64() && Ty->isIntegerTy(512)) |
8447 | return EVT(MVT::i64x8); |
8448 | |
8449 | return TargetLowering::getAsmOperandValueType(DL, Ty, AllowUnknown); |
8450 | } |
8451 | |
8452 | |
8453 | |
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
void AArch64TargetLowering::LowerAsmOperandForConstraint(
    SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
    SelectionDAG &DAG) const {
  SDValue Result;

  // Currently only support length 1 constraints.
  if (Constraint.length() != 1)
    return;

  char ConstraintLetter = Constraint[0];
  switch (ConstraintLetter) {
  default:
    break;

  // This set of constraints deal with valid constants for various
  // instruction forms. Validate and return a target constant for them if
  // we can.
  case 'z': {
    // 'z' maps to xzr or wzr, so it only accepts a zero input.
    if (!isNullConstant(Op))
      return;

    if (Op.getValueType() == MVT::i64)
      Result = DAG.getRegister(AArch64::XZR, MVT::i64);
    else
      Result = DAG.getRegister(AArch64::WZR, MVT::i32);
    break;
  }
  case 'S': {
    // Symbolic operand: accept a global or block address directly.
    if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
      Result = DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
                                          GA->getValueType(0));
    } else if (const BlockAddressSDNode *BA =
                   dyn_cast<BlockAddressSDNode>(Op)) {
      Result =
          DAG.getTargetBlockAddress(BA->getBlockAddress(), BA->getValueType(0));
    } else
      return;
    break;
  }

  // All of the immediate constraints below require a ConstantSDNode.
  case 'I':
  case 'J':
  case 'K':
  case 'L':
  case 'M':
  case 'N':
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
    if (!C)
      return;

    // Grab the value and validate it per constraint letter.
    uint64_t CVal = C->getZExtValue();
    switch (ConstraintLetter) {
    // 'I': an ADD/SUB immediate — 0..4095, optionally shifted left by 12.
    case 'I':
      if (isUInt<12>(CVal) || isShiftedUInt<12, 12>(CVal))
        break;
      return;
    // 'J': the negation must be a valid ADD/SUB immediate; the signed
    // value itself is what gets emitted.
    case 'J': {
      uint64_t NVal = -C->getSExtValue();
      if (isUInt<12>(NVal) || isShiftedUInt<12, 12>(NVal)) {
        CVal = C->getSExtValue();
        break;
      }
      return;
    }
    // 'K'/'L': 32-bit resp. 64-bit logical (bitmask) immediates.
    case 'K':
      if (AArch64_AM::isLogicalImmediate(CVal, 32))
        break;
      return;
    case 'L':
      if (AArch64_AM::isLogicalImmediate(CVal, 64))
        break;
      return;
    // 'M'/'N': supersets of K/L that additionally accept anything a single
    // MOVZ or MOVN (any 16-bit shift) can load, 32-bit resp. 64-bit.
    case 'M': {
      if (!isUInt<32>(CVal))
        return;
      if (AArch64_AM::isLogicalImmediate(CVal, 32))
        break;
      if ((CVal & 0xFFFF) == CVal) // MOVZ, no shift.
        break;
      if ((CVal & 0xFFFF0000ULL) == CVal) // MOVZ, lsl 16.
        break;
      uint64_t NCVal = ~(uint32_t)CVal;
      if ((NCVal & 0xFFFFULL) == NCVal) // MOVN, no shift.
        break;
      if ((NCVal & 0xFFFF0000ULL) == NCVal) // MOVN, lsl 16.
        break;
      return;
    }
    case 'N': {
      if (AArch64_AM::isLogicalImmediate(CVal, 64))
        break;
      // MOVZ with each of the four possible 16-bit shifts.
      if ((CVal & 0xFFFFULL) == CVal)
        break;
      if ((CVal & 0xFFFF0000ULL) == CVal)
        break;
      if ((CVal & 0xFFFF00000000ULL) == CVal)
        break;
      if ((CVal & 0xFFFF000000000000ULL) == CVal)
        break;
      // MOVN with each of the four possible 16-bit shifts.
      uint64_t NCVal = ~CVal;
      if ((NCVal & 0xFFFFULL) == NCVal)
        break;
      if ((NCVal & 0xFFFF0000ULL) == NCVal)
        break;
      if ((NCVal & 0xFFFF00000000ULL) == NCVal)
        break;
      if ((NCVal & 0xFFFF000000000000ULL) == NCVal)
        break;
      return;
    }
    default:
      return;
    }

    // All assembler immediates are emitted as 64-bit target constants.
    Result = DAG.getTargetConstant(CVal, SDLoc(Op), MVT::i64);
    break;
  }

  if (Result.getNode()) {
    Ops.push_back(Result);
    return;
  }

  // Fall back to the generic handling for unrecognized letters.
  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
8601 | |
8602 | |
8603 | |
8604 | |
8605 | |
8606 | |
8607 | |
8608 | static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG) { |
8609 | EVT VT = V64Reg.getValueType(); |
8610 | unsigned NarrowSize = VT.getVectorNumElements(); |
8611 | MVT EltTy = VT.getVectorElementType().getSimpleVT(); |
8612 | MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize); |
8613 | SDLoc DL(V64Reg); |
8614 | |
8615 | return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideTy, DAG.getUNDEF(WideTy), |
8616 | V64Reg, DAG.getConstant(0, DL, MVT::i64)); |
8617 | } |
8618 | |
8619 | |
8620 | |
8621 | static unsigned getExtFactor(SDValue &V) { |
8622 | EVT EltType = V.getValueType().getVectorElementType(); |
8623 | return EltType.getSizeInBits() / 8; |
8624 | } |
8625 | |
8626 | |
8627 | |
8628 | static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) { |
8629 | EVT VT = V128Reg.getValueType(); |
8630 | unsigned WideSize = VT.getVectorNumElements(); |
8631 | MVT EltTy = VT.getVectorElementType().getSimpleVT(); |
8632 | MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2); |
8633 | SDLoc DL(V128Reg); |
8634 | |
8635 | return DAG.getTargetExtractSubreg(AArch64::dsub, DL, NarrowTy, V128Reg); |
8636 | } |
8637 | |
8638 | |
8639 | |
// Gather data to see if the operation can be modelled as a
// shuffle in combination with VEXTs.
SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
                                                  SelectionDAG &DAG) const {
  assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
  LLVM_DEBUG(dbgs() << "AArch64TargetLowering::ReconstructShuffle\n");
  SDLoc dl(Op);
  EVT VT = Op.getValueType();
  assert(!VT.isScalableVector() &&
         "Scalable vectors cannot be used with ISD::BUILD_VECTOR");
  unsigned NumElts = VT.getVectorNumElements();

  // Per-source bookkeeping: which vector the elements come from and which
  // lane range of it is used.
  struct ShuffleSourceInfo {
    SDValue Vec;
    unsigned MinElt;
    unsigned MaxElt;

    // We may insert some combination of BITCASTs and VEXT nodes to force Vec
    // to be compatible with the shuffle we intend to construct. As a result
    // ShuffleVec will be some sliding window into the original Vec.
    SDValue ShuffleVec;

    // Code should guarantee that element i in Vec starts at element
    // "WindowBase + i * WindowScale" in ShuffleVec.
    int WindowBase;
    int WindowScale;

    ShuffleSourceInfo(SDValue Vec)
        : Vec(Vec), MinElt(std::numeric_limits<unsigned>::max()), MaxElt(0),
          ShuffleVec(Vec), WindowBase(0), WindowScale(1) {}

    bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
  };

  // First gather all vectors used as an immediate source for this
  // BUILD_VECTOR node.
  SmallVector<ShuffleSourceInfo, 2> Sources;
  for (unsigned i = 0; i < NumElts; ++i) {
    SDValue V = Op.getOperand(i);
    if (V.isUndef())
      continue;
    else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
             !isa<ConstantSDNode>(V.getOperand(1))) {
      LLVM_DEBUG(
          dbgs() << "Reshuffle failed: "
                    "a shuffle can only come from building a vector from "
                    "various elements of other vectors, provided their "
                    "indices are constant\n");
      return SDValue();
    }

    // Add this element source to the list if it's not already there.
    SDValue SourceVec = V.getOperand(0);
    auto Source = find(Sources, SourceVec);
    if (Source == Sources.end())
      Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));

    // Update the minimum and maximum lane number seen for this source.
    unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
    Source->MinElt = std::min(Source->MinElt, EltNo);
    Source->MaxElt = std::max(Source->MaxElt, EltNo);
  }

  // A shuffle_vector can only take two inputs.
  if (Sources.size() > 2) {
    LLVM_DEBUG(
        dbgs() << "Reshuffle failed: currently only do something sane when at "
                  "most two source vectors are involved\n");
    return SDValue();
  }

  // Find out the smallest element size among result and two sources, and use
  // it as the element size of the shuffle we build.
  EVT SmallestEltTy = VT.getVectorElementType();
  for (auto &Source : Sources) {
    EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
    if (SrcEltTy.bitsLT(SmallestEltTy)) {
      SmallestEltTy = SrcEltTy;
    }
  }
  unsigned ResMultiplier =
      VT.getScalarSizeInBits() / SmallestEltTy.getFixedSizeInBits();
  uint64_t VTSize = VT.getFixedSizeInBits();
  NumElts = VTSize / SmallestEltTy.getFixedSizeInBits();
  EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);

  // If the source vector is too wide or too narrow, we may nevertheless be
  // able to construct a compatible shuffle either by concatenating it with
  // UNDEF or extracting a suitable range of elements.
  for (auto &Src : Sources) {
    EVT SrcVT = Src.ShuffleVec.getValueType();

    uint64_t SrcVTSize = SrcVT.getFixedSizeInBits();
    if (SrcVTSize == VTSize)
      continue;

    // This stage of the search produces a source with the same element type
    // as the original, but with a total width matching the BUILD_VECTOR
    // output.
    EVT EltVT = SrcVT.getVectorElementType();
    unsigned NumSrcElts = VTSize / EltVT.getFixedSizeInBits();
    EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);

    if (SrcVTSize < VTSize) {
      assert(2 * SrcVTSize == VTSize);
      // We can pad out the smaller vector for free, so if it's part of a
      // shuffle...
      Src.ShuffleVec =
          DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
                      DAG.getUNDEF(Src.ShuffleVec.getValueType()));
      continue;
    }

    if (SrcVTSize != 2 * VTSize) {
      LLVM_DEBUG(
          dbgs() << "Reshuffle failed: result vector too small to extract\n");
      return SDValue();
    }

    if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
      LLVM_DEBUG(
          dbgs() << "Reshuffle failed: span too large for a VEXT to cope\n");
      return SDValue();
    }

    if (Src.MinElt >= NumSrcElts) {
      // The extraction can just take the second half.
      Src.ShuffleVec =
          DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
                      DAG.getConstant(NumSrcElts, dl, MVT::i64));
      Src.WindowBase = -NumSrcElts;
    } else if (Src.MaxElt < NumSrcElts) {
      // The extraction can just take the first half.
      Src.ShuffleVec =
          DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
                      DAG.getConstant(0, dl, MVT::i64));
    } else {
      // An actual VEXT is needed: the span straddles both halves.
      SDValue VEXTSrc1 =
          DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
                      DAG.getConstant(0, dl, MVT::i64));
      SDValue VEXTSrc2 =
          DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
                      DAG.getConstant(NumSrcElts, dl, MVT::i64));
      unsigned Imm = Src.MinElt * getExtFactor(VEXTSrc1);

      if (!SrcVT.is64BitVector()) {
        LLVM_DEBUG(
          dbgs() << "Reshuffle failed: don't know how to lower AArch64ISD::EXT "
                    "for SVE vectors.");
        return SDValue();
      }

      Src.ShuffleVec = DAG.getNode(AArch64ISD::EXT, dl, DestVT, VEXTSrc1,
                                   VEXTSrc2,
                                   DAG.getConstant(Imm, dl, MVT::i32));
      Src.WindowBase = -Src.MinElt;
    }
  }

  // Another possible incompatibility occurs from the vector element types. We
  // can fix this by bitcasting the source vectors to the same type we intend
  // for the shuffle.
  for (auto &Src : Sources) {
    EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
    if (SrcEltTy == SmallestEltTy)
      continue;
    assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
    Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
    Src.WindowScale =
        SrcEltTy.getFixedSizeInBits() / SmallestEltTy.getFixedSizeInBits();
    Src.WindowBase *= Src.WindowScale;
  }

  // Final sanity check before we try to actually produce a shuffle.
  LLVM_DEBUG(for (auto Src
                  : Sources)
                 assert(Src.ShuffleVec.getValueType() == ShuffleVT););

  // The stars all align, our next step is to produce the mask for the shuffle.
  SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
  int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits();
  for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
    SDValue Entry = Op.getOperand(i);
    if (Entry.isUndef())
      continue;

    auto Src = find(Sources, Entry.getOperand(0));
    int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();

    // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an
    // implicit trunc. So only std::min(SrcBits, DestBits) actually get
    // defined in this segment of the output.
    EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
    int BitsDefined = std::min(OrigEltTy.getScalarSizeInBits(),
                               VT.getScalarSizeInBits());
    int LanesDefined = BitsDefined / BitsPerShuffleLane;

    // This source is expected to fill ResMultiplier lanes of the final
    // shuffle, starting at the corresponding offset.
    int *LaneMask = &Mask[i * ResMultiplier];

    int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
    ExtractBase += NumElts * (Src - Sources.begin());
    for (int j = 0; j < LanesDefined; ++j)
      LaneMask[j] = ExtractBase + j;
  }

  // Final check before we try to produce a nonsense shuffle.
  if (!isShuffleMaskLegal(Mask, ShuffleVT)) {
    LLVM_DEBUG(dbgs() << "Reshuffle failed: illegal shuffle mask\n");
    return SDValue();
  }

  SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
  for (unsigned i = 0; i < Sources.size(); ++i)
    ShuffleOps[i] = Sources[i].ShuffleVec;

  SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
                                         ShuffleOps[1], Mask);
  // Bitcast the shuffle result back to the BUILD_VECTOR's requested type.
  SDValue V = DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);

  LLVM_DEBUG(dbgs() << "Reshuffle, creating node: "; Shuffle.dump();
             dbgs() << "Reshuffle, creating node: "; V.dump(););

  return V;
}
8863 | |
8864 | |
8865 | |
8866 | static bool isSingletonEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) { |
8867 | unsigned NumElts = VT.getVectorNumElements(); |
8868 | |
8869 | |
8870 | if (M[0] < 0) |
8871 | return false; |
8872 | |
8873 | Imm = M[0]; |
8874 | |
8875 | |
8876 | |
8877 | |
8878 | unsigned ExpectedElt = Imm; |
8879 | for (unsigned i = 1; i < NumElts; ++i) { |
8880 | |
8881 | |
8882 | ++ExpectedElt; |
8883 | if (ExpectedElt == NumElts) |
8884 | ExpectedElt = 0; |
8885 | |
8886 | if (M[i] < 0) |
8887 | continue; |
8888 | if (ExpectedElt != static_cast<unsigned>(M[i])) |
8889 | return false; |
8890 | } |
8891 | |
8892 | return true; |
8893 | } |
8894 | |
8895 | |
8896 | |
8897 | |
8898 | |
// Check if a vector shuffle corresponds to a DUP with a larger element size
// than the shuffled vector's own element type: every BlockSize-bit block of
// the result repeats the same BlockSize-bit block of the (first) source.
// On success, DupLaneOp receives the lane to duplicate, counted in
// BlockSize-wide elements.
static bool isWideDUPMask(ArrayRef<int> M, EVT VT, unsigned BlockSize,
                          unsigned &DupLaneOp) {
  assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
         "Only possible block sizes for wide DUP are: 16, 32, 64");

  if (BlockSize <= VT.getScalarSizeInBits())
    return false;
  if (BlockSize % VT.getScalarSizeInBits() != 0)
    return false;
  if (VT.getSizeInBits() % BlockSize != 0)
    return false;

  size_t SingleVecNumElements = VT.getVectorNumElements();
  size_t NumEltsPerBlock = BlockSize / VT.getScalarSizeInBits();
  size_t NumBlocks = VT.getSizeInBits() / BlockSize;

  // We are looking for masks like [0, 1, 0, 1] or [2, 3, 2, 3] or
  // [4, 5, 6, 7, 4, 5, 6, 7], where any element might be undef. BlockElts
  // collects, per position within a block, the (unique) lane index used
  // there across all blocks; -1 means "only undefs seen so far".
  SmallVector<int, 8> BlockElts(NumEltsPerBlock, -1);
  for (size_t BlockIndex = 0; BlockIndex < NumBlocks; BlockIndex++)
    for (size_t I = 0; I < NumEltsPerBlock; I++) {
      int Elt = M[BlockIndex * NumEltsPerBlock + I];
      if (Elt < 0)
        continue;
      // We don't handle shuffles drawing from the second operand here.
      if ((unsigned)Elt >= SingleVecNumElements)
        return false;
      if (BlockElts[I] < 0)
        BlockElts[I] = Elt;
      else if (BlockElts[I] != Elt)
        return false;
    }

  // We found a candidate block (possibly with undefs). It must be a run of
  // consecutive lane indices starting at a multiple of NumEltsPerBlock,
  // with some entries possibly still undef.

  // Find the first non-undef entry of the block.
  auto FirstRealEltIter = find_if(BlockElts, [](int Elt) { return Elt >= 0; });
  assert(FirstRealEltIter != BlockElts.end() &&
         "Shuffle with all-undefs must have been caught by previous cases, "
         "e.g. isSplat()");
  // Release-build safety net for the all-undef case the assert covers.
  if (FirstRealEltIter == BlockElts.end()) {
    DupLaneOp = 0;
    return true;
  }

  // Index of FirstRealElt within BlockElts.
  size_t FirstRealIndex = FirstRealEltIter - BlockElts.begin();

  // Guard the subtraction below against wrapping.
  if ((unsigned)*FirstRealEltIter < FirstRealIndex)
    return false;
  // The value BlockElts[0] must have if it isn't undef.
  size_t Elt0 = *FirstRealEltIter - FirstRealIndex;

  // The block must start on a block-aligned lane.
  if (Elt0 % NumEltsPerBlock != 0)
    return false;
  // Check the sequence really is consecutive integers (modulo undefs).
  for (size_t I = 0; I < NumEltsPerBlock; I++)
    if (BlockElts[I] >= 0 && (unsigned)BlockElts[I] != Elt0 + I)
      return false;

  DupLaneOp = Elt0 / NumEltsPerBlock;
  return true;
}
8969 | |
8970 | |
8971 | |
8972 | static bool isEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseEXT, |
8973 | unsigned &Imm) { |
8974 | |
8975 | const int *FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; }); |
8976 | |
8977 | |
8978 | unsigned NumElts = VT.getVectorNumElements(); |
8979 | unsigned MaskBits = APInt(32, NumElts * 2).logBase2(); |
8980 | APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1); |
8981 | |
8982 | |
8983 | const int *FirstWrongElt = std::find_if(FirstRealElt + 1, M.end(), |
8984 | [&](int Elt) {return Elt != ExpectedElt++ && Elt != -1;}); |
8985 | if (FirstWrongElt != M.end()) |
8986 | return false; |
8987 | |
8988 | |
8989 | |
8990 | |
8991 | |
8992 | |
8993 | |
8994 | Imm = ExpectedElt.getZExtValue(); |
8995 | |
8996 | |
8997 | |
8998 | |
8999 | |
9000 | |
9001 | |
9002 | if (Imm < NumElts) |
9003 | ReverseEXT = true; |
9004 | else |
9005 | Imm -= NumElts; |
9006 | |
9007 | return true; |
9008 | } |
9009 | |
9010 | |
9011 | |
9012 | |
9013 | static bool isREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) { |
9014 | assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) && |
9015 | "Only possible block sizes for REV are: 16, 32, 64"); |
9016 | |
9017 | unsigned EltSz = VT.getScalarSizeInBits(); |
9018 | if (EltSz == 64) |
9019 | return false; |
9020 | |
9021 | unsigned NumElts = VT.getVectorNumElements(); |
9022 | unsigned BlockElts = M[0] + 1; |
9023 | |
9024 | if (M[0] < 0) |
9025 | BlockElts = BlockSize / EltSz; |
9026 | |
9027 | if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz) |
9028 | return false; |
9029 | |
9030 | for (unsigned i = 0; i < NumElts; ++i) { |
9031 | if (M[i] < 0) |
9032 | continue; |
9033 | if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts)) |
9034 | return false; |
9035 | } |
9036 | |
9037 | return true; |
9038 | } |
9039 | |
9040 | static bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { |
9041 | unsigned NumElts = VT.getVectorNumElements(); |
9042 | if (NumElts % 2 != 0) |
9043 | return false; |
9044 | WhichResult = (M[0] == 0 ? 0 : 1); |
9045 | unsigned Idx = WhichResult * NumElts / 2; |
9046 | for (unsigned i = 0; i != NumElts; i += 2) { |
9047 | if ((M[i] >= 0 && (unsigned)M[i] != Idx) || |
9048 | (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx + NumElts)) |
9049 | return false; |
9050 | Idx += 1; |
9051 | } |
9052 | |
9053 | return true; |
9054 | } |
9055 | |
9056 | static bool isUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { |
9057 | unsigned NumElts = VT.getVectorNumElements(); |
9058 | WhichResult = (M[0] == 0 ? 0 : 1); |
9059 | for (unsigned i = 0; i != NumElts; ++i) { |
9060 | if (M[i] < 0) |
9061 | continue; |
9062 | if ((unsigned)M[i] != 2 * i + WhichResult) |
9063 | return false; |
9064 | } |
9065 | |
9066 | return true; |
9067 | } |
9068 | |
9069 | static bool isTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { |
9070 | unsigned NumElts = VT.getVectorNumElements(); |
9071 | if (NumElts % 2 != 0) |
9072 | return false; |
9073 | WhichResult = (M[0] == 0 ? 0 : 1); |
9074 | for (unsigned i = 0; i < NumElts; i += 2) { |
9075 | if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) || |
9076 | (M[i + 1] >= 0 && (unsigned)M[i + 1] != i + NumElts + WhichResult)) |
9077 | return false; |
9078 | } |
9079 | return true; |
9080 | } |
9081 | |
9082 | |
9083 | |
9084 | |
9085 | static bool isZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { |
9086 | unsigned NumElts = VT.getVectorNumElements(); |
9087 | if (NumElts % 2 != 0) |
9088 | return false; |
9089 | WhichResult = (M[0] == 0 ? 0 : 1); |
9090 | unsigned Idx = WhichResult * NumElts / 2; |
9091 | for (unsigned i = 0; i != NumElts; i += 2) { |
9092 | if ((M[i] >= 0 && (unsigned)M[i] != Idx) || |
9093 | (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx)) |
9094 | return false; |
9095 | Idx += 1; |
9096 | } |
9097 | |
9098 | return true; |
9099 | } |
9100 | |
9101 | |
9102 | |
9103 | |
9104 | static bool isUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { |
9105 | unsigned Half = VT.getVectorNumElements() / 2; |
9106 | WhichResult = (M[0] == 0 ? 0 : 1); |
9107 | for (unsigned j = 0; j != 2; ++j) { |
9108 | unsigned Idx = WhichResult; |
9109 | for (unsigned i = 0; i != Half; ++i) { |
9110 | int MIdx = M[i + j * Half]; |
9111 | if (MIdx >= 0 && (unsigned)MIdx != Idx) |
9112 | return false; |
9113 | Idx += 2; |
9114 | } |
9115 | } |
9116 | |
9117 | return true; |
9118 | } |
9119 | |
9120 | |
9121 | |
9122 | |
9123 | static bool isTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { |
9124 | unsigned NumElts = VT.getVectorNumElements(); |
9125 | if (NumElts % 2 != 0) |
9126 | return false; |
9127 | WhichResult = (M[0] == 0 ? 0 : 1); |
9128 | for (unsigned i = 0; i < NumElts; i += 2) { |
9129 | if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) || |
9130 | (M[i + 1] >= 0 && (unsigned)M[i + 1] != i + WhichResult)) |
9131 | return false; |
9132 | } |
9133 | return true; |
9134 | } |
9135 | |
9136 | static bool isINSMask(ArrayRef<int> M, int NumInputElements, |
9137 | bool &DstIsLeft, int &Anomaly) { |
9138 | if (M.size() != static_cast<size_t>(NumInputElements)) |
9139 | return false; |
9140 | |
9141 | int NumLHSMatch = 0, NumRHSMatch = 0; |
9142 | int LastLHSMismatch = -1, LastRHSMismatch = -1; |
9143 | |
9144 | for (int i = 0; i < NumInputElements; ++i) { |
9145 | if (M[i] == -1) { |
9146 | ++NumLHSMatch; |
9147 | ++NumRHSMatch; |
9148 | continue; |
9149 | } |
9150 | |
9151 | if (M[i] == i) |
9152 | ++NumLHSMatch; |
9153 | else |
9154 | LastLHSMismatch = i; |
9155 | |
9156 | if (M[i] == i + NumInputElements) |
9157 | ++NumRHSMatch; |
9158 | else |
9159 | LastRHSMismatch = i; |
9160 | } |
9161 | |
9162 | if (NumLHSMatch == NumInputElements - 1) { |
9163 | DstIsLeft = true; |
9164 | Anomaly = LastLHSMismatch; |
9165 | return true; |
9166 | } else if (NumRHSMatch == NumInputElements - 1) { |
9167 | DstIsLeft = false; |
9168 | Anomaly = LastRHSMismatch; |
9169 | return true; |
9170 | } |
9171 | |
9172 | return false; |
9173 | } |
9174 | |
9175 | static bool isConcatMask(ArrayRef<int> Mask, EVT VT, bool SplitLHS) { |
9176 | if (VT.getSizeInBits() != 128) |
9177 | return false; |
9178 | |
9179 | unsigned NumElts = VT.getVectorNumElements(); |
9180 | |
9181 | for (int I = 0, E = NumElts / 2; I != E; I++) { |
9182 | if (Mask[I] != I) |
9183 | return false; |
9184 | } |
9185 | |
9186 | int Offset = NumElts / 2; |
9187 | for (int I = NumElts / 2, E = NumElts; I != E; I++) { |
9188 | if (Mask[I] != I + SplitLHS * Offset) |
9189 | return false; |
9190 | } |
9191 | |
9192 | return true; |
9193 | } |
9194 | |
/// Try to lower a vector shuffle whose mask is a low-half/low-half
/// concatenation into a single CONCAT_VECTORS node; returns an empty SDValue
/// if the pattern does not match.
static SDValue tryFormConcatFromShuffle(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue V0 = Op.getOperand(0);
  SDValue V1 = Op.getOperand(1);
  ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();

  // Element types of both inputs must match the result's.
  if (VT.getVectorElementType() != V0.getValueType().getVectorElementType() ||
      VT.getVectorElementType() != V1.getValueType().getVectorElementType())
    return SDValue();

  // A 128-bit LHS contributes only its low half; the mask must account for
  // that (see isConcatMask).
  bool SplitV0 = V0.getValueSizeInBits() == 128;

  if (!isConcatMask(Mask, VT, SplitV0))
    return SDValue();

  // Reduce any 128-bit input to its low half so both concat operands are
  // half-width.
  EVT CastVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
  if (SplitV0) {
    V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V0,
                     DAG.getConstant(0, DL, MVT::i64));
  }
  if (V1.getValueSizeInBits() == 128) {
    V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V1,
                     DAG.getConstant(0, DL, MVT::i64));
  }
  return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, V0, V1);
}
9222 | |
9223 | |
9224 | |
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table,
/// recursively emit the DAG operations needed to perform the shuffle.
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
                                      SDValue RHS, SelectionDAG &DAG,
                                      const SDLoc &dl) {
  // PFEntry encoding: bits [29:26] = opcode, [25:13] = LHS table id,
  // [12:0] = RHS table id.
  unsigned OpNum = (PFEntry >> 26) & 0x0F;
  unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1);
  unsigned RHSID = (PFEntry >> 0) & ((1 << 13) - 1);

  enum {
    OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
    OP_VREV,     // Reverse within 64-bit blocks
    OP_VDUP0,    // Duplicate lane 0..3
    OP_VDUP1,
    OP_VDUP2,
    OP_VDUP3,
    OP_VEXT1,    // Extract with byte offset 1..3 elements
    OP_VEXT2,
    OP_VEXT3,
    OP_VUZPL,    // Unzip, left result
    OP_VUZPR,    // Unzip, right result
    OP_VZIPL,    // Zip, left result
    OP_VZIPR,    // Zip, right result
    OP_VTRNL,    // Transpose, left result
    OP_VTRNR     // Transpose, right result
  };

  if (OpNum == OP_COPY) {
    // Identity copies: base-9 encoded identity masks select LHS or RHS.
    if (LHSID == (1 * 9 + 2) * 9 + 3)
      return LHS;
    assert(LHSID == ((4 * 9 + 5) * 9 + 6) * 9 + 7 && "Illegal OP_COPY!");
    return RHS;
  }

  // Recursively materialize the two sub-shuffles this entry combines.
  SDValue OpLHS, OpRHS;
  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
  EVT VT = OpLHS.getValueType();

  switch (OpNum) {
  default:
    llvm_unreachable("Unknown shuffle opcode!");
  case OP_VREV:
    // VREV divides the vector in half and swaps within the half.
    if (VT.getVectorElementType() == MVT::i32 ||
        VT.getVectorElementType() == MVT::f32)
      return DAG.getNode(AArch64ISD::REV64, dl, VT, OpLHS);
    // vrev <4 x i16> -> REV32
    if (VT.getVectorElementType() == MVT::i16 ||
        VT.getVectorElementType() == MVT::f16 ||
        VT.getVectorElementType() == MVT::bf16)
      return DAG.getNode(AArch64ISD::REV32, dl, VT, OpLHS);
    // vrev <4 x i8> -> REV16
    assert(VT.getVectorElementType() == MVT::i8);
    return DAG.getNode(AArch64ISD::REV16, dl, VT, OpLHS);
  case OP_VDUP0:
  case OP_VDUP1:
  case OP_VDUP2:
  case OP_VDUP3: {
    EVT EltTy = VT.getVectorElementType();
    unsigned Opcode;
    if (EltTy == MVT::i8)
      Opcode = AArch64ISD::DUPLANE8;
    else if (EltTy == MVT::i16 || EltTy == MVT::f16 || EltTy == MVT::bf16)
      Opcode = AArch64ISD::DUPLANE16;
    else if (EltTy == MVT::i32 || EltTy == MVT::f32)
      Opcode = AArch64ISD::DUPLANE32;
    else if (EltTy == MVT::i64 || EltTy == MVT::f64)
      Opcode = AArch64ISD::DUPLANE64;
    else
      llvm_unreachable("Invalid vector element type?");

    // DUPLANE takes a 128-bit input; widen a 64-bit operand with undef.
    if (VT.getSizeInBits() == 64)
      OpLHS = WidenVector(OpLHS, DAG);
    SDValue Lane = DAG.getConstant(OpNum - OP_VDUP0, dl, MVT::i64);
    return DAG.getNode(Opcode, dl, VT, OpLHS, Lane);
  }
  case OP_VEXT1:
  case OP_VEXT2:
  case OP_VEXT3: {
    // EXT immediate is in bytes: element offset scaled by element size.
    unsigned Imm = (OpNum - OP_VEXT1 + 1) * getExtFactor(OpLHS);
    return DAG.getNode(AArch64ISD::EXT, dl, VT, OpLHS, OpRHS,
                       DAG.getConstant(Imm, dl, MVT::i32));
  }
  case OP_VUZPL:
    return DAG.getNode(AArch64ISD::UZP1, dl, DAG.getVTList(VT, VT), OpLHS,
                       OpRHS);
  case OP_VUZPR:
    return DAG.getNode(AArch64ISD::UZP2, dl, DAG.getVTList(VT, VT), OpLHS,
                       OpRHS);
  case OP_VZIPL:
    return DAG.getNode(AArch64ISD::ZIP1, dl, DAG.getVTList(VT, VT), OpLHS,
                       OpRHS);
  case OP_VZIPR:
    return DAG.getNode(AArch64ISD::ZIP2, dl, DAG.getVTList(VT, VT), OpLHS,
                       OpRHS);
  case OP_VTRNL:
    return DAG.getNode(AArch64ISD::TRN1, dl, DAG.getVTList(VT, VT), OpLHS,
                       OpRHS);
  case OP_VTRNR:
    return DAG.getNode(AArch64ISD::TRN2, dl, DAG.getVTList(VT, VT), OpLHS,
                       OpRHS);
  }
}
9327 | |
/// Lower an arbitrary shuffle with a TBL1/TBL2 table-lookup instruction by
/// expanding the element-level shuffle mask into a per-byte index vector.
static SDValue GenerateTBL(SDValue Op, ArrayRef<int> ShuffleMask,
                           SelectionDAG &DAG) {
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  SDLoc DL(Op);

  EVT EltVT = Op.getValueType().getVectorElementType();
  unsigned BytesPerElt = EltVT.getSizeInBits() / 8;

  // Expand each mask element into BytesPerElt consecutive byte indices.
  SmallVector<SDValue, 8> TBLMask;
  for (int Val : ShuffleMask) {
    for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
      unsigned Offset = Byte + Val * BytesPerElt;
      TBLMask.push_back(DAG.getConstant(Offset, DL, MVT::i32));
    }
  }

  // The index vector is v8i8 for 64-bit shuffles, v16i8 for 128-bit ones.
  MVT IndexVT = MVT::v8i8;
  unsigned IndexLen = 8;
  if (Op.getValueSizeInBits() == 128) {
    IndexVT = MVT::v16i8;
    IndexLen = 16;
  }

  SDValue V1Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V1);
  SDValue V2Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V2);

  SDValue Shuffle;
  if (V2.getNode()->isUndef()) {
    // Single-input shuffle: a TBL1 over V1 (doubled up for 64-bit inputs,
    // since TBL tables are 128 bits wide).
    if (IndexLen == 8)
      V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V1Cst);
    Shuffle = DAG.getNode(
        ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
        DAG.getConstant(Intrinsic::aarch64_neon_tbl1, DL, MVT::i32), V1Cst,
        DAG.getBuildVector(IndexVT, DL,
                           makeArrayRef(TBLMask.data(), IndexLen)));
  } else {
    if (IndexLen == 8) {
      // Two 64-bit inputs fit in one 128-bit table: still a TBL1.
      V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V2Cst);
      Shuffle = DAG.getNode(
          ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
          DAG.getConstant(Intrinsic::aarch64_neon_tbl1, DL, MVT::i32), V1Cst,
          DAG.getBuildVector(IndexVT, DL,
                             makeArrayRef(TBLMask.data(), IndexLen)));
    } else {
      // Two 128-bit inputs need a two-register table: TBL2 via the
      // corresponding intrinsic (the intrinsic form carries the consecutive
      // register-pair constraint that a raw node could not express here).
      Shuffle = DAG.getNode(
          ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
          DAG.getConstant(Intrinsic::aarch64_neon_tbl2, DL, MVT::i32), V1Cst,
          V2Cst, DAG.getBuildVector(IndexVT, DL,
                                    makeArrayRef(TBLMask.data(), IndexLen)));
    }
  }
  // Cast the byte-level result back to the requested vector type.
  return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Shuffle);
}
9389 | |
9390 | static unsigned getDUPLANEOp(EVT EltType) { |
9391 | if (EltType == MVT::i8) |
9392 | return AArch64ISD::DUPLANE8; |
9393 | if (EltType == MVT::i16 || EltType == MVT::f16 || EltType == MVT::bf16) |
9394 | return AArch64ISD::DUPLANE16; |
9395 | if (EltType == MVT::i32 || EltType == MVT::f32) |
9396 | return AArch64ISD::DUPLANE32; |
9397 | if (EltType == MVT::i64 || EltType == MVT::f64) |
9398 | return AArch64ISD::DUPLANE64; |
9399 | |
9400 | llvm_unreachable("Invalid vector element type?"); |
9401 | } |
9402 | |
/// Build a DUPLANE node duplicating lane \p Lane of \p V, looking through
/// EXTRACT_SUBVECTOR, CONCAT_VECTORS and bitcast patterns so the duplicate
/// reads directly from the widest available source register.
static SDValue constructDup(SDValue V, int Lane, SDLoc dl, EVT VT,
                            unsigned Opcode, SelectionDAG &DAG) {
  // Try to fold a bitcasted extract_subvector into the DUP by rescaling the
  // lane index onto the original wide vector.
  auto getScaledOffsetDup = [](SDValue BitCast, int &LaneC, MVT &CastVT) {
    // Match: (BITCAST (EXTRACT_SUBVECTOR ...)).
    if (BitCast.getOpcode() != ISD::BITCAST ||
        BitCast.getOperand(0).getOpcode() != ISD::EXTRACT_SUBVECTOR)
      return false;

    // The extract's starting bit offset must be a whole number of the
    // bitcast's element widths, otherwise the lane cannot be re-expressed.
    SDValue Extract = BitCast.getOperand(0);
    unsigned ExtIdx = Extract.getConstantOperandVal(1);
    unsigned SrcEltBitWidth = Extract.getScalarValueSizeInBits();
    unsigned ExtIdxInBits = ExtIdx * SrcEltBitWidth;
    unsigned CastedEltBitWidth = BitCast.getScalarValueSizeInBits();
    if (ExtIdxInBits % CastedEltBitWidth != 0)
      return false;

    // Shift the lane by the extract's offset, measured in casted elements.
    LaneC += ExtIdxInBits / CastedEltBitWidth;

    // Determine the casted type of the wide source vector:
    //   dup (bitcast (extract_subv X, C)), LaneC --> dup (bitcast X), LaneC'
    // Examples:
    //   dup (bitcast (extract_subv v2f64 X, 1) to v2f32), 1 -> dup v4f32 X, 3
    //   dup (bitcast (extract_subv v16i8 X, 8) to v4i16), 1 -> dup v8i16 X, 5
    unsigned SrcVecNumElts =
        Extract.getOperand(0).getValueSizeInBits() / CastedEltBitWidth;
    CastVT = MVT::getVectorVT(BitCast.getSimpleValueType().getScalarType(),
                              SrcVecNumElts);
    return true;
  };
  MVT CastVT;
  if (getScaledOffsetDup(V, Lane, CastVT)) {
    // Read the lane straight out of the wide source, bitcast to CastVT.
    V = DAG.getBitcast(CastVT, V.getOperand(0).getOperand(0));
  } else if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
    // The lane is offset by the subvector's starting index.
    // Example: dup v2f32 (extract v4f32 X, 2), 1 --> dup v4f32 X, 3
    Lane += V.getConstantOperandVal(1);
    V = V.getOperand(0);
  } else if (V.getOpcode() == ISD::CONCAT_VECTORS) {
    // Pick whichever concat operand the lane falls into and rebase the lane.
    // Example: dup v4i32 (concat v2i32 X, v2i32 Y), 3 --> dup v4i32 Y, 1
    unsigned Idx = Lane >= (int)VT.getVectorNumElements() / 2;
    Lane -= Idx * VT.getVectorNumElements() / 2;
    V = WidenVector(V.getOperand(Idx), DAG);
  } else if (VT.getSizeInBits() == 64) {
    // DUPLANE needs a 128-bit input; widen a 64-bit operand with undef.
    V = WidenVector(V, DAG);
  }
  return DAG.getNode(Opcode, dl, VT, V, DAG.getConstant(Lane, dl, MVT::i64));
}
9456 | |
9457 | SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, |
9458 | SelectionDAG &DAG) const { |
9459 | SDLoc dl(Op); |
9460 | EVT VT = Op.getValueType(); |
9461 | |
9462 | ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode()); |
9463 | |
9464 | if (useSVEForFixedLengthVectorVT(VT)) |
9465 | return LowerFixedLengthVECTOR_SHUFFLEToSVE(Op, DAG); |
9466 | |
9467 | |
9468 | |
9469 | |
9470 | |
9471 | ArrayRef<int> ShuffleMask = SVN->getMask(); |
9472 | |
9473 | SDValue V1 = Op.getOperand(0); |
9474 | SDValue V2 = Op.getOperand(1); |
9475 | |
9476 | assert(V1.getValueType() == VT && "Unexpected VECTOR_SHUFFLE type!"); |
9477 | assert(ShuffleMask.size() == VT.getVectorNumElements() && |
9478 | "Unexpected VECTOR_SHUFFLE mask size!"); |
9479 | |
9480 | if (SVN->isSplat()) { |
9481 | int Lane = SVN->getSplatIndex(); |
9482 | |
9483 | if (Lane == -1) |
9484 | Lane = 0; |
9485 | |
9486 | if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) |
9487 | return DAG.getNode(AArch64ISD::DUP, dl, V1.getValueType(), |
9488 | V1.getOperand(0)); |
9489 | |
9490 | |
9491 | if (V1.getOpcode() == ISD::BUILD_VECTOR && |
9492 | !isa<ConstantSDNode>(V1.getOperand(Lane))) |
9493 | return DAG.getNode(AArch64ISD::DUP, dl, VT, V1.getOperand(Lane)); |
9494 | |
9495 | |
9496 | unsigned Opcode = getDUPLANEOp(V1.getValueType().getVectorElementType()); |
9497 | return constructDup(V1, Lane, dl, VT, Opcode, DAG); |
9498 | } |
9499 | |
9500 | |
9501 | for (unsigned LaneSize : {64U, 32U, 16U}) { |
9502 | unsigned Lane = 0; |
9503 | if (isWideDUPMask(ShuffleMask, VT, LaneSize, Lane)) { |
9504 | unsigned Opcode = LaneSize == 64 ? AArch64ISD::DUPLANE64 |
9505 | : LaneSize == 32 ? AArch64ISD::DUPLANE32 |
9506 | : AArch64ISD::DUPLANE16; |
9507 | |
9508 | MVT NewEltTy = MVT::getIntegerVT(LaneSize); |
9509 | unsigned NewEltCount = VT.getSizeInBits() / LaneSize; |
9510 | MVT NewVecTy = MVT::getVectorVT(NewEltTy, NewEltCount); |
9511 | V1 = DAG.getBitcast(NewVecTy, V1); |
9512 | |
9513 | V1 = constructDup(V1, Lane, dl, NewVecTy, Opcode, DAG); |
9514 | |
9515 | return DAG.getBitcast(VT, V1); |
9516 | } |
9517 | } |
9518 | |
9519 | if (isREVMask(ShuffleMask, VT, 64)) |
9520 | return DAG.getNode(AArch64ISD::REV64, dl, V1.getValueType(), V1, V2); |
9521 | if (isREVMask(ShuffleMask, VT, 32)) |
9522 | return DAG.getNode(AArch64ISD::REV32, dl, V1.getValueType(), V1, V2); |
9523 | if (isREVMask(ShuffleMask, VT, 16)) |
9524 | return DAG.getNode(AArch64ISD::REV16, dl, V1.getValueType(), V1, V2); |
9525 | |
9526 | if (((VT.getVectorNumElements() == 8 && VT.getScalarSizeInBits() == 16) || |
9527 | (VT.getVectorNumElements() == 16 && VT.getScalarSizeInBits() == 8)) && |
9528 | ShuffleVectorInst::isReverseMask(ShuffleMask)) { |
9529 | SDValue Rev = DAG.getNode(AArch64ISD::REV64, dl, VT, V1); |
9530 | return DAG.getNode(AArch64ISD::EXT, dl, VT, Rev, Rev, |
9531 | DAG.getConstant(8, dl, MVT::i32)); |
9532 | } |
9533 | |
9534 | bool ReverseEXT = false; |
9535 | unsigned Imm; |
9536 | if (isEXTMask(ShuffleMask, VT, ReverseEXT, Imm)) { |
9537 | if (ReverseEXT) |
9538 | std::swap(V1, V2); |
9539 | Imm *= getExtFactor(V1); |
9540 | return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V2, |
9541 | DAG.getConstant(Imm, dl, MVT::i32)); |
9542 | } else if (V2->isUndef() && isSingletonEXTMask(ShuffleMask, VT, Imm)) { |
9543 | Imm *= getExtFactor(V1); |
9544 | return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V1, |
9545 | DAG.getConstant(Imm, dl, MVT::i32)); |
9546 | } |
9547 | |
9548 | unsigned WhichResult; |
9549 | if (isZIPMask(ShuffleMask, VT, WhichResult)) { |
9550 | unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2; |
9551 | return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2); |
9552 | } |
9553 | if (isUZPMask(ShuffleMask, VT, WhichResult)) { |
9554 | unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2; |
9555 | return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2); |
9556 | } |
9557 | if (isTRNMask(ShuffleMask, VT, WhichResult)) { |
9558 | unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2; |
9559 | return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2); |
9560 | } |
9561 | |
9562 | if (isZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) { |
9563 | unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2; |
9564 | return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1); |
9565 | } |
9566 | if (isUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) { |
9567 | unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2; |
9568 | return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1); |
9569 | } |
9570 | if (isTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) { |
9571 | unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2; |
9572 | return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1); |
9573 | } |
9574 | |
9575 | if (SDValue Concat = tryFormConcatFromShuffle(Op, DAG)) |
9576 | return Concat; |
9577 | |
9578 | bool DstIsLeft; |
9579 | int Anomaly; |
9580 | int NumInputElements = V1.getValueType().getVectorNumElements(); |
9581 | if (isINSMask(ShuffleMask, NumInputElements, DstIsLeft, Anomaly)) { |
9582 | SDValue DstVec = DstIsLeft ? V1 : V2; |
9583 | SDValue DstLaneV = DAG.getConstant(Anomaly, dl, MVT::i64); |
9584 | |
9585 | SDValue SrcVec = V1; |
9586 | int SrcLane = ShuffleMask[Anomaly]; |
9587 | if (SrcLane >= NumInputElements) { |
9588 | SrcVec = V2; |
9589 | SrcLane -= VT.getVectorNumElements(); |
9590 | } |
9591 | SDValue SrcLaneV = DAG.getConstant(SrcLane, dl, MVT::i64); |
9592 | |
9593 | EVT ScalarVT = VT.getVectorElementType(); |
9594 | |
9595 | if (ScalarVT.getFixedSizeInBits() < 32 && ScalarVT.isInteger()) |
9596 | ScalarVT = MVT::i32; |
9597 | |
9598 | return DAG.getNode( |
9599 | ISD::INSERT_VECTOR_ELT, dl, VT, DstVec, |
9600 | DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, SrcVec, SrcLaneV), |
9601 | DstLaneV); |
9602 | } |
9603 | |
9604 | |
9605 | |
9606 | unsigned NumElts = VT.getVectorNumElements(); |
9607 | if (NumElts == 4) { |
9608 | unsigned PFIndexes[4]; |
9609 | for (unsigned i = 0; i != 4; ++i) { |
9610 | if (ShuffleMask[i] < 0) |
9611 | PFIndexes[i] = 8; |
9612 | else |
9613 | PFIndexes[i] = ShuffleMask[i]; |
9614 | } |
9615 | |
9616 | |
9617 | unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 + |
9618 | PFIndexes[2] * 9 + PFIndexes[3]; |
9619 | unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; |
9620 | unsigned Cost = (PFEntry >> 30); |
9621 | |
9622 | if (Cost <= 4) |
9623 | return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); |
9624 | } |
9625 | |
9626 | return GenerateTBL(Op, ShuffleMask, DAG); |
9627 | } |
9628 | |
// Lower SPLAT_VECTOR: broadcast the scalar operand into every lane of VT.
SDValue AArch64TargetLowering::LowerSPLAT_VECTOR(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc dl(Op);
  EVT VT = Op.getValueType();
  EVT ElemVT = VT.getScalarType();
  SDValue SplatVal = Op.getOperand(0);

  // Fixed-length vectors that are handled via SVE take the generic
  // scalable-op lowering path instead.
  if (useSVEForFixedLengthVectorVT(VT))
    return LowerToScalableOp(Op, DAG);

  // Extend narrow integer splat values so they fit a GPR-sized operand
  // (i32 or i64); FP element types pass through unchanged.
  switch (ElemVT.getSimpleVT().SimpleTy) {
  case MVT::i1: {
    // A splat of constant true becomes an all-lanes PTRUE predicate.
    if (auto *ConstVal = dyn_cast<ConstantSDNode>(SplatVal)) {
      if (ConstVal->isOne())
        return getPTrue(DAG, dl, VT, AArch64SVEPredPattern::all);
      // NOTE(review): constant-false splats fall through to the generic
      // whilelo expansion below — presumably a TODO for a PFALSE special
      // case; confirm before relying on it.
    }

    // Non-constant i1 splat: sign-extend bit 0 of the value to i64 (giving
    // 0 or -1), then build the predicate as whilelo(0, SplatVal) via the
    // SVE intrinsic (an all-false or all-true predicate respectively, per
    // SVE WHILELO semantics — verify against the intrinsic definition).
    SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i64);
    SplatVal = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i64, SplatVal,
                           DAG.getValueType(MVT::i1));
    SDValue ID = DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo, dl,
                                       MVT::i64);
    return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, ID,
                       DAG.getConstant(0, dl, MVT::i64), SplatVal);
  }
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    // Sub-32-bit integers are widened to an i32 operand.
    SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i32);
    break;
  case MVT::i64:
    SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i64);
    break;
  case MVT::f16:
  case MVT::bf16:
  case MVT::f32:
  case MVT::f64:
    // Fine as-is: FP splat operands need no extension.
    break;
  default:
    report_fatal_error("Unsupported SPLAT_VECTOR input operand type");
  }

  return DAG.getNode(AArch64ISD::DUP, dl, VT, SplatVal);
}
9680 | |
// Lower a DUPQ-lane operation: replicate the 128-bit quadword selected by
// Op.getOperand(2) across the whole scalable vector Op.getOperand(1).
SDValue AArch64TargetLowering::LowerDUPQLane(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc DL(Op);

  EVT VT = Op.getValueType();
  if (!isTypeLegal(VT) || !VT.isScalableVector())
    return SDValue();

  // Only full-width (one 128-bit block per vector granule) SVE types are
  // handled here.
  if (VT.getSizeInBits().getKnownMinSize() != AArch64::SVEBitsPerBlock)
    return SDValue();

  // The DUPQ operation is independent of element type, so normalise to i64.
  SDValue V = DAG.getNode(ISD::BITCAST, DL, MVT::nxv2i64, Op.getOperand(1));
  SDValue Idx128 = Op.getOperand(2);

  // If the index is a constant in the range DUP_ZZI_Q accepts (0..3), emit
  // the single machine instruction directly.
  auto *CIdx = dyn_cast<ConstantSDNode>(Idx128);
  if (CIdx && (CIdx->getZExtValue() <= 3)) {
    SDValue CI = DAG.getTargetConstant(CIdx->getZExtValue(), DL, MVT::i64);
    SDNode *DUPQ =
        DAG.getMachineNode(AArch64::DUP_ZZI_Q, DL, MVT::nxv2i64, V, CI);
    return DAG.getNode(ISD::BITCAST, DL, VT, SDValue(DUPQ, 0));
  }

  // General case: build a TBL shuffle whose i64 index vector is
  //   (2*idx, 2*idx+1, 2*idx, 2*idx+1, ...)
  // i.e. each i64 pair re-reads the selected quadword.

  SDValue One = DAG.getConstant(1, DL, MVT::i64);
  SDValue SplatOne = DAG.getNode(ISD::SPLAT_VECTOR, DL, MVT::nxv2i64, One);

  // step-vector & 1 --> the repeating pattern 0,1,0,1,...
  SDValue SV = DAG.getStepVector(DL, MVT::nxv2i64);
  SV = DAG.getNode(ISD::AND, DL, MVT::nxv2i64, SV, SplatOne);

  // Idx64 = 2*Idx128; splat it and add the 0,1 pattern to form the mask.
  SDValue Idx64 = DAG.getNode(ISD::ADD, DL, MVT::i64, Idx128, Idx128);
  SDValue SplatIdx64 = DAG.getNode(ISD::SPLAT_VECTOR, DL, MVT::nxv2i64, Idx64);
  SDValue ShuffleMask = DAG.getNode(ISD::ADD, DL, MVT::nxv2i64, SV, SplatIdx64);

  // Gather V[mask[i]] for each lane, then cast back to the requested type.
  SDValue TBL = DAG.getNode(AArch64ISD::TBL, DL, MVT::nxv2i64, V, ShuffleMask);
  return DAG.getNode(ISD::BITCAST, DL, VT, TBL);
}
9726 | |
9727 | |
9728 | static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits, |
9729 | APInt &UndefBits) { |
9730 | EVT VT = BVN->getValueType(0); |
9731 | APInt SplatBits, SplatUndef; |
9732 | unsigned SplatBitSize; |
9733 | bool HasAnyUndefs; |
9734 | if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { |
9735 | unsigned NumSplats = VT.getSizeInBits() / SplatBitSize; |
9736 | |
9737 | for (unsigned i = 0; i < NumSplats; ++i) { |
9738 | CnstBits <<= SplatBitSize; |
9739 | UndefBits <<= SplatBitSize; |
9740 | CnstBits |= SplatBits.zextOrTrunc(VT.getSizeInBits()); |
9741 | UndefBits |= (SplatBits ^ SplatUndef).zextOrTrunc(VT.getSizeInBits()); |
9742 | } |
9743 | |
9744 | return true; |
9745 | } |
9746 | |
9747 | return false; |
9748 | } |
9749 | |
9750 | |
9751 | static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG, |
9752 | const APInt &Bits) { |
9753 | if (Bits.getHiBits(64) == Bits.getLoBits(64)) { |
9754 | uint64_t Value = Bits.zextOrTrunc(64).getZExtValue(); |
9755 | EVT VT = Op.getValueType(); |
9756 | MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v2i64 : MVT::f64; |
9757 | |
9758 | if (AArch64_AM::isAdvSIMDModImmType10(Value)) { |
9759 | Value = AArch64_AM::encodeAdvSIMDModImmType10(Value); |
9760 | |
9761 | SDLoc dl(Op); |
9762 | SDValue Mov = DAG.getNode(NewOp, dl, MovTy, |
9763 | DAG.getConstant(Value, dl, MVT::i32)); |
9764 | return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); |
9765 | } |
9766 | } |
9767 | |
9768 | return SDValue(); |
9769 | } |
9770 | |
9771 | |
9772 | static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG, |
9773 | const APInt &Bits, |
9774 | const SDValue *LHS = nullptr) { |
9775 | if (Bits.getHiBits(64) == Bits.getLoBits(64)) { |
9776 | uint64_t Value = Bits.zextOrTrunc(64).getZExtValue(); |
9777 | EVT VT = Op.getValueType(); |
9778 | MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; |
9779 | bool isAdvSIMDModImm = false; |
9780 | uint64_t Shift; |
9781 | |
9782 | if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType1(Value))) { |
9783 | Value = AArch64_AM::encodeAdvSIMDModImmType1(Value); |
9784 | Shift = 0; |
9785 | } |
9786 | else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType2(Value))) { |
9787 | Value = AArch64_AM::encodeAdvSIMDModImmType2(Value); |
9788 | Shift = 8; |
9789 | } |
9790 | else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType3(Value))) { |
9791 | Value = AArch64_AM::encodeAdvSIMDModImmType3(Value); |
9792 | Shift = 16; |
9793 | } |
9794 | else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType4(Value))) { |
9795 | Value = AArch64_AM::encodeAdvSIMDModImmType4(Value); |
9796 | Shift = 24; |
9797 | } |
9798 | |
9799 | if (isAdvSIMDModImm) { |
9800 | SDLoc dl(Op); |
9801 | SDValue Mov; |
9802 | |
9803 | if (LHS) |
9804 | Mov = DAG.getNode(NewOp, dl, MovTy, *LHS, |
9805 | DAG.getConstant(Value, dl, MVT::i32), |
9806 | DAG.getConstant(Shift, dl, MVT::i32)); |
9807 | else |
9808 | Mov = DAG.getNode(NewOp, dl, MovTy, |
9809 | DAG.getConstant(Value, dl, MVT::i32), |
9810 | DAG.getConstant(Shift, dl, MVT::i32)); |
9811 | |
9812 | return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); |
9813 | } |
9814 | } |
9815 | |
9816 | return SDValue(); |
9817 | } |
9818 | |
9819 | |
9820 | static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG, |
9821 | const APInt &Bits, |
9822 | const SDValue *LHS = nullptr) { |
9823 | if (Bits.getHiBits(64) == Bits.getLoBits(64)) { |
9824 | uint64_t Value = Bits.zextOrTrunc(64).getZExtValue(); |
9825 | EVT VT = Op.getValueType(); |
9826 | MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16; |
9827 | bool isAdvSIMDModImm = false; |
9828 | uint64_t Shift; |
9829 | |
9830 | if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType5(Value))) { |
9831 | Value = AArch64_AM::encodeAdvSIMDModImmType5(Value); |
9832 | Shift = 0; |
9833 | } |
9834 | else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType6(Value))) { |
9835 | Value = AArch64_AM::encodeAdvSIMDModImmType6(Value); |
9836 | Shift = 8; |
9837 | } |
9838 | |
9839 | if (isAdvSIMDModImm) { |
9840 | SDLoc dl(Op); |
9841 | SDValue Mov; |
9842 | |
9843 | if (LHS) |
9844 | Mov = DAG.getNode(NewOp, dl, MovTy, *LHS, |
9845 | DAG.getConstant(Value, dl, MVT::i32), |
9846 | DAG.getConstant(Shift, dl, MVT::i32)); |
9847 | else |
9848 | Mov = DAG.getNode(NewOp, dl, MovTy, |
9849 | DAG.getConstant(Value, dl, MVT::i32), |
9850 | DAG.getConstant(Shift, dl, MVT::i32)); |
9851 | |
9852 | return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); |
9853 | } |
9854 | } |
9855 | |
9856 | return SDValue(); |
9857 | } |
9858 | |
9859 | |
9860 | static SDValue tryAdvSIMDModImm321s(unsigned NewOp, SDValue Op, |
9861 | SelectionDAG &DAG, const APInt &Bits) { |
9862 | if (Bits.getHiBits(64) == Bits.getLoBits(64)) { |
9863 | uint64_t Value = Bits.zextOrTrunc(64).getZExtValue(); |
9864 | EVT VT = Op.getValueType(); |
9865 | MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; |
9866 | bool isAdvSIMDModImm = false; |
9867 | uint64_t Shift; |
9868 | |
9869 | if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType7(Value))) { |
9870 | Value = AArch64_AM::encodeAdvSIMDModImmType7(Value); |
9871 | Shift = 264; |
9872 | } |
9873 | else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType8(Value))) { |
9874 | Value = AArch64_AM::encodeAdvSIMDModImmType8(Value); |
9875 | Shift = 272; |
9876 | } |
9877 | |
9878 | if (isAdvSIMDModImm) { |
9879 | SDLoc dl(Op); |
9880 | SDValue Mov = DAG.getNode(NewOp, dl, MovTy, |
9881 | DAG.getConstant(Value, dl, MVT::i32), |
9882 | DAG.getConstant(Shift, dl, MVT::i32)); |
9883 | return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); |
9884 | } |
9885 | } |
9886 | |
9887 | return SDValue(); |
9888 | } |
9889 | |
9890 | |
9891 | static SDValue tryAdvSIMDModImm8(unsigned NewOp, SDValue Op, SelectionDAG &DAG, |
9892 | const APInt &Bits) { |
9893 | if (Bits.getHiBits(64) == Bits.getLoBits(64)) { |
9894 | uint64_t Value = Bits.zextOrTrunc(64).getZExtValue(); |
9895 | EVT VT = Op.getValueType(); |
9896 | MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v16i8 : MVT::v8i8; |
9897 | |
9898 | if (AArch64_AM::isAdvSIMDModImmType9(Value)) { |
9899 | Value = AArch64_AM::encodeAdvSIMDModImmType9(Value); |
9900 | |
9901 | SDLoc dl(Op); |
9902 | SDValue Mov = DAG.getNode(NewOp, dl, MovTy, |
9903 | DAG.getConstant(Value, dl, MVT::i32)); |
9904 | return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); |
9905 | } |
9906 | } |
9907 | |
9908 | return SDValue(); |
9909 | } |
9910 | |
9911 | |
9912 | static SDValue tryAdvSIMDModImmFP(unsigned NewOp, SDValue Op, SelectionDAG &DAG, |
9913 | const APInt &Bits) { |
9914 | if (Bits.getHiBits(64) == Bits.getLoBits(64)) { |
9915 | uint64_t Value = Bits.zextOrTrunc(64).getZExtValue(); |
9916 | EVT VT = Op.getValueType(); |
9917 | bool isWide = (VT.getSizeInBits() == 128); |
9918 | MVT MovTy; |
9919 | bool isAdvSIMDModImm = false; |
9920 | |
9921 | if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType11(Value))) { |
9922 | Value = AArch64_AM::encodeAdvSIMDModImmType11(Value); |
9923 | MovTy = isWide ? MVT::v4f32 : MVT::v2f32; |
9924 | } |
9925 | else if (isWide && |
9926 | (isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType12(Value))) { |
9927 | Value = AArch64_AM::encodeAdvSIMDModImmType12(Value); |
9928 | MovTy = MVT::v2f64; |
9929 | } |
9930 | |
9931 | if (isAdvSIMDModImm) { |
9932 | SDLoc dl(Op); |
9933 | SDValue Mov = DAG.getNode(NewOp, dl, MovTy, |
9934 | DAG.getConstant(Value, dl, MVT::i32)); |
9935 | return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov); |
9936 | } |
9937 | } |
9938 | |
9939 | return SDValue(); |
9940 | } |
9941 | |
9942 | |
9943 | |
9944 | |
// Returns true when PotentialBVec is a BUILD_VECTOR whose lanes are all the
// same constant node; on success, ConstVal receives that constant's value.
static bool isAllConstantBuildVector(const SDValue &PotentialBVec,
                                     uint64_t &ConstVal) {
  BuildVectorSDNode *Bvec = dyn_cast<BuildVectorSDNode>(PotentialBVec);
  if (!Bvec)
    return false;
  ConstantSDNode *FirstElt = dyn_cast<ConstantSDNode>(Bvec->getOperand(0));
  if (!FirstElt)
    return false;
  EVT VT = Bvec->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();
  // Lanes are compared by node *identity*, not by value: equal constants
  // are expected to be uniqued to one node by the DAG (verify — this is a
  // SelectionDAG CSE property, not enforced here).  A non-constant lane
  // yields a null dyn_cast result, which also compares unequal.
  for (unsigned i = 1; i < NumElts; ++i)
    if (dyn_cast<ConstantSDNode>(Bvec->getOperand(i)) != FirstElt)
      return false;
  ConstVal = FirstElt->getZExtValue();
  return true;
}
9961 | |
9962 | static unsigned getIntrinsicID(const SDNode *N) { |
9963 | unsigned Opcode = N->getOpcode(); |
9964 | switch (Opcode) { |
9965 | default: |
9966 | return Intrinsic::not_intrinsic; |
9967 | case ISD::INTRINSIC_WO_CHAIN: { |
9968 | unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); |
9969 | if (IID < Intrinsic::num_intrinsics) |
9970 | return IID; |
9971 | return Intrinsic::not_intrinsic; |
9972 | } |
9973 | } |
9974 | } |
9975 | |
9976 | |
9977 | |
9978 | |
9979 | |
9980 | |
9981 | |
9982 | static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) { |
9983 | EVT VT = N->getValueType(0); |
9984 | |
9985 | if (!VT.isVector()) |
9986 | return SDValue(); |
9987 | |
9988 | SDLoc DL(N); |
9989 | |
9990 | SDValue And; |
9991 | SDValue Shift; |
9992 | |
9993 | SDValue FirstOp = N->getOperand(0); |
9994 | unsigned FirstOpc = FirstOp.getOpcode(); |
9995 | SDValue SecondOp = N->getOperand(1); |
9996 | unsigned SecondOpc = SecondOp.getOpcode(); |
9997 | |
9998 | |
9999 | |
10000 | |
10001 | |
10002 | if ((FirstOpc == ISD::AND || FirstOpc == AArch64ISD::BICi) && |
10003 | (SecondOpc == AArch64ISD::VSHL || SecondOpc == AArch64ISD::VLSHR)) { |
10004 | And = FirstOp; |
10005 | Shift = SecondOp; |
10006 | |
10007 | } else if ((SecondOpc == ISD::AND || SecondOpc == AArch64ISD::BICi) && |
10008 | (FirstOpc == AArch64ISD::VSHL || FirstOpc == AArch64ISD::VLSHR)) { |
10009 | And = SecondOp; |
10010 | Shift = FirstOp; |
10011 | } else |
10012 | return SDValue(); |
10013 | |
10014 | bool IsAnd = And.getOpcode() == ISD::AND; |
10015 | bool IsShiftRight = Shift.getOpcode() == AArch64ISD::VLSHR; |
10016 | |
10017 | |
10018 | ConstantSDNode *C2node = dyn_cast<ConstantSDNode>(Shift.getOperand(1)); |
10019 | if (!C2node) |
10020 | return SDValue(); |
10021 | |
10022 | uint64_t C1; |
10023 | if (IsAnd) { |
10024 | |
10025 | if (!isAllConstantBuildVector(And.getOperand(1), C1)) |
10026 | return SDValue(); |
10027 | } else { |
10028 | |
10029 | ConstantSDNode *C1nodeImm = dyn_cast<ConstantSDNode>(And.getOperand(1)); |
10030 | ConstantSDNode *C1nodeShift = dyn_cast<ConstantSDNode>(And.getOperand(2)); |
10031 | assert(C1nodeImm && C1nodeShift); |
10032 | C1 = ~(C1nodeImm->getZExtValue() << C1nodeShift->getZExtValue()); |
10033 | } |
10034 | |
10035 | |
10036 | |
10037 | |
10038 | uint64_t C2 = C2node->getZExtValue(); |
10039 | unsigned ElemSizeInBits = VT.getScalarSizeInBits(); |
10040 | if (C2 > ElemSizeInBits) |
10041 | return SDValue(); |
10042 | |
10043 | APInt C1AsAPInt(ElemSizeInBits, C1); |
10044 | APInt RequiredC1 = IsShiftRight ? APInt::getHighBitsSet(ElemSizeInBits, C2) |
10045 | : APInt::getLowBitsSet(ElemSizeInBits, C2); |
10046 | if (C1AsAPInt != RequiredC1) |
10047 | return SDValue(); |
10048 | |
10049 | SDValue X = And.getOperand(0); |
10050 | SDValue Y = Shift.getOperand(0); |
10051 | |
10052 | unsigned Inst = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI; |
10053 | SDValue ResultSLI = DAG.getNode(Inst, DL, VT, X, Y, Shift.getOperand(1)); |
10054 | |
10055 | LLVM_DEBUG(dbgs() << "aarch64-lower: transformed: \n"); |
10056 | LLVM_DEBUG(N->dump(&DAG)); |
10057 | LLVM_DEBUG(dbgs() << "into: \n"); |
10058 | LLVM_DEBUG(ResultSLI->dump(&DAG)); |
10059 | |
10060 | ++NumShiftInserts; |
10061 | return ResultSLI; |
10062 | } |
10063 | |
10064 | SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op, |
10065 | SelectionDAG &DAG) const { |
10066 | if (useSVEForFixedLengthVectorVT(Op.getValueType())) |
10067 | return LowerToScalableOp(Op, DAG); |
10068 | |
10069 | |
10070 | if (SDValue Res = tryLowerToSLI(Op.getNode(), DAG)) |
10071 | return Res; |
10072 | |
10073 | EVT VT = Op.getValueType(); |
10074 | |
10075 | SDValue LHS = Op.getOperand(0); |
10076 | BuildVectorSDNode *BVN = |
10077 | dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode()); |
10078 | if (!BVN) { |
10079 | |
10080 | LHS = Op.getOperand(1); |
10081 | BVN = dyn_cast<BuildVectorSDNode>(Op.getOperand(0).getNode()); |
10082 | } |
10083 | if (!BVN) |
10084 | return Op; |
10085 | |
10086 | APInt DefBits(VT.getSizeInBits(), 0); |
10087 | APInt UndefBits(VT.getSizeInBits(), 0); |
10088 | if (resolveBuildVector(BVN, DefBits, UndefBits)) { |
10089 | SDValue NewOp; |
10090 | |
10091 | if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::ORRi, Op, DAG, |
10092 | DefBits, &LHS)) || |
10093 | (NewOp = tryAdvSIMDModImm16(AArch64ISD::ORRi, Op, DAG, |
10094 | DefBits, &LHS))) |
10095 | return NewOp; |
10096 | |
10097 | if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::ORRi, Op, DAG, |
10098 | UndefBits, &LHS)) || |
10099 | (NewOp = tryAdvSIMDModImm16(AArch64ISD::ORRi, Op, DAG, |
10100 | UndefBits, &LHS))) |
10101 | return NewOp; |
10102 | } |
10103 | |
10104 | |
10105 | return Op; |
10106 | } |
10107 | |
10108 | |
10109 | |
10110 | static SDValue NormalizeBuildVector(SDValue Op, |
10111 | SelectionDAG &DAG) { |
10112 | assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!"); |
10113 | SDLoc dl(Op); |
10114 | EVT VT = Op.getValueType(); |
10115 | EVT EltTy= VT.getVectorElementType(); |
10116 | |
10117 | if (EltTy.isFloatingPoint() || EltTy.getSizeInBits() > 16) |
10118 | return Op; |
10119 | |
10120 | SmallVector<SDValue, 16> Ops; |
10121 | for (SDValue Lane : Op->ops()) { |
10122 | |
10123 | |
10124 | |
10125 | |
10126 | if (auto *CstLane = dyn_cast<ConstantSDNode>(Lane)) { |
10127 | APInt LowBits(EltTy.getSizeInBits(), |
10128 | CstLane->getZExtValue()); |
10129 | Lane = DAG.getConstant(LowBits.getZExtValue(), dl, MVT::i32); |
10130 | } else if (Lane.getNode()->isUndef()) { |
10131 | Lane = DAG.getUNDEF(MVT::i32); |
10132 | } else { |
10133 | assert(Lane.getValueType() == MVT::i32 && |
10134 | "Unexpected BUILD_VECTOR operand type"); |
10135 | } |
10136 | Ops.push_back(Lane); |
10137 | } |
10138 | return DAG.getBuildVector(VT, dl, Ops); |
10139 | } |
10140 | |
// Try to materialise a constant BUILD_VECTOR with a single AdvSIMD modified
// immediate instruction.  Encoding families are tried in a fixed preference
// order; the order is significant, so keep it when modifying.
static SDValue ConstantBuildVector(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();

  APInt DefBits(VT.getSizeInBits(), 0);
  APInt UndefBits(VT.getSizeInBits(), 0);
  BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
  if (resolveBuildVector(BVN, DefBits, UndefBits)) {
    SDValue NewOp;
    // 1) MOVI/FMOV forms on the defined bits.
    if ((NewOp = tryAdvSIMDModImm64(AArch64ISD::MOVIedit, Op, DAG, DefBits)) ||
        (NewOp = tryAdvSIMDModImm32(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
        (NewOp = tryAdvSIMDModImm321s(AArch64ISD::MOVImsl, Op, DAG, DefBits)) ||
        (NewOp = tryAdvSIMDModImm16(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
        (NewOp = tryAdvSIMDModImm8(AArch64ISD::MOVI, Op, DAG, DefBits)) ||
        (NewOp = tryAdvSIMDModImmFP(AArch64ISD::FMOV, Op, DAG, DefBits)))
      return NewOp;

    // 2) MVNI forms on the inverted defined bits.
    DefBits = ~DefBits;
    if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::MVNIshift, Op, DAG, DefBits)) ||
        (NewOp = tryAdvSIMDModImm321s(AArch64ISD::MVNImsl, Op, DAG, DefBits)) ||
        (NewOp = tryAdvSIMDModImm16(AArch64ISD::MVNIshift, Op, DAG, DefBits)))
      return NewOp;

    // 3) Retry MOVI/FMOV, choosing the undef-lane bits freely (UndefBits
    //    has those bits set — see resolveBuildVector).
    DefBits = UndefBits;
    if ((NewOp = tryAdvSIMDModImm64(AArch64ISD::MOVIedit, Op, DAG, DefBits)) ||
        (NewOp = tryAdvSIMDModImm32(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
        (NewOp = tryAdvSIMDModImm321s(AArch64ISD::MOVImsl, Op, DAG, DefBits)) ||
        (NewOp = tryAdvSIMDModImm16(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
        (NewOp = tryAdvSIMDModImm8(AArch64ISD::MOVI, Op, DAG, DefBits)) ||
        (NewOp = tryAdvSIMDModImmFP(AArch64ISD::FMOV, Op, DAG, DefBits)))
      return NewOp;

    // 4) And the MVNI retry on the inverted undef-chosen bits.
    DefBits = ~UndefBits;
    if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::MVNIshift, Op, DAG, DefBits)) ||
        (NewOp = tryAdvSIMDModImm321s(AArch64ISD::MVNImsl, Op, DAG, DefBits)) ||
        (NewOp = tryAdvSIMDModImm16(AArch64ISD::MVNIshift, Op, DAG, DefBits)))
      return NewOp;
  }

  return SDValue();
}
10181 | |
// Lower BUILD_VECTOR by trying, in order: modified-immediate materialisation,
// SCALAR_TO_VECTOR, UZP1/UZP2 reconstruction, DUP/DUPLANE splats, constant
// splat + lane inserts, shuffle reconstruction, DUP + inserts, and finally a
// plain INSERT_VECTOR_ELT chain.  Returns SDValue() to request the target-
// independent default expansion.
SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
                                                 SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();

  // Canonicalise narrow-integer lanes to i32 operands first.
  Op = NormalizeBuildVector(Op, DAG);
  if (VT.isInteger()) {
    // Certain vector constants (all-zeros / all-ones splats) are returned
    // unchanged: later code relies on recognising them directly.
    BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
    if (BVN->isConstant())
      if (ConstantSDNode *Const = BVN->getConstantSplatNode()) {
        unsigned BitSize = VT.getVectorElementType().getSizeInBits();
        APInt Val(BitSize,
                  Const->getAPIntValue().zextOrTrunc(BitSize).getZExtValue());
        if (Val.isNullValue() || Val.isAllOnesValue())
          return Op;
      }
  }

  // Try a single modified-immediate instruction (MOVI/MVNI/FMOV).
  if (SDValue V = ConstantBuildVector(Op, DAG))
    return V;

  // Scan the lanes once, gathering the facts the strategies below need:
  // whether only lane 0 is defined, whether a single value (or single
  // constant) is splatted, how many lanes are constant / distinct / undef,
  // and whether every lane is an EXTRACT_VECTOR_ELT.
  SDLoc dl(Op);
  unsigned NumElts = VT.getVectorNumElements();
  bool isOnlyLowElement = true;
  bool usesOnlyOneValue = true;
  bool usesOnlyOneConstantValue = true;
  bool isConstant = true;
  bool AllLanesExtractElt = true;
  unsigned NumConstantLanes = 0;
  unsigned NumDifferentLanes = 0;
  unsigned NumUndefLanes = 0;
  SDValue Value;
  SDValue ConstantValue;
  for (unsigned i = 0; i < NumElts; ++i) {
    SDValue V = Op.getOperand(i);
    if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      AllLanesExtractElt = false;
    if (V.isUndef()) {
      ++NumUndefLanes;
      continue;
    }
    if (i > 0)
      isOnlyLowElement = false;
    if (!isIntOrFPConstant(V))
      isConstant = false;

    if (isIntOrFPConstant(V)) {
      ++NumConstantLanes;
      if (!ConstantValue.getNode())
        ConstantValue = V;
      else if (ConstantValue != V)
        usesOnlyOneConstantValue = false;
    }

    if (!Value.getNode())
      Value = V;
    else if (V != Value) {
      usesOnlyOneValue = false;
      ++NumDifferentLanes;
    }
  }

  // Every lane undef: the whole vector is undef.
  if (!Value.getNode()) {
    LLVM_DEBUG(
        dbgs() << "LowerBUILD_VECTOR: value undefined, creating undef node\n");
    return DAG.getUNDEF(VT);
  }

  // Only lane 0 defined (and not a lone constant, which the default
  // expansion handles better): a single SCALAR_TO_VECTOR suffices.
  if (isOnlyLowElement && !(NumElts == 1 && isIntOrFPConstant(Value))) {
    LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: only low element used, creating 1 "
                         "SCALAR_TO_VECTOR node\n");
    return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
  }

  // If every lane extracts from one wider vector at strictly even (2*i) or
  // strictly odd (2*i+1) indices, the whole BUILD_VECTOR is an UZP1/UZP2 of
  // that vector's two halves.
  if (AllLanesExtractElt) {
    SDNode *Vector = nullptr;
    bool Even = false;
    bool Odd = false;

    // Check whether the extract-element indices match the even/odd pattern.
    for (unsigned i = 0; i < NumElts; ++i) {
      SDValue V = Op.getOperand(i);
      const SDNode *N = V.getNode();
      if (!isa<ConstantSDNode>(N->getOperand(1)))
        break;
      SDValue N0 = N->getOperand(0);

      // All lanes must read from the same source vector, and its element
      // type must match the result's.
      if (!Vector) {
        Vector = N0.getNode();
        if (VT.getVectorElementType() !=
            N0.getValueType().getVectorElementType())
          break;
      } else if (Vector != N0.getNode()) {
        Odd = false;
        Even = false;
        break;
      }

      // Lane i must read index 2*i (even pattern) or 2*i+1 (odd pattern).
      uint64_t Val = N->getConstantOperandVal(1);
      if (Val == 2 * i) {
        Even = true;
        continue;
      }
      if (Val - 1 == 2 * i) {
        Odd = true;
        continue;
      }

      // Index fits neither pattern: give up on UZP.
      Odd = false;
      Even = false;
      break;
    }
    if (Even || Odd) {
      SDValue LHS =
          DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SDValue(Vector, 0),
                      DAG.getConstant(0, dl, MVT::i64));
      SDValue RHS =
          DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SDValue(Vector, 0),
                      DAG.getConstant(NumElts, dl, MVT::i64));

      if (Even && !Odd)
        return DAG.getNode(AArch64ISD::UZP1, dl, DAG.getVTList(VT, VT), LHS,
                           RHS);
      if (Odd && !Even)
        return DAG.getNode(AArch64ISD::UZP2, dl, DAG.getVTList(VT, VT), LHS,
                           RHS);
    }
  }

  // Splat handling: one value in every defined lane.
  if (usesOnlyOneValue) {
    if (!isConstant) {
      // Non-constant splat: use DUP from a register, or DUPLANE when the
      // value is itself an extracted lane.
      if (Value.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
          Value.getValueType() != VT) {
        LLVM_DEBUG(
            dbgs() << "LowerBUILD_VECTOR: use DUP for non-constant splats\n");
        return DAG.getNode(AArch64ISD::DUP, dl, VT, Value);
      }

      // Splat of an extracted element: DUPLANE from the source vector,
      // widening 64-bit sources since DUPLANE takes a 128-bit input.
      SDValue Lane = Value.getOperand(1);
      Value = Value.getOperand(0);
      if (Value.getValueSizeInBits() == 64) {
        LLVM_DEBUG(
            dbgs() << "LowerBUILD_VECTOR: DUPLANE works on 128-bit vectors, "
                      "widening it\n");
        Value = WidenVector(Value, DAG);
      }

      unsigned Opcode = getDUPLANEOp(VT.getVectorElementType());
      return DAG.getNode(Opcode, dl, VT, Value, Lane);
    }

    // Constant FP splat that the immediate forms above missed: bitcast the
    // lanes to integers and retry this whole lowering on the int vector.
    if (VT.getVectorElementType().isFloatingPoint()) {
      SmallVector<SDValue, 8> Ops;
      EVT EltTy = VT.getVectorElementType();
      assert ((EltTy == MVT::f16 || EltTy == MVT::bf16 || EltTy == MVT::f32 ||
               EltTy == MVT::f64) && "Unsupported floating-point vector type");
      LLVM_DEBUG(
          dbgs() << "LowerBUILD_VECTOR: float constant splats, creating int "
                    "BITCASTS, and try again\n");
      MVT NewType = MVT::getIntegerVT(EltTy.getSizeInBits());
      for (unsigned i = 0; i < NumElts; ++i)
        Ops.push_back(DAG.getNode(ISD::BITCAST, dl, NewType, Op.getOperand(i)));
      EVT VecVT = EVT::getVectorVT(*DAG.getContext(), NewType, NumElts);
      SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
      LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: trying to lower new vector: ";
                 Val.dump(););
      Val = LowerBUILD_VECTOR(Val, DAG);
      if (Val.getNode())
        return DAG.getNode(ISD::BITCAST, dl, VT, Val);
    }
  }

  // Heuristic: when few distinct non-constant values dominate the defined
  // lanes, a DUP of the common value plus lane inserts is preferred (built
  // further below, after shuffle reconstruction gets a chance).
  bool PreferDUPAndInsert =
      !isConstant && NumDifferentLanes >= 1 &&
      NumDifferentLanes < ((NumElts - NumUndefLanes) / 2) &&
      NumDifferentLanes >= NumConstantLanes;

  // Mixed constant/variable lanes with a single constant value: splat the
  // constant across the vector and insert the variable lanes over it.
  if (!PreferDUPAndInsert && NumConstantLanes > 0 && usesOnlyOneConstantValue) {
    // Materialise the constant splat, via immediate if possible.
    SDValue Vec = DAG.getSplatBuildVector(VT, dl, ConstantValue),
            Val = ConstantBuildVector(Vec, DAG);
    if (!Val) {
      // Fall back to DUP; redirect any users of the splat node to it.
      Val = DAG.getNode(AArch64ISD::DUP, dl, VT, ConstantValue);
      DAG.ReplaceAllUsesWith(Vec.getNode(), &Val);
    }

    // Overwrite each non-constant lane with its actual value.
    for (unsigned i = 0; i < NumElts; ++i) {
      SDValue V = Op.getOperand(i);
      SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i64);
      if (!isIntOrFPConstant(V))
        // Note that type legalization likely mucked about with the VT of the
        // source operand, so we may have to convert it here before inserting.
        Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, V, LaneIdx);
    }
    return Val;
  }

  // All-constant vectors we could not improve: let the default expansion
  // (e.g. a constant-pool load) handle them.
  if (isConstant) {
    LLVM_DEBUG(
        dbgs() << "LowerBUILD_VECTOR: all elements are constant, use default "
                  "expansion\n");
    return SDValue();
  }

  // Try to express the BUILD_VECTOR as a shuffle of its lane sources.
  if (NumElts >= 4) {
    if (SDValue shuffle = ReconstructShuffle(Op, DAG))
      return shuffle;
  }

  // DUP the dominant value, then insert the few differing lanes.
  if (PreferDUPAndInsert) {
    // First, build a constant vector with the common element.
    SmallVector<SDValue, 8> Ops(NumElts, Value);
    SDValue NewVector = LowerBUILD_VECTOR(DAG.getBuildVector(VT, dl, Ops), DAG);
    // Next, insert the elements that differ.
    for (unsigned I = 0; I < NumElts; ++I)
      if (Op.getOperand(I) != Value)
        NewVector =
            DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, NewVector,
                        Op.getOperand(I), DAG.getConstant(I, dl, MVT::i64));

    return NewVector;
  }

  // Last resort for varied, non-constant lanes: SCALAR_TO_VECTOR for lane 0
  // followed by INSERT_VECTOR_ELT for each remaining defined lane.
  if (!isConstant && !usesOnlyOneValue) {
    LLVM_DEBUG(
        dbgs() << "LowerBUILD_VECTOR: alternatives failed, creating sequence "
                  "of INSERT_VECTOR_ELT\n");

    SDValue Vec = DAG.getUNDEF(VT);
    SDValue Op0 = Op.getOperand(0);
    unsigned i = 0;

    // Lane 0 goes in via SCALAR_TO_VECTOR, which is free on AArch64 for
    // GPR->FPR moves of the low lane; subsequent lanes use lane inserts.
    // NOTE(review): the exact cost rationale is not visible here — confirm
    // against the instruction patterns if it matters.
    if (!Op0.isUndef()) {
      LLVM_DEBUG(dbgs() << "Creating node for op0, it is not undefined:\n");
      Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op0);
      ++i;
    }
    LLVM_DEBUG(if (i < NumElts) dbgs()
                   << "Creating nodes for the other vector elements:\n";);
    for (; i < NumElts; ++i) {
      SDValue V = Op.getOperand(i);
      if (V.isUndef())
        continue;
      SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i64);
      Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
    }
    return Vec;
  }

  // Nothing matched; request the target-independent default expansion.
  LLVM_DEBUG(
      dbgs() << "LowerBUILD_VECTOR: use default expansion, failed to find "
                "better alternative\n");
  return SDValue();
}
10493 | |
// Lower CONCAT_VECTORS. Fixed-length results are redirected to the SVE
// lowering; scalable results with legal input types are reduced to a tree of
// two-operand concats.
SDValue AArch64TargetLowering::LowerCONCAT_VECTORS(SDValue Op,
                                                   SelectionDAG &DAG) const {
  if (useSVEForFixedLengthVectorVT(Op.getValueType()))
    return LowerFixedLengthConcatVectorsToSVE(Op, DAG);

  assert(Op.getValueType().isScalableVector() &&
         isTypeLegal(Op.getValueType()) &&
         "Expected legal scalable vector type!");

  if (isTypeLegal(Op.getOperand(0).getValueType())) {
    unsigned NumOperands = Op->getNumOperands();
    assert(NumOperands > 1 && isPowerOf2_32(NumOperands) &&
           "Unexpected number of operands in CONCAT_VECTORS");

    // A two-operand concat of legal types needs no further lowering.
    if (NumOperands == 2)
      return Op;

    // Concat each adjacent pair into a double-width vector, packing results
    // into the low half of the worklist; halve the list until one value
    // (built entirely from two-way concats) remains.
    SmallVector<SDValue> ConcatOps(Op->op_begin(), Op->op_end());
    while (ConcatOps.size() > 1) {
      for (unsigned I = 0, E = ConcatOps.size(); I != E; I += 2) {
        SDValue V1 = ConcatOps[I];
        SDValue V2 = ConcatOps[I + 1];
        EVT SubVT = V1.getValueType();
        EVT PairVT = SubVT.getDoubleNumVectorElementsVT(*DAG.getContext());
        ConcatOps[I / 2] =
            DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op), PairVT, V1, V2);
      }
      ConcatOps.resize(ConcatOps.size() / 2);
    }
    return ConcatOps[0];
  }

  return SDValue();
}
10529 | |
// Lower INSERT_VECTOR_ELT. Predicate (i1) vectors are promoted to an integer
// vector for the insert; 64-bit NEON vectors are widened to 128 bits so the
// insert happens on a full register, then narrowed back.
SDValue AArch64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
                                                      SelectionDAG &DAG) const {
  assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");

  if (useSVEForFixedLengthVectorVT(Op.getValueType()))
    return LowerFixedLengthInsertVectorElt(Op, DAG);

  EVT VT = Op.getOperand(0).getValueType();

  if (VT.getScalarType() == MVT::i1) {
    // Do the insert in a promoted integer vector type and truncate the
    // result back to the predicate type.
    EVT VectorVT = getPromotedVTForPredicate(VT);
    SDLoc DL(Op);
    SDValue ExtendedVector =
        DAG.getAnyExtOrTrunc(Op.getOperand(0), DL, VectorVT);
    // Sub-32-bit scalar elements are inserted as i32.
    SDValue ExtendedValue =
        DAG.getAnyExtOrTrunc(Op.getOperand(1), DL,
                             VectorVT.getScalarType().getSizeInBits() < 32
                                 ? MVT::i32
                                 : VectorVT.getScalarType());
    ExtendedVector =
        DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VectorVT, ExtendedVector,
                    ExtendedValue, Op.getOperand(2));
    return DAG.getAnyExtOrTrunc(ExtendedVector, DL, VT);
  }

  // Give up on non-constant or out-of-range lane indices; the default
  // expansion handles those.
  ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Op.getOperand(2));
  if (!CI || CI->getZExtValue() >= VT.getVectorNumElements())
    return SDValue();

  // 128-bit vector types need no further lowering.
  if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
      VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
      VT == MVT::v8f16 || VT == MVT::v8bf16)
    return Op;

  // Anything other than the known 64-bit vector types is left to the
  // default expansion.
  if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
      VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16 &&
      VT != MVT::v4bf16)
    return SDValue();

  // Widen the 64-bit vector to 128 bits, insert, then narrow back.
  SDLoc DL(Op);
  SDValue WideVec = WidenVector(Op.getOperand(0), DAG);
  EVT WideTy = WideVec.getValueType();

  SDValue Node = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideTy, WideVec,
                             Op.getOperand(1), Op.getOperand(2));

  return NarrowVector(Node, DAG);
}
10582 | |
// Lower EXTRACT_VECTOR_ELT. Predicate (i1) vectors are any-extended to an
// integer vector before the extract; 64-bit NEON vectors are widened to 128
// bits so the extract reads from a full register.
SDValue
AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
                                               SelectionDAG &DAG) const {
  assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!");
  EVT VT = Op.getOperand(0).getValueType();

  if (VT.getScalarType() == MVT::i1) {
    // Extract from a promoted integer vector, then resize the scalar to the
    // requested result type.
    EVT VectorVT = getPromotedVTForPredicate(VT);
    SDLoc DL(Op);
    SDValue Extend =
        DAG.getNode(ISD::ANY_EXTEND, DL, VectorVT, Op.getOperand(0));
    // 64-bit elements come out as i64; everything else fits in i32.
    MVT ExtractTy = VectorVT == MVT::nxv2i64 ? MVT::i64 : MVT::i32;
    SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtractTy,
                                  Extend, Op.getOperand(1));
    return DAG.getAnyExtOrTrunc(Extract, DL, Op.getValueType());
  }

  if (useSVEForFixedLengthVectorVT(VT))
    return LowerFixedLengthExtractVectorElt(Op, DAG);

  // Give up on non-constant or out-of-range lane indices; the default
  // expansion handles those.
  ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Op.getOperand(1));
  if (!CI || CI->getZExtValue() >= VT.getVectorNumElements())
    return SDValue();

  // 128-bit vector types need no further lowering.
  if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
      VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
      VT == MVT::v8f16 || VT == MVT::v8bf16)
    return Op;

  // Anything other than the known 64-bit vector types is left to the
  // default expansion.
  if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
      VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16 &&
      VT != MVT::v4bf16)
    return SDValue();

  // Widen the 64-bit source to 128 bits and extract from that.
  SDLoc DL(Op);
  SDValue WideVec = WidenVector(Op.getOperand(0), DAG);
  EVT WideTy = WideVec.getValueType();

  // i8/i16 elements are read out into an i32 scalar.
  EVT ExtrTy = WideTy.getVectorElementType();
  if (ExtrTy == MVT::i16 || ExtrTy == MVT::i8)
    ExtrTy = MVT::i32;

  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtrTy, WideVec,
                     Op.getOperand(1));
}
10635 | |
10636 | SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, |
10637 | SelectionDAG &DAG) const { |
10638 | assert(Op.getValueType().isFixedLengthVector() && |
10639 | "Only cases that extract a fixed length vector are supported!"); |
10640 | |
10641 | EVT InVT = Op.getOperand(0).getValueType(); |
10642 | unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); |
10643 | unsigned Size = Op.getValueSizeInBits(); |
10644 | |
10645 | if (InVT.isScalableVector()) { |
10646 | |
10647 | if (Idx == 0 && isPackedVectorType(InVT, DAG)) |
10648 | return Op; |
10649 | |
10650 | return SDValue(); |
10651 | } |
10652 | |
10653 | |
10654 | if (Idx == 0 && InVT.getSizeInBits() <= 128) |
10655 | return Op; |
10656 | |
10657 | |
10658 | |
10659 | if (Size == 64 && Idx * InVT.getScalarSizeInBits() == 64 && |
10660 | InVT.getSizeInBits() == 128) |
10661 | return Op; |
10662 | |
10663 | return SDValue(); |
10664 | } |
10665 | |
// Lower INSERT_SUBVECTOR where the result is a scalable vector. The only
// scalable-into-scalable case handled is inserting a half-element-count
// subvector into either half of the result, done via unpack + UZP1.
SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
                                                     SelectionDAG &DAG) const {
  assert(Op.getValueType().isScalableVector() &&
         "Only expect to lower inserts into scalable vectors!");

  EVT InVT = Op.getOperand(1).getValueType();
  unsigned Idx = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();

  if (InVT.isScalableVector()) {
    SDLoc DL(Op);
    EVT VT = Op.getValueType();

    if (!isTypeLegal(VT) || !VT.isInteger())
      return SDValue();

    SDValue Vec0 = Op.getOperand(0);
    SDValue Vec1 = Op.getOperand(1);

    // Only handle subvectors with exactly half the result's element count.
    if (VT.getVectorElementCount() != (InVT.getVectorElementCount() * 2))
      return SDValue();

    // Extend the inserted elements to the next wider element type...
    EVT WideVT = InVT.widenIntegerVectorElementType(*(DAG.getContext()));
    SDValue ExtVec = DAG.getNode(ISD::ANY_EXTEND, DL, WideVT, Vec1);

    // ...then recombine it with the untouched half of Vec0 (unpacked to the
    // same wide type) using AArch64ISD::UZP1. Idx == 0 replaces the low
    // half, Idx == MinNumElements the high half; any other index is
    // rejected.
    if (Idx == 0) {
      SDValue HiVec0 = DAG.getNode(AArch64ISD::UUNPKHI, DL, WideVT, Vec0);
      return DAG.getNode(AArch64ISD::UZP1, DL, VT, ExtVec, HiVec0);
    } else if (Idx == InVT.getVectorMinNumElements()) {
      SDValue LoVec0 = DAG.getNode(AArch64ISD::UUNPKLO, DL, WideVT, Vec0);
      return DAG.getNode(AArch64ISD::UZP1, DL, VT, LoVec0, ExtVec);
    }

    return SDValue();
  }

  // Fixed-length subvector: inserting a packed vector at index 0 into an
  // undef destination needs no further lowering.
  if (Idx == 0 && isPackedVectorType(InVT, DAG) && Op.getOperand(0).isUndef())
    return Op;

  return SDValue();
}
10709 | |
// Lower scalable-vector SDIV/UDIV. Predicated SVE divides exist only for
// 32- and 64-bit elements, so i8/i16 element types are unpacked into
// widened halves, divided, and packed back together with UZP1.
SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();

  if (useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true))
    return LowerFixedLengthVectorIntDivideToSVE(Op, DAG);

  assert(VT.isScalableVector() && "Expected a scalable vector.");

  bool Signed = Op.getOpcode() == ISD::SDIV;
  unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED;

  // 32- and 64-bit elements map directly onto the predicated divide nodes.
  if (VT == MVT::nxv4i32 || VT == MVT::nxv2i64)
    return LowerToPredicatedOp(Op, DAG, PredOpcode);

  // Pick the next wider element type for i8/i16 divides. An nxv8i16 divide
  // recursively reaches the directly supported nxv4i32 case.
  EVT WidenedVT;
  if (VT == MVT::nxv16i8)
    WidenedVT = MVT::nxv8i16;
  else if (VT == MVT::nxv8i16)
    WidenedVT = MVT::nxv4i32;
  else
    llvm_unreachable("Unexpected Custom DIV operation");

  // Unpack both operands into low/high halves — sign- or zero-extending to
  // match the division's signedness — divide each half, then narrow the two
  // results back into VT with UZP1.
  SDLoc dl(Op);
  unsigned UnpkLo = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
  unsigned UnpkHi = Signed ? AArch64ISD::SUNPKHI : AArch64ISD::UUNPKHI;
  SDValue Op0Lo = DAG.getNode(UnpkLo, dl, WidenedVT, Op.getOperand(0));
  SDValue Op1Lo = DAG.getNode(UnpkLo, dl, WidenedVT, Op.getOperand(1));
  SDValue Op0Hi = DAG.getNode(UnpkHi, dl, WidenedVT, Op.getOperand(0));
  SDValue Op1Hi = DAG.getNode(UnpkHi, dl, WidenedVT, Op.getOperand(1));
  SDValue ResultLo = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0Lo, Op1Lo);
  SDValue ResultHi = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0Hi, Op1Hi);
  return DAG.getNode(AArch64ISD::UZP1, dl, VT, ResultLo, ResultHi);
}
10745 | |
// Return true if a vector shuffle with mask M on type VT can be lowered to
// native AArch64 shuffle instructions rather than being expanded.
bool AArch64TargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
  // Fixed-length vectors routed through SVE do not use this NEON-oriented
  // legality check.
  if (useSVEForFixedLengthVectorVT(VT))
    return false;

  // Four-element shuffles are checked against the perfect-shuffle table.
  if (VT.getVectorNumElements() == 4 &&
      (VT.is128BitVector() || VT.is64BitVector())) {
    unsigned PFIndexes[4];
    for (unsigned i = 0; i != 4; ++i) {
      if (M[i] < 0)
        PFIndexes[i] = 8; // table's sentinel for an undef lane
      else
        PFIndexes[i] = M[i];
    }

    // The four lane indices form a base-9 index into the table; the entry's
    // cost lives in its top bits.
    unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
                            PFIndexes[2] * 9 + PFIndexes[3];
    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
    unsigned Cost = (PFEntry >> 30);

    // Shuffles costing at most 4 instructions are considered legal.
    if (Cost <= 4)
      return true;
  }

  // Otherwise the mask is legal iff it matches one of the dedicated shuffle
  // patterns (splat/REV/EXT/TRN/UZP/ZIP/INS/concat). The Dummy* outputs are
  // required by the matchers but unused here.
  bool DummyBool;
  int DummyInt;
  unsigned DummyUnsigned;

  return (ShuffleVectorSDNode::isSplatMask(&M[0], VT) || isREVMask(M, VT, 64) ||
          isREVMask(M, VT, 32) || isREVMask(M, VT, 16) ||
          isEXTMask(M, VT, DummyBool, DummyUnsigned) ||
          isTRNMask(M, VT, DummyUnsigned) || isUZPMask(M, VT, DummyUnsigned) ||
          isZIPMask(M, VT, DummyUnsigned) ||
          isTRN_v_undef_Mask(M, VT, DummyUnsigned) ||
          isUZP_v_undef_Mask(M, VT, DummyUnsigned) ||
          isZIP_v_undef_Mask(M, VT, DummyUnsigned) ||
          isINSMask(M, VT.getVectorNumElements(), DummyBool, DummyInt) ||
          isConcatMask(M, VT, VT.getSizeInBits() == 128));
}
10787 | |
10788 | |
10789 | |
10790 | |
10791 | static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) { |
10792 | |
10793 | while (Op.getOpcode() == ISD::BITCAST) |
10794 | Op = Op.getOperand(0); |
10795 | BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode()); |
10796 | APInt SplatBits, SplatUndef; |
10797 | unsigned SplatBitSize; |
10798 | bool HasAnyUndefs; |
10799 | if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, |
10800 | HasAnyUndefs, ElementBits) || |
10801 | SplatBitSize > ElementBits) |
10802 | return false; |
10803 | Cnt = SplatBits.getSExtValue(); |
10804 | return true; |
10805 | } |
10806 | |
10807 | |
10808 | |
10809 | |
10810 | |
10811 | static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) { |
10812 | assert(VT.isVector() && "vector shift count is not a vector type"); |
10813 | int64_t ElementBits = VT.getScalarSizeInBits(); |
10814 | if (!getVShiftImm(Op, ElementBits, Cnt)) |
10815 | return false; |
10816 | return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits); |
10817 | } |
10818 | |
10819 | |
10820 | |
10821 | |
10822 | static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, int64_t &Cnt) { |
10823 | assert(VT.isVector() && "vector shift count is not a vector type"); |
10824 | int64_t ElementBits = VT.getScalarSizeInBits(); |
10825 | if (!getVShiftImm(Op, ElementBits, Cnt)) |
10826 | return false; |
10827 | return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits)); |
10828 | } |
10829 | |
10830 | SDValue AArch64TargetLowering::LowerTRUNCATE(SDValue Op, |
10831 | SelectionDAG &DAG) const { |
10832 | EVT VT = Op.getValueType(); |
10833 | |
10834 | if (VT.getScalarType() == MVT::i1) { |
10835 | |
10836 | SDLoc dl(Op); |
10837 | EVT OpVT = Op.getOperand(0).getValueType(); |
10838 | SDValue Zero = DAG.getConstant(0, dl, OpVT); |
10839 | SDValue One = DAG.getConstant(1, dl, OpVT); |
10840 | SDValue And = DAG.getNode(ISD::AND, dl, OpVT, Op.getOperand(0), One); |
10841 | return DAG.getSetCC(dl, VT, And, Zero, ISD::SETNE); |
10842 | } |
10843 | |
10844 | if (!VT.isVector() || VT.isScalableVector()) |
10845 | return SDValue(); |
10846 | |
10847 | if (useSVEForFixedLengthVectorVT(Op.getOperand(0).getValueType())) |
10848 | return LowerFixedLengthVectorTruncateToSVE(Op, DAG); |
10849 | |
10850 | return SDValue(); |
10851 | } |
10852 | |
// Lower vector shifts. Splat-immediate shift amounts become the dedicated
// VSHL/VASHR/VLSHR nodes; variable amounts fall back to the NEON sshl/ushl
// intrinsics (right shifts become a left shift by the negated amount).
SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
                                                      SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  int64_t Cnt;

  if (!Op.getOperand(1).getValueType().isVector())
    return Op;
  unsigned EltSize = VT.getScalarSizeInBits();

  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("unexpected shift opcode");

  case ISD::SHL:
    if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT))
      return LowerToPredicatedOp(Op, DAG, AArch64ISD::SHL_PRED);

    // In-range splat-immediate amount: use the dedicated shift node.
    if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize)
      return DAG.getNode(AArch64ISD::VSHL, DL, VT, Op.getOperand(0),
                         DAG.getConstant(Cnt, DL, MVT::i32));
    // Variable amount: emit the ushl intrinsic.
    return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
                       DAG.getConstant(Intrinsic::aarch64_neon_ushl, DL,
                                       MVT::i32),
                       Op.getOperand(0), Op.getOperand(1));
  case ISD::SRA:
  case ISD::SRL:
    if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT)) {
      unsigned Opc = Op.getOpcode() == ISD::SRA ? AArch64ISD::SRA_PRED
                                                : AArch64ISD::SRL_PRED;
      return LowerToPredicatedOp(Op, DAG, Opc);
    }

    // In-range splat-immediate right shift: use VASHR/VLSHR directly.
    if (isVShiftRImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize) {
      unsigned Opc =
          (Op.getOpcode() == ISD::SRA) ? AArch64ISD::VASHR : AArch64ISD::VLSHR;
      return DAG.getNode(Opc, DL, VT, Op.getOperand(0),
                         DAG.getConstant(Cnt, DL, MVT::i32));
    }

    // Variable right shift: emit sshl/ushl with a negated (0 - amount)
    // shift operand, since the intrinsics shift left by the given amount.
    unsigned Opc = (Op.getOpcode() == ISD::SRA) ? Intrinsic::aarch64_neon_sshl
                                                : Intrinsic::aarch64_neon_ushl;

    SDValue NegShift = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
                                   Op.getOperand(1));
    SDValue NegShiftLeft =
        DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
                    DAG.getConstant(Opc, DL, MVT::i32), Op.getOperand(0),
                    NegShift);
    return NegShiftLeft;
  }

  return SDValue();
}
10911 | |
// Emit an AArch64 vector compare node for condition CC applied to LHS/RHS.
// A constant-all-zero RHS selects the compare-with-zero forms (CMEQz etc.).
// Conditions with no native encoding are implemented by swapping operands
// or inverting an equality; otherwise an empty SDValue is returned.
static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
                                    AArch64CC::CondCode CC, bool NoNans, EVT VT,
                                    const SDLoc &dl, SelectionDAG &DAG) {
  EVT SrcVT = LHS.getValueType();
  assert(VT.getSizeInBits() == SrcVT.getSizeInBits() &&
         "function only supposed to emit natural comparisons");

  // Detect an all-zero constant RHS so the *z compare forms can be used.
  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
  APInt CnstBits(VT.getSizeInBits(), 0);
  APInt UndefBits(VT.getSizeInBits(), 0);
  bool IsCnst = BVN && resolveBuildVector(BVN, CnstBits, UndefBits);
  bool IsZero = IsCnst && (CnstBits == 0);

  if (SrcVT.getVectorElementType().isFloatingPoint()) {
    switch (CC) {
    default:
      return SDValue();
    case AArch64CC::NE: {
      // No FCMNE: emit FCMEQ and invert the result.
      SDValue Fcmeq;
      if (IsZero)
        Fcmeq = DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
      else
        Fcmeq = DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
      return DAG.getNOT(dl, Fcmeq, VT);
    }
    case AArch64CC::EQ:
      if (IsZero)
        return DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
      return DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
    case AArch64CC::GE:
      if (IsZero)
        return DAG.getNode(AArch64ISD::FCMGEz, dl, VT, LHS);
      return DAG.getNode(AArch64ISD::FCMGE, dl, VT, LHS, RHS);
    case AArch64CC::GT:
      if (IsZero)
        return DAG.getNode(AArch64ISD::FCMGTz, dl, VT, LHS);
      return DAG.getNode(AArch64ISD::FCMGT, dl, VT, LHS, RHS);
    case AArch64CC::LS:
      // LHS <= RHS via FCMGE with swapped operands (FCMLEz when RHS == 0).
      if (IsZero)
        return DAG.getNode(AArch64ISD::FCMLEz, dl, VT, LHS);
      return DAG.getNode(AArch64ISD::FCMGE, dl, VT, RHS, LHS);
    case AArch64CC::LT:
      // The swapped-operand GT form below is only valid for LT when NaNs
      // can be ignored.
      if (!NoNans)
        return SDValue();

      LLVM_FALLTHROUGH;
    case AArch64CC::MI:
      if (IsZero)
        return DAG.getNode(AArch64ISD::FCMLTz, dl, VT, LHS);
      return DAG.getNode(AArch64ISD::FCMGT, dl, VT, RHS, LHS);
    }
  }

  // Integer comparisons. LE/LS/LO/LT are emitted with swapped operands.
  switch (CC) {
  default:
    return SDValue();
  case AArch64CC::NE: {
    // No CMNE: emit CMEQ and invert the result.
    SDValue Cmeq;
    if (IsZero)
      Cmeq = DAG.getNode(AArch64ISD::CMEQz, dl, VT, LHS);
    else
      Cmeq = DAG.getNode(AArch64ISD::CMEQ, dl, VT, LHS, RHS);
    return DAG.getNOT(dl, Cmeq, VT);
  }
  case AArch64CC::EQ:
    if (IsZero)
      return DAG.getNode(AArch64ISD::CMEQz, dl, VT, LHS);
    return DAG.getNode(AArch64ISD::CMEQ, dl, VT, LHS, RHS);
  case AArch64CC::GE:
    if (IsZero)
      return DAG.getNode(AArch64ISD::CMGEz, dl, VT, LHS);
    return DAG.getNode(AArch64ISD::CMGE, dl, VT, LHS, RHS);
  case AArch64CC::GT:
    if (IsZero)
      return DAG.getNode(AArch64ISD::CMGTz, dl, VT, LHS);
    return DAG.getNode(AArch64ISD::CMGT, dl, VT, LHS, RHS);
  case AArch64CC::LE:
    if (IsZero)
      return DAG.getNode(AArch64ISD::CMLEz, dl, VT, LHS);
    return DAG.getNode(AArch64ISD::CMGE, dl, VT, RHS, LHS);
  case AArch64CC::LS:
    return DAG.getNode(AArch64ISD::CMHS, dl, VT, RHS, LHS);
  case AArch64CC::LO:
    return DAG.getNode(AArch64ISD::CMHI, dl, VT, RHS, LHS);
  case AArch64CC::LT:
    if (IsZero)
      return DAG.getNode(AArch64ISD::CMLTz, dl, VT, LHS);
    return DAG.getNode(AArch64ISD::CMGT, dl, VT, RHS, LHS);
  case AArch64CC::HI:
    return DAG.getNode(AArch64ISD::CMHI, dl, VT, LHS, RHS);
  case AArch64CC::HS:
    return DAG.getNode(AArch64ISD::CMHS, dl, VT, LHS, RHS);
  }
}
11006 | |
// Lower vector SETCC to AArch64 compare nodes. Integer compares map
// directly; FP conditions may need two compares OR'd together and/or a
// final inversion, as decided by changeVectorFPCCToAArch64CC.
SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
                                           SelectionDAG &DAG) const {
  if (Op.getValueType().isScalableVector())
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::SETCC_MERGE_ZERO);

  if (useSVEForFixedLengthVectorVT(Op.getOperand(0).getValueType()))
    return LowerFixedLengthVectorSetccToSVE(Op, DAG);

  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  // The compare itself produces an integer vector as wide as the inputs.
  EVT CmpVT = LHS.getValueType().changeVectorElementTypeToInteger();
  SDLoc dl(Op);

  if (LHS.getValueType().getVectorElementType().isInteger()) {
    assert(LHS.getValueType() == RHS.getValueType());
    AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
    SDValue Cmp =
        EmitVectorComparison(LHS, RHS, AArch64CC, false, CmpVT, dl, DAG);
    return DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
  }

  const bool FullFP16 =
      static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();

  // Without full fp16, widen v4f16 compares to v4f32. NOTE(review): the
  // freshly built v4i16 setcc replaces all uses of Op while lowering
  // continues on the widened operands — confirm this flow against upstream
  // history before restructuring it.
  if (!FullFP16 && LHS.getValueType().getVectorElementType() == MVT::f16) {
    if (LHS.getValueType().getVectorNumElements() == 4) {
      LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v4f32, LHS);
      RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v4f32, RHS);
      SDValue NewSetcc = DAG.getSetCC(dl, MVT::v4i16, LHS, RHS, CC);
      DAG.ReplaceAllUsesWith(Op, NewSetcc);
      CmpVT = MVT::v4i32;
    } else
      return SDValue();
  }

  assert((!FullFP16 && LHS.getValueType().getVectorElementType() != MVT::f16) ||
         LHS.getValueType().getVectorElementType() != MVT::f128);

  // Map the FP condition onto one or two AArch64 conditions, possibly with
  // a final inversion of the combined result.
  AArch64CC::CondCode CC1, CC2;
  bool ShouldInvert;
  changeVectorFPCCToAArch64CC(CC, CC1, CC2, ShouldInvert);

  bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath;
  SDValue Cmp =
      EmitVectorComparison(LHS, RHS, CC1, NoNaNs, CmpVT, dl, DAG);
  if (!Cmp.getNode())
    return SDValue();

  // Some conditions require a second compare OR'd into the first.
  if (CC2 != AArch64CC::AL) {
    SDValue Cmp2 =
        EmitVectorComparison(LHS, RHS, CC2, NoNaNs, CmpVT, dl, DAG);
    if (!Cmp2.getNode())
      return SDValue();

    Cmp = DAG.getNode(ISD::OR, dl, CmpVT, Cmp, Cmp2);
  }

  Cmp = DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());

  if (ShouldInvert)
    Cmp = DAG.getNOT(dl, Cmp, Cmp.getValueType());

  return Cmp;
}
11076 | |
11077 | static SDValue getReductionSDNode(unsigned Op, SDLoc DL, SDValue ScalarOp, |
11078 | SelectionDAG &DAG) { |
11079 | SDValue VecOp = ScalarOp.getOperand(0); |
11080 | auto Rdx = DAG.getNode(Op, DL, VecOp.getSimpleValueType(), VecOp); |
11081 | return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarOp.getValueType(), Rdx, |
11082 | DAG.getConstant(0, DL, MVT::i64)); |
11083 | } |
11084 | |
// Lower VECREDUCE_* either to an SVE predicated reduction or to a NEON
// across-vector reduction followed by an extract of lane 0.
SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDValue Src = Op.getOperand(0);

  // Force the SVE path for reductions this function has no NEON lowering
  // for: the logical ops, FADD, and (other than ADD) 64-bit elements.
  EVT SrcVT = Src.getValueType();
  bool OverrideNEON = Op.getOpcode() == ISD::VECREDUCE_AND ||
                      Op.getOpcode() == ISD::VECREDUCE_OR ||
                      Op.getOpcode() == ISD::VECREDUCE_XOR ||
                      Op.getOpcode() == ISD::VECREDUCE_FADD ||
                      (Op.getOpcode() != ISD::VECREDUCE_ADD &&
                       SrcVT.getVectorElementType() == MVT::i64);
  if (SrcVT.isScalableVector() ||
      useSVEForFixedLengthVectorVT(SrcVT, OverrideNEON)) {

    // Predicate (i1) inputs take their own reduction path.
    if (SrcVT.getVectorElementType() == MVT::i1)
      return LowerPredReductionToSVE(Op, DAG);

    switch (Op.getOpcode()) {
    case ISD::VECREDUCE_ADD:
      return LowerReductionToSVE(AArch64ISD::UADDV_PRED, Op, DAG);
    case ISD::VECREDUCE_AND:
      return LowerReductionToSVE(AArch64ISD::ANDV_PRED, Op, DAG);
    case ISD::VECREDUCE_OR:
      return LowerReductionToSVE(AArch64ISD::ORV_PRED, Op, DAG);
    case ISD::VECREDUCE_SMAX:
      return LowerReductionToSVE(AArch64ISD::SMAXV_PRED, Op, DAG);
    case ISD::VECREDUCE_SMIN:
      return LowerReductionToSVE(AArch64ISD::SMINV_PRED, Op, DAG);
    case ISD::VECREDUCE_UMAX:
      return LowerReductionToSVE(AArch64ISD::UMAXV_PRED, Op, DAG);
    case ISD::VECREDUCE_UMIN:
      return LowerReductionToSVE(AArch64ISD::UMINV_PRED, Op, DAG);
    case ISD::VECREDUCE_XOR:
      return LowerReductionToSVE(AArch64ISD::EORV_PRED, Op, DAG);
    case ISD::VECREDUCE_FADD:
      return LowerReductionToSVE(AArch64ISD::FADDV_PRED, Op, DAG);
    case ISD::VECREDUCE_FMAX:
      return LowerReductionToSVE(AArch64ISD::FMAXNMV_PRED, Op, DAG);
    case ISD::VECREDUCE_FMIN:
      return LowerReductionToSVE(AArch64ISD::FMINNMV_PRED, Op, DAG);
    default:
      llvm_unreachable("Unhandled fixed length reduction");
    }
  }

  // NEON path: across-vector reduction + lane-0 extract.
  SDLoc dl(Op);
  switch (Op.getOpcode()) {
  case ISD::VECREDUCE_ADD:
    return getReductionSDNode(AArch64ISD::UADDV, dl, Op, DAG);
  case ISD::VECREDUCE_SMAX:
    return getReductionSDNode(AArch64ISD::SMAXV, dl, Op, DAG);
  case ISD::VECREDUCE_SMIN:
    return getReductionSDNode(AArch64ISD::SMINV, dl, Op, DAG);
  case ISD::VECREDUCE_UMAX:
    return getReductionSDNode(AArch64ISD::UMAXV, dl, Op, DAG);
  case ISD::VECREDUCE_UMIN:
    return getReductionSDNode(AArch64ISD::UMINV, dl, Op, DAG);
  case ISD::VECREDUCE_FMAX: {
    // FP max/min reductions use the "NM" (maxnum-style) intrinsics.
    return DAG.getNode(
        ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(),
        DAG.getConstant(Intrinsic::aarch64_neon_fmaxnmv, dl, MVT::i32),
        Src);
  }
  case ISD::VECREDUCE_FMIN: {
    return DAG.getNode(
        ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(),
        DAG.getConstant(Intrinsic::aarch64_neon_fminnmv, dl, MVT::i32),
        Src);
  }
  default:
    llvm_unreachable("Unhandled reduction");
  }
}
11160 | |
11161 | SDValue AArch64TargetLowering::LowerATOMIC_LOAD_SUB(SDValue Op, |
11162 | SelectionDAG &DAG) const { |
11163 | auto &Subtarget = static_cast<const AArch64Subtarget &>(DAG.getSubtarget()); |
11164 | if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics()) |
11165 | return SDValue(); |
11166 | |
11167 | |
11168 | SDLoc dl(Op); |
11169 | MVT VT = Op.getSimpleValueType(); |
11170 | SDValue RHS = Op.getOperand(2); |
11171 | AtomicSDNode *AN = cast<AtomicSDNode>(Op.getNode()); |
11172 | RHS = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), RHS); |
11173 | return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, dl, AN->getMemoryVT(), |
11174 | Op.getOperand(0), Op.getOperand(1), RHS, |
11175 | AN->getMemOperand()); |
11176 | } |
11177 | |
11178 | SDValue AArch64TargetLowering::LowerATOMIC_LOAD_AND(SDValue Op, |
11179 | SelectionDAG &DAG) const { |
11180 | auto &Subtarget = static_cast<const AArch64Subtarget &>(DAG.getSubtarget()); |
11181 | if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics()) |
11182 | return SDValue(); |
11183 | |
11184 | |
11185 | SDLoc dl(Op); |
11186 | MVT VT = Op.getSimpleValueType(); |
11187 | SDValue RHS = Op.getOperand(2); |
11188 | AtomicSDNode *AN = cast<AtomicSDNode>(Op.getNode()); |
11189 | RHS = DAG.getNode(ISD::XOR, dl, VT, DAG.getConstant(-1ULL, dl, VT), RHS); |
11190 | return DAG.getAtomic(ISD::ATOMIC_LOAD_CLR, dl, AN->getMemoryVT(), |
11191 | Op.getOperand(0), Op.getOperand(1), RHS, |
11192 | AN->getMemOperand()); |
11193 | } |
11194 | |
// Emit the Windows stack-probe call (__chkstk) for a dynamic stack
// allocation. The allocation size is passed to __chkstk in X15 in units of
// 16 bytes (hence the shift right/left by 4 around the call); the caller
// performs the actual SP adjustment afterwards.
// NOTE(review): the SRL rounds down, so this assumes Size is already a
// multiple of 16 — confirm against the caller.
SDValue AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(
    SDValue Op, SDValue Chain, SDValue &Size, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue Callee = DAG.getTargetExternalSymbol("__chkstk", PtrVT, 0);

  // __chkstk has its own dedicated preserved-register mask.
  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
  const uint32_t *Mask = TRI->getWindowsStackProbePreservedMask();
  if (Subtarget->hasCustomCallingConv())
    TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask);

  // Convert the byte count to 16-byte units and pass it in X15.
  Size = DAG.getNode(ISD::SRL, dl, MVT::i64, Size,
                     DAG.getConstant(4, dl, MVT::i64));
  Chain = DAG.getCopyToReg(Chain, dl, AArch64::X15, Size, SDValue());
  Chain =
      DAG.getNode(AArch64ISD::CALL, dl, DAG.getVTList(MVT::Other, MVT::Glue),
                  Chain, Callee, DAG.getRegister(AArch64::X15, MVT::i64),
                  DAG.getRegisterMask(Mask), Chain.getValue(1));

  // Convert the (possibly updated) size back to bytes for the caller's SP
  // adjustment.
  Size = DAG.getNode(ISD::SHL, dl, MVT::i64, Size,
                     DAG.getConstant(4, dl, MVT::i64));
  return Chain;
}
11222 | |
// Lower DYNAMIC_STACKALLOC for Windows targets: move SP down by the
// requested size, probing the newly exposed stack via __chkstk unless the
// function opts out with "no-stack-arg-probe".
SDValue
AArch64TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                               SelectionDAG &DAG) const {
  assert(Subtarget->isTargetWindows() &&
         "Only Windows alloca probing supported");
  SDLoc dl(Op);

  SDNode *Node = Op.getNode();
  SDValue Chain = Op.getOperand(0);
  SDValue Size = Op.getOperand(1);
  MaybeAlign Align =
      cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
  EVT VT = Node->getValueType(0);

  // "no-stack-arg-probe": skip the __chkstk call and just adjust (and,
  // if requested, align) SP directly.
  if (DAG.getMachineFunction().getFunction().hasFnAttribute(
          "no-stack-arg-probe")) {
    SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64);
    Chain = SP.getValue(1);
    SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size);
    if (Align)
      SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
                       DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
    Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP);
    SDValue Ops[2] = {SP, Chain};
    return DAG.getMergeValues(Ops, dl);
  }

  // Probed path: bracket the __chkstk call in a call sequence, then perform
  // the actual SP adjustment and optional alignment.
  Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);

  Chain = LowerWindowsDYNAMIC_STACKALLOC(Op, Chain, Size, DAG);

  SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64);
  Chain = SP.getValue(1);
  SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size);
  if (Align)
    SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
                     DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
  Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true),
                             DAG.getIntPtrConstant(0, dl, true), SDValue(), dl);

  // Return the new allocation pointer together with the updated chain.
  SDValue Ops[2] = {SP, Chain};
  return DAG.getMergeValues(Ops, dl);
}
11268 | |
11269 | SDValue AArch64TargetLowering::LowerVSCALE(SDValue Op, |
11270 | SelectionDAG &DAG) const { |
11271 | EVT VT = Op.getValueType(); |
11272 | assert(VT != MVT::i64 && "Expected illegal VSCALE node"); |
11273 | |
11274 | SDLoc DL(Op); |
11275 | APInt MulImm = cast<ConstantSDNode>(Op.getOperand(0))->getAPIntValue(); |
11276 | return DAG.getZExtOrTrunc(DAG.getVScale(DL, MVT::i64, MulImm.sextOrSelf(64)), |
11277 | DL, VT); |
11278 | } |
11279 | |
11280 | |
// Fill in IntrinsicInfo for an SVE structured store (st2/st3/st4) of NumVecs
// vectors; always returns true.
template <unsigned NumVecs>
static bool
setInfoSVEStN(const AArch64TargetLowering &TLI, const DataLayout &DL,
              AArch64TargetLowering::IntrinsicInfo &Info, const CallInst &CI) {
  Info.opc = ISD::INTRINSIC_VOID;
  // All NumVecs data operands share one vector type; the memory type is the
  // concatenation of all of them.
  const EVT VT = TLI.getMemValueType(DL, CI.getArgOperand(0)->getType());
  ElementCount EC = VT.getVectorElementCount();
#ifndef NDEBUG
  // Sanity-check that every stored vector really has the same type.
  for (unsigned I = 0; I < NumVecs; ++I)
    assert(VT == TLI.getMemValueType(DL, CI.getArgOperand(I)->getType()) &&
           "Invalid type.");
#endif
  Info.memVT = EVT::getVectorVT(CI.getType()->getContext(), VT.getScalarType(),
                                EC * NumVecs);
  // The address is the last argument of the intrinsic call.
  Info.ptrVal = CI.getArgOperand(CI.getNumArgOperands() - 1);
  Info.offset = 0;
  Info.align.reset();
  Info.flags = MachineMemOperand::MOStore;
  return true;
}
11304 | |
11305 | |
11306 | |
11307 | |
// Describe the memory behavior of target intrinsics so the selector can
// attach MachineMemOperands. Returns true iff Intrinsic touches memory.
bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                               const CallInst &I,
                                               MachineFunction &MF,
                                               unsigned Intrinsic) const {
  auto &DL = I.getModule()->getDataLayout();
  switch (Intrinsic) {
  // SVE structured stores: delegate to the templated helper.
  case Intrinsic::aarch64_sve_st2:
    return setInfoSVEStN<2>(*this, DL, Info, I);
  case Intrinsic::aarch64_sve_st3:
    return setInfoSVEStN<3>(*this, DL, Info, I);
  case Intrinsic::aarch64_sve_st4:
    return setInfoSVEStN<4>(*this, DL, Info, I);
  // NEON structured / replicating loads.
  case Intrinsic::aarch64_neon_ld2:
  case Intrinsic::aarch64_neon_ld3:
  case Intrinsic::aarch64_neon_ld4:
  case Intrinsic::aarch64_neon_ld1x2:
  case Intrinsic::aarch64_neon_ld1x3:
  case Intrinsic::aarch64_neon_ld1x4:
  case Intrinsic::aarch64_neon_ld2lane:
  case Intrinsic::aarch64_neon_ld3lane:
  case Intrinsic::aarch64_neon_ld4lane:
  case Intrinsic::aarch64_neon_ld2r:
  case Intrinsic::aarch64_neon_ld3r:
  case Intrinsic::aarch64_neon_ld4r: {
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    // Conservatively describe the access as a vector of i64 covering the
    // full size of the returned aggregate.
    uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
    // The address is always the last argument.
    Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
    Info.offset = 0;
    Info.align.reset();
    Info.flags = MachineMemOperand::MOLoad;
    return true;
  }
  // NEON structured stores.
  case Intrinsic::aarch64_neon_st2:
  case Intrinsic::aarch64_neon_st3:
  case Intrinsic::aarch64_neon_st4:
  case Intrinsic::aarch64_neon_st1x2:
  case Intrinsic::aarch64_neon_st1x3:
  case Intrinsic::aarch64_neon_st1x4:
  case Intrinsic::aarch64_neon_st2lane:
  case Intrinsic::aarch64_neon_st3lane:
  case Intrinsic::aarch64_neon_st4lane: {
    Info.opc = ISD::INTRINSIC_VOID;
    // Sum the sizes of the leading vector arguments; the trailing non-vector
    // arguments (lane index and/or pointer) end the scan.
    unsigned NumElts = 0;
    for (unsigned ArgI = 0, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
      Type *ArgTy = I.getArgOperand(ArgI)->getType();
      if (!ArgTy->isVectorTy())
        break;
      NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
    }
    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
    Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
    Info.offset = 0;
    Info.align.reset();
    Info.flags = MachineMemOperand::MOStore;
    return true;
  }
  // Exclusive loads: volatile so they are not reordered or eliminated.
  case Intrinsic::aarch64_ldaxr:
  case Intrinsic::aarch64_ldxr: {
    PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::getVT(PtrTy->getElementType());
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = DL.getABITypeAlign(PtrTy->getElementType());
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
    return true;
  }
  // Exclusive stores: operand 0 is the value, operand 1 the address.
  case Intrinsic::aarch64_stlxr:
  case Intrinsic::aarch64_stxr: {
    PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::getVT(PtrTy->getElementType());
    Info.ptrVal = I.getArgOperand(1);
    Info.offset = 0;
    Info.align = DL.getABITypeAlign(PtrTy->getElementType());
    Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
    return true;
  }
  // Exclusive load-pair: a 128-bit, 16-byte-aligned access.
  case Intrinsic::aarch64_ldaxp:
  case Intrinsic::aarch64_ldxp:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i128;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(16);
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
    return true;
  // Exclusive store-pair: operands 0/1 are values, operand 2 the address.
  case Intrinsic::aarch64_stlxp:
  case Intrinsic::aarch64_stxp:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i128;
    Info.ptrVal = I.getArgOperand(2);
    Info.offset = 0;
    Info.align = Align(16);
    Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
    return true;
  // SVE non-temporal load: predicate is operand 0, address operand 1.
  case Intrinsic::aarch64_sve_ldnt1: {
    PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::getVT(I.getType());
    Info.ptrVal = I.getArgOperand(1);
    Info.offset = 0;
    Info.align = DL.getABITypeAlign(PtrTy->getElementType());
    Info.flags = MachineMemOperand::MOLoad;
    if (Intrinsic == Intrinsic::aarch64_sve_ldnt1)
      Info.flags |= MachineMemOperand::MONonTemporal;
    return true;
  }
  // SVE non-temporal store: value 0, predicate 1, address 2.
  case Intrinsic::aarch64_sve_stnt1: {
    PointerType *PtrTy = cast<PointerType>(I.getArgOperand(2)->getType());
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::getVT(I.getOperand(0)->getType());
    Info.ptrVal = I.getArgOperand(2);
    Info.offset = 0;
    Info.align = DL.getABITypeAlign(PtrTy->getElementType());
    Info.flags = MachineMemOperand::MOStore;
    if (Intrinsic == Intrinsic::aarch64_sve_stnt1)
      Info.flags |= MachineMemOperand::MONonTemporal;
    return true;
  }
  default:
    break;
  }

  return false;
}
11439 | |
11440 | bool AArch64TargetLowering::shouldReduceLoadWidth(SDNode *Load, |
11441 | ISD::LoadExtType ExtTy, |
11442 | EVT NewVT) const { |
11443 | |
11444 | if (!TargetLoweringBase::shouldReduceLoadWidth(Load, ExtTy, NewVT)) |
11445 | return false; |
11446 | |
11447 | |
11448 | |
11449 | if (ExtTy != ISD::NON_EXTLOAD) |
11450 | return true; |
11451 | |
11452 | |
11453 | MemSDNode *Mem = dyn_cast<MemSDNode>(Load); |
11454 | assert(Mem); |
11455 | const SDValue &Base = Mem->getBasePtr(); |
11456 | if (Base.getOpcode() == ISD::ADD && |
11457 | Base.getOperand(1).getOpcode() == ISD::SHL && |
11458 | Base.getOperand(1).hasOneUse() && |
11459 | Base.getOperand(1).getOperand(1).getOpcode() == ISD::Constant) { |
11460 | |
11461 | |
11462 | uint64_t ShiftAmount = Base.getOperand(1).getConstantOperandVal(1); |
11463 | uint64_t LoadBytes = Mem->getMemoryVT().getSizeInBits()/8; |
11464 | if (ShiftAmount == Log2_32(LoadBytes)) |
11465 | return false; |
11466 | } |
11467 | |
11468 | return true; |
11469 | } |
11470 | |
11471 | |
11472 | bool AArch64TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { |
11473 | if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) |
11474 | return false; |
11475 | uint64_t NumBits1 = Ty1->getPrimitiveSizeInBits().getFixedSize(); |
11476 | uint64_t NumBits2 = Ty2->getPrimitiveSizeInBits().getFixedSize(); |
11477 | return NumBits1 > NumBits2; |
11478 | } |
11479 | bool AArch64TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { |
11480 | if (VT1.isVector() || VT2.isVector() || !VT1.isInteger() || !VT2.isInteger()) |
11481 | return false; |
11482 | uint64_t NumBits1 = VT1.getFixedSizeInBits(); |
11483 | uint64_t NumBits2 = VT2.getFixedSizeInBits(); |
11484 | return NumBits1 > NumBits2; |
11485 | } |
11486 | |
11487 | |
11488 | |
11489 | |
11490 | bool AArch64TargetLowering::isProfitableToHoist(Instruction *I) const { |
11491 | if (I->getOpcode() != Instruction::FMul) |
11492 | return true; |
11493 | |
11494 | if (!I->hasOneUse()) |
11495 | return true; |
11496 | |
11497 | Instruction *User = I->user_back(); |
11498 | |
11499 | if (User && |
11500 | !(User->getOpcode() == Instruction::FSub || |
11501 | User->getOpcode() == Instruction::FAdd)) |
11502 | return true; |
11503 | |
11504 | const TargetOptions &Options = getTargetMachine().Options; |
11505 | const Function *F = I->getFunction(); |
11506 | const DataLayout &DL = F->getParent()->getDataLayout(); |
11507 | Type *Ty = User->getOperand(0)->getType(); |
11508 | |
11509 | return !(isFMAFasterThanFMulAndFAdd(*F, Ty) && |
11510 | isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) && |
11511 | (Options.AllowFPOpFusion == FPOpFusion::Fast || |
11512 | Options.UnsafeFPMath)); |
11513 | } |
11514 | |
11515 | |
11516 | |
11517 | bool AArch64TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const { |
11518 | if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) |
11519 | return false; |
11520 | unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); |
11521 | unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); |
11522 | return NumBits1 == 32 && NumBits2 == 64; |
11523 | } |
11524 | bool AArch64TargetLowering::isZExtFree(EVT VT1, EVT VT2) const { |
11525 | if (VT1.isVector() || VT2.isVector() || !VT1.isInteger() || !VT2.isInteger()) |
11526 | return false; |
11527 | unsigned NumBits1 = VT1.getSizeInBits(); |
11528 | unsigned NumBits2 = VT2.getSizeInBits(); |
11529 | return NumBits1 == 32 && NumBits2 == 64; |
11530 | } |
11531 | |
11532 | bool AArch64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const { |
11533 | EVT VT1 = Val.getValueType(); |
11534 | if (isZExtFree(VT1, VT2)) { |
11535 | return true; |
11536 | } |
11537 | |
11538 | if (Val.getOpcode() != ISD::LOAD) |
11539 | return false; |
11540 | |
11541 | |
11542 | return (VT1.isSimple() && !VT1.isVector() && VT1.isInteger() && |
11543 | VT2.isSimple() && !VT2.isVector() && VT2.isInteger() && |
11544 | VT1.getSizeInBits() <= 32); |
11545 | } |
11546 | |
// Return true if the extension Ext is free, i.e. every use can fold the
// extension into its own addressing mode or shifted operand.
bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const {
  // FP extensions are real instructions, never free.
  if (isa<FPExtInst>(Ext))
    return false;

  // Vector extends cannot be folded into the scalar forms checked below.
  if (Ext->getType()->isVectorTy())
    return false;

  for (const Use &U : Ext->uses()) {
    const Instruction *Instr = cast<Instruction>(U.getUser());

    // Check whether this particular user can absorb the extension.
    switch (Instr->getOpcode()) {
    case Instruction::Shl:
      // Only a shift by constant folds together with the extend.
      if (!isa<ConstantInt>(Instr->getOperand(1)))
        return false;
      break;
    case Instruction::GetElementPtr: {
      // The GEP index is scaled by the indexed type's size; that scaling
      // becomes a shift which can absorb the extend in the addressing mode.
      gep_type_iterator GTI = gep_type_begin(Instr);
      auto &DL = Ext->getModule()->getDataLayout();
      std::advance(GTI, U.getOperandNo()-1);
      Type *IdxTy = GTI.getIndexedType();

      // countTrailingZeros(bits) - 3 == log2(store size in bytes) for
      // power-of-two sizes, i.e. the implied shift amount.
      uint64_t ShiftAmt =
        countTrailingZeros(DL.getTypeStoreSizeInBits(IdxTy).getFixedSize()) - 3;

      // Extended-register addressing only supports shifts of 1..4.
      if (ShiftAmt == 0 || ShiftAmt > 4)
        return false;
      break;
    }
    case Instruction::Trunc:
      // trunc(ext x) back to x's type is a no-op, so this use is free.
      if (Instr->getType() == Ext->getOperand(0)->getType())
        continue;
      LLVM_FALLTHROUGH;
    default:
      // Any other user would need a real extend instruction.
      return false;
    }

    // This use can fold the extension; keep checking the remaining uses.
  }
  return true;
}
11600 | |
11601 | |
11602 | |
// Return true if Op1 and Op2 are both shufflevectors that extract the same
// half (lower or upper) of their source vectors.
static bool areExtractShuffleVectors(Value *Op1, Value *Op2) {
  // The shuffle result must be exactly half the bit width of its source...
  auto areTypesHalfed = [](Value *FullV, Value *HalfV) {
    auto *FullTy = FullV->getType();
    auto *HalfTy = HalfV->getType();
    return FullTy->getPrimitiveSizeInBits().getFixedSize() ==
           2 * HalfTy->getPrimitiveSizeInBits().getFixedSize();
  };

  // ...and half the element count.
  auto extractHalf = [](Value *FullV, Value *HalfV) {
    auto *FullVT = cast<FixedVectorType>(FullV->getType());
    auto *HalfVT = cast<FixedVectorType>(HalfV->getType());
    return FullVT->getNumElements() == 2 * HalfVT->getNumElements();
  };

  ArrayRef<int> M1, M2;
  Value *S1Op1, *S2Op1;
  // Both operands must match "shufflevector %src, undef, mask".
  if (!match(Op1, m_Shuffle(m_Value(S1Op1), m_Undef(), m_Mask(M1))) ||
      !match(Op2, m_Shuffle(m_Value(S2Op1), m_Undef(), m_Mask(M2))))
    return false;

  // Each shuffle must narrow its source to half width and half element
  // count (a genuine half-extract, not an arbitrary shuffle).
  if (!areTypesHalfed(S1Op1, Op1) || !areTypesHalfed(S2Op1, Op2) ||
      !extractHalf(S1Op1, Op1) || !extractHalf(S2Op1, Op2))
    return false;

  // Both masks must be contiguous subvector extracts starting at the same
  // position, and that position must be element 0 (low half) or the
  // midpoint (high half).
  int M1Start = -1;
  int M2Start = -1;
  int NumElements = cast<FixedVectorType>(Op1->getType())->getNumElements() * 2;
  if (!ShuffleVectorInst::isExtractSubvectorMask(M1, NumElements, M1Start) ||
      !ShuffleVectorInst::isExtractSubvectorMask(M2, NumElements, M2Start) ||
      M1Start != M2Start || (M1Start != 0 && M2Start != (NumElements / 2)))
    return false;

  return true;
}
11641 | |
11642 | |
11643 | |
11644 | static bool areExtractExts(Value *Ext1, Value *Ext2) { |
11645 | auto areExtDoubled = [](Instruction *Ext) { |
11646 | return Ext->getType()->getScalarSizeInBits() == |
11647 | 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits(); |
11648 | }; |
11649 | |
11650 | if (!match(Ext1, m_ZExtOrSExt(m_Value())) || |
11651 | !match(Ext2, m_ZExtOrSExt(m_Value())) || |
11652 | !areExtDoubled(cast<Instruction>(Ext1)) || |
11653 | !areExtDoubled(cast<Instruction>(Ext2))) |
11654 | return false; |
11655 | |
11656 | return true; |
11657 | } |
11658 | |
11659 | |
11660 | static bool isOperandOfVmullHighP64(Value *Op) { |
11661 | Value *VectorOperand = nullptr; |
11662 | ConstantInt *ElementIndex = nullptr; |
11663 | return match(Op, m_ExtractElt(m_Value(VectorOperand), |
11664 | m_ConstantInt(ElementIndex))) && |
11665 | ElementIndex->getValue() == 1 && |
11666 | isa<FixedVectorType>(VectorOperand->getType()) && |
11667 | cast<FixedVectorType>(VectorOperand->getType())->getNumElements() == 2; |
11668 | } |
11669 | |
11670 | |
// Return true if both operands are high-lane extracts of 2-element vectors,
// i.e. the operand pattern of a vmull_high_p64 (pmull2).
static bool areOperandsOfVmullHighP64(Value *Op1, Value *Op2) {
  return isOperandOfVmullHighP64(Op1) && isOperandOfVmullHighP64(Op2);
}
11674 | |
11675 | |
11676 | |
11677 | |
// Check if sinking I's operands into I's block is profitable because the
// operands can be folded into a target instruction during selection.
// The uses to sink are appended to Ops.
bool AArch64TargetLowering::shouldSinkOperands(
    Instruction *I, SmallVectorImpl<Use *> &Ops) const {
  if (!I->getType()->isVectorTy())
    return false;

  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::aarch64_neon_umull:
      // umull can consume half-extract shuffles directly (umull2 form).
      if (!areExtractShuffleVectors(II->getOperand(0), II->getOperand(1)))
        return false;
      Ops.push_back(&II->getOperandUse(0));
      Ops.push_back(&II->getOperandUse(1));
      return true;

    case Intrinsic::aarch64_neon_pmull64:
      // pmull2 can consume lane-1 extracts of <2 x i64> operands directly.
      if (!areOperandsOfVmullHighP64(II->getArgOperand(0),
                                     II->getArgOperand(1)))
        return false;
      Ops.push_back(&II->getArgOperandUse(0));
      Ops.push_back(&II->getArgOperandUse(1));
      return true;

    default:
      return false;
    }
  }

  switch (I->getOpcode()) {
  case Instruction::Sub:
  case Instruction::Add: {
    // add/sub of two width-doubling extends can select to widening
    // instructions.
    if (!areExtractExts(I->getOperand(0), I->getOperand(1)))
      return false;

    // If the extends' sources are matching half-extract shuffles, sink
    // those as well so the high-half forms can be used.
    auto Ext1 = cast<Instruction>(I->getOperand(0));
    auto Ext2 = cast<Instruction>(I->getOperand(1));
    if (areExtractShuffleVectors(Ext1->getOperand(0), Ext2->getOperand(0))) {
      Ops.push_back(&Ext1->getOperandUse(0));
      Ops.push_back(&Ext2->getOperandUse(0));
    }

    Ops.push_back(&I->getOperandUse(0));
    Ops.push_back(&I->getOperandUse(1));

    return true;
  }
  case Instruction::Mul: {
    // Sink splats of an extended scalar (shuffle of insertelement-at-0)
    // so the multiply can use a duplicating/indexed form.
    bool IsProfitable = false;
    for (auto &Op : I->operands()) {
      // Skip operands already queued for sinking.
      if (any_of(Ops, [&](Use *U) { return U->get() == Op; }))
        continue;

      // Operand must be a splat of element 0.
      ShuffleVectorInst *Shuffle = dyn_cast<ShuffleVectorInst>(Op);
      if (!Shuffle || !Shuffle->isZeroEltSplat())
        continue;

      // The splat source must be an insertelement of an instruction...
      Value *ShuffleOperand = Shuffle->getOperand(0);
      InsertElementInst *Insert = dyn_cast<InsertElementInst>(ShuffleOperand);
      if (!Insert)
        continue;

      Instruction *OperandInstr = dyn_cast<Instruction>(Insert->getOperand(1));
      if (!OperandInstr)
        continue;

      // ...inserted at element 0...
      ConstantInt *ElementConstant =
          dyn_cast<ConstantInt>(Insert->getOperand(2));

      if (!ElementConstant || ElementConstant->getZExtValue() != 0)
        continue;

      // ...and the inserted scalar must itself be a sext/zext.
      unsigned Opcode = OperandInstr->getOpcode();
      if (Opcode != Instruction::SExt && Opcode != Instruction::ZExt)
        continue;

      Ops.push_back(&Shuffle->getOperandUse(0));
      Ops.push_back(&Op);
      IsProfitable = true;
    }

    return IsProfitable;
  }
  default:
    return false;
  }
  return false;
}
11767 | |
11768 | bool AArch64TargetLowering::hasPairedLoad(EVT LoadedType, |
11769 | Align &RequiredAligment) const { |
11770 | if (!LoadedType.isSimple() || |
11771 | (!LoadedType.isInteger() && !LoadedType.isFloatingPoint())) |
11772 | return false; |
11773 | |
11774 | RequiredAligment = Align(1); |
11775 | unsigned NumBits = LoadedType.getSizeInBits(); |
11776 | return NumBits == 32 || NumBits == 64; |
11777 | } |
11778 | |
11779 | |
11780 | |
// Number of 128-bit NEON accesses needed to cover VecTy: one per (rounded
// up) 128 bits.
unsigned
AArch64TargetLowering::getNumInterleavedAccesses(VectorType *VecTy,
                                                 const DataLayout &DL) const {
  return (DL.getTypeSizeInBits(VecTy) + 127) / 128;
}
11786 | |
11787 | MachineMemOperand::Flags |
11788 | AArch64TargetLowering::getTargetMMOFlags(const Instruction &I) const { |
11789 | if (Subtarget->getProcFamily() == AArch64Subtarget::Falkor && |
11790 | I.getMetadata(FALKOR_STRIDED_ACCESS_MD) != nullptr) |
11791 | return MOStridedAccess; |
11792 | return MachineMemOperand::MONone; |
11793 | } |
11794 | |
11795 | bool AArch64TargetLowering::isLegalInterleavedAccessType( |
11796 | VectorType *VecTy, const DataLayout &DL) const { |
11797 | |
11798 | unsigned VecSize = DL.getTypeSizeInBits(VecTy); |
11799 | unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType()); |
11800 | |
11801 | |
11802 | if (cast<FixedVectorType>(VecTy)->getNumElements() < 2) |
11803 | return false; |
11804 | |
11805 | |
11806 | if (ElSize != 8 && ElSize != 16 && ElSize != 32 && ElSize != 64) |
11807 | return false; |
11808 | |
11809 | |
11810 | |
11811 | return VecSize == 64 || VecSize % 128 == 0; |
11812 | } |
11813 | |
11814 | |
11815 | |
11816 | |
11817 | |
11818 | |
11819 | |
11820 | |
11821 | |
11822 | |
11823 | |
11824 | |
// Lower an interleaved load (a wide load followed by de-interleaving
// shufflevectors) into one or more NEON ld2/ld3/ld4 intrinsics. Returns
// true on success, false to keep the original IR.
bool AArch64TargetLowering::lowerInterleavedLoad(
    LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
    ArrayRef<unsigned> Indices, unsigned Factor) const {
  assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
         "Invalid interleave factor");
  assert(!Shuffles.empty() && "Empty shufflevector input");
  assert(Shuffles.size() == Indices.size() &&
         "Unmatched number of shufflevectors and indices");

  const DataLayout &DL = LI->getModule()->getDataLayout();

  VectorType *VTy = Shuffles[0]->getType();

  // Bail out unless NEON is available and the per-lane type is something
  // ldN can handle.
  if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(VTy, DL))
    return false;

  unsigned NumLoads = getNumInterleavedAccesses(VTy, DL);

  auto *FVTy = cast<FixedVectorType>(VTy);

  // Pointer elements are loaded as integers of pointer width and converted
  // back with inttoptr after the extracts.
  Type *EltTy = FVTy->getElementType();
  if (EltTy->isPointerTy())
    FVTy =
        FixedVectorType::get(DL.getIntPtrType(EltTy), FVTy->getNumElements());

  IRBuilder<> Builder(LI);

  Value *BaseAddr = LI->getPointerOperand();

  if (NumLoads > 1) {
    // The result is wider than one register group: split into NumLoads
    // equal slices, each loaded with its own ldN.
    FVTy = FixedVectorType::get(FVTy->getElementType(),
                                FVTy->getNumElements() / NumLoads);

    // Rebase the pointer to the (possibly integer-converted) element type
    // so per-slice GEPs below step in elements.
    BaseAddr = Builder.CreateBitCast(
        BaseAddr,
        FVTy->getElementType()->getPointerTo(LI->getPointerAddressSpace()));
  }

  Type *PtrTy = FVTy->getPointerTo(LI->getPointerAddressSpace());
  Type *Tys[2] = {FVTy, PtrTy};
  static const Intrinsic::ID LoadInts[3] = {Intrinsic::aarch64_neon_ld2,
                                            Intrinsic::aarch64_neon_ld3,
                                            Intrinsic::aarch64_neon_ld4};
  Function *LdNFunc =
      Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);

  // Collect the per-slice sub-vectors for each original shuffle so they can
  // be concatenated back into full-width replacements afterwards.
  DenseMap<ShuffleVectorInst *, SmallVector<Value *, 4>> SubVecs;

  for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
    // Advance the base pointer by one full interleaved group per slice
    // (slice element count times the interleave factor).
    if (LoadCount > 0)
      BaseAddr = Builder.CreateConstGEP1_32(FVTy->getElementType(), BaseAddr,
                                            FVTy->getNumElements() * Factor);

    CallInst *LdN = Builder.CreateCall(
        LdNFunc, Builder.CreateBitCast(BaseAddr, PtrTy), "ldN");

    // Extract the requested lane for each de-interleaving shuffle.
    for (unsigned i = 0; i < Shuffles.size(); i++) {
      ShuffleVectorInst *SVI = Shuffles[i];
      unsigned Index = Indices[i];

      Value *SubVec = Builder.CreateExtractValue(LdN, Index);

      // Convert integer-loaded pointer elements back to pointers.
      if (EltTy->isPointerTy())
        SubVec = Builder.CreateIntToPtr(
            SubVec, FixedVectorType::get(SVI->getType()->getElementType(),
                                         FVTy->getNumElements()));
      SubVecs[SVI].push_back(SubVec);
    }
  }

  // Replace each original shuffle with its (possibly concatenated) lanes.
  for (ShuffleVectorInst *SVI : Shuffles) {
    auto &SubVec = SubVecs[SVI];
    auto *WideVec =
        SubVec.size() > 1 ? concatenateVectors(Builder, SubVec) : SubVec[0];
    SVI->replaceAllUsesWith(WideVec);
  }

  return true;
}
11927 | |
11928 | |
11929 | |
11930 | |
11931 | |
11932 | |
11933 | |
11934 | |
11935 | |
11936 | |
11937 | |
11938 | |
11939 | |
11940 | |
11941 | |
11942 | |
11943 | |
11944 | |
11945 | |
11946 | |
11947 | |
11948 | |
11949 | |
11950 | |
11951 | |
11952 | |
11953 | |
// Lower an interleaved store (an interleaving shufflevector followed by a
// wide store) into one or more NEON st2/st3/st4 intrinsics. Returns true on
// success, false to keep the original IR.
bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
                                                  ShuffleVectorInst *SVI,
                                                  unsigned Factor) const {
  assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
         "Invalid interleave factor");

  auto *VecTy = cast<FixedVectorType>(SVI->getType());
  assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");

  // LaneLen is the element count of each of the Factor interleaved fields.
  unsigned LaneLen = VecTy->getNumElements() / Factor;
  Type *EltTy = VecTy->getElementType();
  auto *SubVecTy = FixedVectorType::get(EltTy, LaneLen);

  const DataLayout &DL = SI->getModule()->getDataLayout();

  // Bail out unless NEON is available and the per-field type is something
  // stN can handle.
  if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(SubVecTy, DL))
    return false;

  unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL);

  Value *Op0 = SVI->getOperand(0);
  Value *Op1 = SVI->getOperand(1);
  IRBuilder<> Builder(SI);

  // Pointer elements are converted to pointer-width integers before being
  // stored, since stN operates on integer/FP vectors.
  if (EltTy->isPointerTy()) {
    Type *IntTy = DL.getIntPtrType(EltTy);
    unsigned NumOpElts =
        cast<FixedVectorType>(Op0->getType())->getNumElements();

    auto *IntVecTy = FixedVectorType::get(IntTy, NumOpElts);
    Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
    Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);

    SubVecTy = FixedVectorType::get(IntTy, LaneLen);
  }

  Value *BaseAddr = SI->getPointerOperand();

  if (NumStores > 1) {
    // The store is wider than one register group: split each field into
    // NumStores slices, each written with its own stN.
    LaneLen /= NumStores;
    SubVecTy = FixedVectorType::get(SubVecTy->getElementType(), LaneLen);

    // Rebase the pointer to the element type so per-slice GEPs below step
    // in elements.
    BaseAddr = Builder.CreateBitCast(
        BaseAddr,
        SubVecTy->getElementType()->getPointerTo(SI->getPointerAddressSpace()));
  }

  auto Mask = SVI->getShuffleMask();

  Type *PtrTy = SubVecTy->getPointerTo(SI->getPointerAddressSpace());
  Type *Tys[2] = {SubVecTy, PtrTy};
  static const Intrinsic::ID StoreInts[3] = {Intrinsic::aarch64_neon_st2,
                                             Intrinsic::aarch64_neon_st3,
                                             Intrinsic::aarch64_neon_st4};
  Function *StNFunc =
      Intrinsic::getDeclaration(SI->getModule(), StoreInts[Factor - 2], Tys);

  for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {

    SmallVector<Value *, 5> Ops;

    // Build one stN operand per interleaved field by re-shuffling the
    // original two shuffle inputs.
    for (unsigned i = 0; i < Factor; i++) {
      unsigned IdxI = StoreCount * LaneLen * Factor + i;
      if (Mask[IdxI] >= 0) {
        Ops.push_back(Builder.CreateShuffleVector(
            Op0, Op1, createSequentialMask(Mask[IdxI], LaneLen, 0)));
      } else {
        // The first mask element of this field is undef; search later lanes
        // for a defined element to derive the field's start position.
        unsigned StartMask = 0;
        for (unsigned j = 1; j < LaneLen; j++) {
          unsigned IdxJ = StoreCount * LaneLen * Factor + j;
          if (Mask[IdxJ * Factor + IdxI] >= 0) {
            StartMask = Mask[IdxJ * Factor + IdxI] - IdxJ;
            break;
          }
        }

        // If every element was undef, StartMask stays 0 and an arbitrary
        // (low) slice is stored — the stored values are undef anyway.
        Ops.push_back(Builder.CreateShuffleVector(
            Op0, Op1, createSequentialMask(StartMask, LaneLen, 0)));
      }
    }

    // Advance the base pointer by one full interleaved group per slice.
    if (StoreCount > 0)
      BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getElementType(),
                                            BaseAddr, LaneLen * Factor);

    Ops.push_back(Builder.CreateBitCast(BaseAddr, PtrTy));
    Builder.CreateCall(StNFunc, Ops);
  }
  return true;
}
12063 | |
12064 | |
12065 | |
12066 | |
12067 | |
12068 | |
12069 | |
12070 | |
12071 | |
12072 | |
12073 | |
12074 | |
12075 | |
12076 | |
12077 | |
12078 | |
12079 | |
12080 | |
12081 | |
12082 | |
12083 | |
12084 | SDValue AArch64TargetLowering::LowerSVEStructLoad(unsigned Intrinsic, |
12085 | ArrayRef<SDValue> LoadOps, |
12086 | EVT VT, SelectionDAG &DAG, |
12087 | const SDLoc &DL) const { |
12088 | assert(VT.isScalableVector() && "Can only lower scalable vectors"); |
12089 | |
12090 | unsigned N, Opcode; |
12091 | static std::map<unsigned, std::pair<unsigned, unsigned>> IntrinsicMap = { |
12092 | {Intrinsic::aarch64_sve_ld2, {2, AArch64ISD::SVE_LD2_MERGE_ZERO}}, |
12093 | {Intrinsic::aarch64_sve_ld3, {3, AArch64ISD::SVE_LD3_MERGE_ZERO}}, |
12094 | {Intrinsic::aarch64_sve_ld4, {4, AArch64ISD::SVE_LD4_MERGE_ZERO}}}; |
12095 | |
12096 | std::tie(N, Opcode) = IntrinsicMap[Intrinsic]; |
12097 | assert(VT.getVectorElementCount().getKnownMinValue() % N == 0 && |
12098 | "invalid tuple vector type!"); |
12099 | |
12100 | EVT SplitVT = |
12101 | EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), |
12102 | VT.getVectorElementCount().divideCoefficientBy(N)); |
12103 | assert(isTypeLegal(SplitVT)); |
12104 | |
12105 | SmallVector<EVT, 5> VTs(N, SplitVT); |
12106 | VTs.push_back(MVT::Other); |
12107 | SDVTList NodeTys = DAG.getVTList(VTs); |
12108 | |
12109 | SDValue PseudoLoad = DAG.getNode(Opcode, DL, NodeTys, LoadOps); |
12110 | SmallVector<SDValue, 4> PseudoLoadOps; |
12111 | for (unsigned I = 0; I < N; ++I) |
12112 | PseudoLoadOps.push_back(SDValue(PseudoLoad.getNode(), I)); |
12113 | return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, PseudoLoadOps); |
12114 | } |
12115 | |
12116 | EVT AArch64TargetLowering::getOptimalMemOpType( |
12117 | const MemOp &Op, const AttributeList &FuncAttributes) const { |
12118 | bool CanImplicitFloat = !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat); |
12119 | bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat; |
12120 | bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat; |
12121 | |
12122 | |
12123 | |
12124 | bool IsSmallMemset = Op.isMemset() && Op.size() < 32; |
12125 | auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) { |
12126 | if (Op.isAligned(AlignCheck)) |
12127 | return true; |
12128 | bool Fast; |
12129 | return allowsMisalignedMemoryAccesses(VT, 0, Align(1), |
12130 | MachineMemOperand::MONone, &Fast) && |
12131 | Fast; |
12132 | }; |
12133 | |
12134 | if (CanUseNEON && Op.isMemset() && !IsSmallMemset && |
12135 | AlignmentIsAcceptable(MVT::v16i8, Align(16))) |
12136 | return MVT::v16i8; |
12137 | if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, Align(16))) |
12138 | return MVT::f128; |
12139 | if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, Align(8))) |
12140 | return MVT::i64; |
12141 | if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, Align(4))) |
12142 | return MVT::i32; |
12143 | return MVT::Other; |
12144 | } |
12145 | |
12146 | LLT AArch64TargetLowering::getOptimalMemOpLLT( |
12147 | const MemOp &Op, const AttributeList &FuncAttributes) const { |
12148 | bool CanImplicitFloat = !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat); |
12149 | bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat; |
12150 | bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat; |
12151 | |
12152 | |
12153 | |
12154 | bool IsSmallMemset = Op.isMemset() && Op.size() < 32; |
12155 | auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) { |
12156 | if (Op.isAligned(AlignCheck)) |
12157 | return true; |
12158 | bool Fast; |
12159 | return allowsMisalignedMemoryAccesses(VT, 0, Align(1), |
12160 | MachineMemOperand::MONone, &Fast) && |
12161 | Fast; |
12162 | }; |
12163 | |
12164 | if (CanUseNEON && Op.isMemset() && !IsSmallMemset && |
12165 | AlignmentIsAcceptable(MVT::v2i64, Align(16))) |
12166 | return LLT::fixed_vector(2, 64); |
12167 | if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, Align(16))) |
12168 | return LLT::scalar(128); |
12169 | if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, Align(8))) |
12170 | return LLT::scalar(64); |
12171 | if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, Align(4))) |
12172 | return LLT::scalar(32); |
12173 | return LLT(); |
12174 | } |
12175 | |
12176 | |
12177 | bool AArch64TargetLowering::isLegalAddImmediate(int64_t Immed) const { |
12178 | if (Immed == std::numeric_limits<int64_t>::min()) { |
12179 | LLVM_DEBUG(dbgs() << "Illegal add imm " << Immed |
12180 | << ": avoid UB for INT64_MIN\n"); |
12181 | return false; |
12182 | } |
12183 | |
12184 | Immed = std::abs(Immed); |
12185 | bool IsLegal = ((Immed >> 12) == 0 || |
12186 | ((Immed & 0xfff) == 0 && Immed >> 24 == 0)); |
12187 | LLVM_DEBUG(dbgs() << "Is " << Immed |
12188 | << " legal add imm: " << (IsLegal ? "yes" : "no") << "\n"); |
12189 | return IsLegal; |
12190 | } |
12191 | |
12192 | |
12193 | |
// Integer comparisons are lowered to SUBS/ADDS, so the legal immediate
// range is identical to that of add immediates.
bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Immed) const {
  return isLegalAddImmediate(Immed);
}
12197 | |
12198 | |
12199 | |
/// Return true if the addressing mode represented by AM is legal for this
/// target, for a load/store of the given type.
///
/// AArch64 supports (roughly) these addressing modes:
///   reg
///   reg + signed 9-bit offset
///   reg + SIZE_IN_BYTES * unsigned 12-bit offset
///   reg1 + reg2
///   reg1 + SIZE_IN_BYTES * reg2
bool AArch64TargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                  const AddrMode &AM, Type *Ty,
                                                  unsigned AS, Instruction *I) const {
  // No global base addresses are folded into the addressing mode.
  if (AM.BaseGV)
    return false;

  // There is no reg+reg+imm addressing mode.
  if (AM.HasBaseReg && AM.BaseOffs && AM.Scale)
    return false;

  // SVE accesses: [reg] or [reg, reg scaled by element size]; no immediate
  // offsets are accepted here, and any scale must equal the element size.
  if (isa<ScalableVectorType>(Ty)) {
    uint64_t VecElemNumBytes =
        DL.getTypeSizeInBits(cast<VectorType>(Ty)->getElementType()) / 8;
    return AM.HasBaseReg && !AM.BaseOffs &&
           (AM.Scale == 0 || (uint64_t)AM.Scale == VecElemNumBytes);
  }

  // Compute the access size in bytes. Non-power-of-two sizes cannot use the
  // scaled unsigned-offset form, so treat them as size 0 below.
  uint64_t NumBytes = 0;
  if (Ty->isSized()) {
    uint64_t NumBits = DL.getTypeSizeInBits(Ty);
    NumBytes = NumBits / 8;
    if (!isPowerOf2_64(NumBits))
      NumBytes = 0;
  }

  if (!AM.Scale) {
    int64_t Offset = AM.BaseOffs;

    // 9-bit signed offset (LDUR/STUR-style).
    if (isInt<9>(Offset))
      return true;

    // 12-bit unsigned offset, scaled by the access size; the offset must be
    // a non-negative multiple of NumBytes (NumBytes is a power of two here).
    unsigned shift = Log2_64(NumBytes);
    if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 &&
        // Offset must be an exact multiple of NumBytes.
        (Offset >> shift) << shift == Offset)
      return true;
    return false;
  }

  // Register-offset forms: reg1 + reg2, or reg1 + reg2 scaled by the
  // access size.
  return AM.Scale == 1 || (AM.Scale > 0 && (uint64_t)AM.Scale == NumBytes);
}
12256 | |
bool AArch64TargetLowering::shouldConsiderGEPOffsetSplit() const {
  // Allow CodeGenPrepare to split large constant GEP offsets so the pieces
  // can be folded into the addressing modes described above.
  return true;
}
12261 | |
/// Return the cost of using a scaled-register addressing mode.
/// A legal mode with no scaling (Scale of 0 or 1) is free; a real scale
/// (e.g. [Xn, Xm, lsl #imm]) costs one unit, reflecting the extra latency
/// of scaled addressing. Illegal modes return a negative cost.
InstructionCost AArch64TargetLowering::getScalingFactorCost(
    const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const {
  if (isLegalAddressingMode(DL, AM, Ty, AS))
    // Scale of 0 or 1 means no scaling register is actually used.
    return AM.Scale != 0 && AM.Scale != 1;
  return -1;
}
12277 | |
12278 | bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd( |
12279 | const MachineFunction &MF, EVT VT) const { |
12280 | VT = VT.getScalarType(); |
12281 | |
12282 | if (!VT.isSimple()) |
12283 | return false; |
12284 | |
12285 | switch (VT.getSimpleVT().SimpleTy) { |
12286 | case MVT::f16: |
12287 | return Subtarget->hasFullFP16(); |
12288 | case MVT::f32: |
12289 | case MVT::f64: |
12290 | return true; |
12291 | default: |
12292 | break; |
12293 | } |
12294 | |
12295 | return false; |
12296 | } |
12297 | |
12298 | bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F, |
12299 | Type *Ty) const { |
12300 | switch (Ty->getScalarType()->getTypeID()) { |
12301 | case Type::FloatTyID: |
12302 | case Type::DoubleTyID: |
12303 | return true; |
12304 | default: |
12305 | return false; |
12306 | } |
12307 | } |
12308 | |
bool AArch64TargetLowering::generateFMAsInMachineCombiner(
    EVT VT, CodeGenOpt::Level OptLevel) const {
  // Defer FMA formation to the machine combiner at high optimization
  // levels; scalable vectors are excluded.
  return (OptLevel >= CodeGenOpt::Aggressive) && !VT.isScalableVector();
}
12313 | |
const MCPhysReg *
AArch64TargetLowering::getScratchRegisters(CallingConv::ID) const {
  // LR is a callee-save register, but we must treat it as clobbered by any
  // call site. Hence we include LR in the scratch registers, which are
  // added as implicit-defs for stackmaps and patchpoints.
  static const MCPhysReg ScratchRegs[] = {
    AArch64::X16, AArch64::X17, AArch64::LR, 0
  };
  return ScratchRegs;
}
12324 | |
bool
AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
                                                     CombineLevel Level) const {
  // N is the outer shift; inspect the value being shifted.
  N = N->getOperand(0).getNode();
  EVT VT = N->getValueType(0);

  // Do not commute when the shifted value is (and (srl x, c1), mask) with a
  // contiguous (run-of-ones) mask: that pair can be selected as a bitfield
  // extract, which commuting the shift would break up.
  if (N->getOpcode() == ISD::AND && (VT == MVT::i32 || VT == MVT::i64) &&
      isa<ConstantSDNode>(N->getOperand(1))) {
    uint64_t TruncMask = N->getConstantOperandVal(1);
    if (isMask_64(TruncMask) &&
        N->getOperand(0).getOpcode() == ISD::SRL &&
        isa<ConstantSDNode>(N->getOperand(0)->getOperand(1)))
      return false;
  }
  return true;
}
12342 | |
12343 | bool AArch64TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, |
12344 | Type *Ty) const { |
12345 | assert(Ty->isIntegerTy()); |
12346 | |
12347 | unsigned BitSize = Ty->getPrimitiveSizeInBits(); |
12348 | if (BitSize == 0) |
12349 | return false; |
12350 | |
12351 | int64_t Val = Imm.getSExtValue(); |
12352 | if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, BitSize)) |
12353 | return true; |
12354 | |
12355 | if ((int64_t)Val < 0) |
12356 | Val = ~Val; |
12357 | if (BitSize == 32) |
12358 | Val &= (1LL << 32) - 1; |
12359 | |
12360 | unsigned LZ = countLeadingZeros((uint64_t)Val); |
12361 | unsigned Shift = (63 - LZ) / 16; |
12362 | |
12363 | return Shift < 3; |
12364 | } |
12365 | |
12366 | bool AArch64TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, |
12367 | unsigned Index) const { |
12368 | if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT)) |
12369 | return false; |
12370 | |
12371 | return (Index == 0 || Index == ResVT.getVectorNumElements()); |
12372 | } |
12373 | |
12374 | |
12375 | |
12376 | |
12377 | |
// Fold a vector  xor (ashr x, eltsize-1), -1  into  CMGEz x :
// the arithmetic shift splats the sign bit, so its complement is exactly
// "x >= 0" lane-wise.
static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG,
                                         const AArch64Subtarget *Subtarget) {
  EVT VT = N->getValueType(0);
  if (!Subtarget->hasNEON() || !VT.isVector())
    return SDValue();

  // Match xor(shift, all-ones); the shift must have a single use so the
  // replacement actually removes it.
  SDValue Shift = N->getOperand(0);
  SDValue Ones = N->getOperand(1);
  if (Shift.getOpcode() != AArch64ISD::VASHR || !Shift.hasOneUse() ||
      !ISD::isBuildVectorAllOnes(Ones.getNode()))
    return SDValue();

  // The shift amount must be the element size minus one (sign-bit splat).
  auto *ShiftAmt = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
  EVT ShiftEltTy = Shift.getValueType().getVectorElementType();
  if (!ShiftAmt || ShiftAmt->getZExtValue() != ShiftEltTy.getSizeInBits() - 1)
    return SDValue();

  return DAG.getNode(AArch64ISD::CMGEz, SDLoc(N), VT, Shift.getOperand(0));
}
12400 | |
12401 | |
12402 | |
12403 | |
12404 | |
12405 | |
12406 | |
12407 | |
12408 | |
12409 | |
12410 | |
12411 | |
12412 | |
12413 | |
12414 | |
12415 | |
// Turn an absolute-difference reduction without dot-product support:
//   i32 vecreduce_add(v16i32 abs(v16i32 sub(ext(v16i8 a), ext(v16i8 b))))
// into a UABDL/SABDL + UADDLP-style sequence:
//   i32 vecreduce_add(v4i32 uaddlp(v8i16 add(zext([us]abd lo(a), lo(b)),
//                                            zext([us]abd hi(a), hi(b)))))
static SDValue performVecReduceAddCombineWithUADDLP(SDNode *N,
                                                    SelectionDAG &DAG) {
  // Only the i32 reduction of a v16i32 ABS(SUB(ext, ext)) pattern is handled.
  if (N->getValueType(0) != MVT::i32)
    return SDValue();

  SDValue VecReduceOp0 = N->getOperand(0);
  unsigned Opcode = VecReduceOp0.getOpcode();
  if (Opcode != ISD::ABS || VecReduceOp0->getValueType(0) != MVT::v16i32)
    return SDValue();

  SDValue ABS = VecReduceOp0;
  if (ABS->getOperand(0)->getOpcode() != ISD::SUB ||
      ABS->getOperand(0)->getValueType(0) != MVT::v16i32)
    return SDValue();

  SDValue SUB = ABS->getOperand(0);
  unsigned Opcode0 = SUB->getOperand(0).getOpcode();
  unsigned Opcode1 = SUB->getOperand(1).getOpcode();
  if (SUB->getOperand(0)->getValueType(0) != MVT::v16i32 ||
      SUB->getOperand(1)->getValueType(0) != MVT::v16i32)
    return SDValue();

  // Both subtraction operands must be extends of the same signedness; that
  // chooses between the unsigned (UABD) and signed (SABD) difference.
  bool IsZExt = false;
  if (Opcode0 == ISD::ZERO_EXTEND && Opcode1 == ISD::ZERO_EXTEND) {
    IsZExt = true;
  } else if (Opcode0 == ISD::SIGN_EXTEND && Opcode1 == ISD::SIGN_EXTEND) {
    IsZExt = false;
  } else
    return SDValue();

  SDValue EXT0 = SUB->getOperand(0);
  SDValue EXT1 = SUB->getOperand(1);
  // The pre-extend inputs must both be v16i8.
  if (EXT0->getOperand(0)->getValueType(0) != MVT::v16i8 ||
      EXT1->getOperand(0)->getValueType(0) != MVT::v16i8)
    return SDValue();

  SDLoc DL(N);

  // Absolute difference of the high 8 lanes, widened to v8i16.
  SDValue UABDHigh8Op0 =
      DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT0->getOperand(0),
                  DAG.getConstant(8, DL, MVT::i64));
  SDValue UABDHigh8Op1 =
      DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT1->getOperand(0),
                  DAG.getConstant(8, DL, MVT::i64));
  SDValue UABDHigh8 = DAG.getNode(IsZExt ? ISD::ABDU : ISD::ABDS, DL, MVT::v8i8,
                                  UABDHigh8Op0, UABDHigh8Op1);
  SDValue UABDL = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, UABDHigh8);

  // Absolute difference of the low 8 lanes, widened and accumulated.
  SDValue UABDLo8Op0 =
      DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT0->getOperand(0),
                  DAG.getConstant(0, DL, MVT::i64));
  SDValue UABDLo8Op1 =
      DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT1->getOperand(0),
                  DAG.getConstant(0, DL, MVT::i64));
  SDValue UABDLo8 = DAG.getNode(IsZExt ? ISD::ABDU : ISD::ABDS, DL, MVT::v8i8,
                                UABDLo8Op0, UABDLo8Op1);
  SDValue ZExtUABD = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, UABDLo8);
  SDValue UABAL = DAG.getNode(ISD::ADD, DL, MVT::v8i16, UABDL, ZExtUABD);

  // Pairwise-widen the v8i16 partial sums to v4i32 (zero-extend is fine:
  // an absolute byte difference fits in 8 bits).
  SDValue UADDLP = DAG.getNode(AArch64ISD::UADDLP, DL, MVT::v4i32, UABAL);

  // Finally reduce the v4i32 to the scalar result.
  return DAG.getNode(ISD::VECREDUCE_ADD, DL, MVT::i32, UADDLP);
}
12490 | |
12491 | |
12492 | |
12493 | |
// Turn  vecreduce_add(ext(v16i8/v8i8))  or
//       vecreduce_add(mul(ext(v16i8), ext(v16i8)))
// into a UDOT/SDOT against zero accumulators when the dot-product
// extension is available; otherwise try the UADDLP-based fallback.
static SDValue performVecReduceAddCombine(SDNode *N, SelectionDAG &DAG,
                                          const AArch64Subtarget *ST) {
  if (!ST->hasDotProd())
    return performVecReduceAddCombineWithUADDLP(N, DAG);

  SDValue Op0 = N->getOperand(0);
  if (N->getValueType(0) != MVT::i32 ||
      Op0.getValueType().getVectorElementType() != MVT::i32)
    return SDValue();

  unsigned ExtOpcode = Op0.getOpcode();
  SDValue A = Op0;
  SDValue B;
  // For the multiply form, both factors must be extends of the same kind
  // and the same pre-extend type.
  if (ExtOpcode == ISD::MUL) {
    A = Op0.getOperand(0);
    B = Op0.getOperand(1);
    if (A.getOpcode() != B.getOpcode() ||
        A.getOperand(0).getValueType() != B.getOperand(0).getValueType())
      return SDValue();
    ExtOpcode = A.getOpcode();
  }
  if (ExtOpcode != ISD::ZERO_EXTEND && ExtOpcode != ISD::SIGN_EXTEND)
    return SDValue();

  EVT Op0VT = A.getOperand(0).getValueType();
  if (Op0VT != MVT::v8i8 && Op0VT != MVT::v16i8)
    return SDValue();

  SDLoc DL(Op0);
  // For a plain extend reduction, dot against a vector of ones so the dot
  // product computes a simple widening sum.
  if (!B)
    B = DAG.getConstant(1, DL, Op0VT);
  else
    B = B.getOperand(0);

  // Accumulate into a zero vector; v8i8 inputs produce v2i32, v16i8 v4i32.
  SDValue Zeros =
      DAG.getConstant(0, DL, Op0VT == MVT::v8i8 ? MVT::v2i32 : MVT::v4i32);
  auto DotOpcode =
      (ExtOpcode == ISD::ZERO_EXTEND) ? AArch64ISD::UDOT : AArch64ISD::SDOT;
  SDValue Dot = DAG.getNode(DotOpcode, DL, Zeros.getValueType(), Zeros,
                            A.getOperand(0), B);
  return DAG.getNode(ISD::VECREDUCE_ADD, DL, N->getValueType(0), Dot);
}
12538 | |
static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const AArch64Subtarget *Subtarget) {
  // Run only after operation legalization; the fold below matches the
  // target-specific AArch64ISD::VASHR node.
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  return foldVectorXorShiftIntoCmp(N, DAG, Subtarget);
}
12547 | |
/// Custom lowering of signed division by a power of two:
///   sdiv x, 2^k  ->  sra (csel (add x, 2^k - 1), x, x < 0), k
/// (negated for negative divisors). Returns SDValue() when the generic
/// expansion should be used, or SDValue(N, 0) to keep the SDIV as-is.
SDValue
AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                                     SelectionDAG &DAG,
                                     SmallVectorImpl<SDNode *> &Created) const {
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (isIntDivCheap(N->getValueType(0), Attr))
    return SDValue(N,0); // Lower SDIV as SDIV

  // Only scalar i32/i64 divides by (positive or negative) powers of two.
  EVT VT = N->getValueType(0);
  if ((VT != MVT::i32 && VT != MVT::i64) ||
      !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
    return SDValue();

  SDLoc DL(N);
  SDValue N0 = N->getOperand(0);
  unsigned Lg2 = Divisor.countTrailingZeros();
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT);

  // Add (2^Lg2 - 1) only when the dividend is negative, so the arithmetic
  // shift rounds toward zero: csel(x + 2^Lg2 - 1, x, x < 0).
  SDValue CCVal;
  SDValue Cmp = getAArch64Cmp(N0, Zero, ISD::SETLT, CCVal, DAG, DL);
  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
  SDValue CSel = DAG.getNode(AArch64ISD::CSEL, DL, VT, Add, N0, CCVal, Cmp);

  Created.push_back(Cmp.getNode());
  Created.push_back(Add.getNode());
  Created.push_back(CSel.getNode());

  // Divide by the power of two via arithmetic shift right.
  SDValue SRA =
      DAG.getNode(ISD::SRA, DL, VT, CSel, DAG.getConstant(Lg2, DL, MVT::i64));

  // Positive divisor: done.
  if (Divisor.isNonNegative())
    return SRA;

  // Negative divisor: negate the quotient.
  Created.push_back(SRA.getNode());
  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
}
12590 | |
12591 | static bool IsSVECntIntrinsic(SDValue S) { |
12592 | switch(getIntrinsicID(S.getNode())) { |
12593 | default: |
12594 | break; |
12595 | case Intrinsic::aarch64_sve_cntb: |
12596 | case Intrinsic::aarch64_sve_cnth: |
12597 | case Intrinsic::aarch64_sve_cntw: |
12598 | case Intrinsic::aarch64_sve_cntd: |
12599 | return true; |
12600 | } |
12601 | return false; |
12602 | } |
12603 | |
12604 | |
12605 | |
12606 | |
12607 | |
12608 | |
12609 | |
12610 | |
12611 | |
12612 | |
12613 | |
12614 | |
12615 | |
12616 | static EVT calculatePreExtendType(SDValue Extend, SelectionDAG &DAG) { |
12617 | switch (Extend.getOpcode()) { |
12618 | case ISD::SIGN_EXTEND: |
12619 | case ISD::ZERO_EXTEND: |
12620 | return Extend.getOperand(0).getValueType(); |
12621 | case ISD::AssertSext: |
12622 | case ISD::AssertZext: |
12623 | case ISD::SIGN_EXTEND_INREG: { |
12624 | VTSDNode *TypeNode = dyn_cast<VTSDNode>(Extend.getOperand(1)); |
12625 | if (!TypeNode) |
12626 | return MVT::Other; |
12627 | return TypeNode->getVT(); |
12628 | } |
12629 | case ISD::AND: { |
12630 | ConstantSDNode *Constant = |
12631 | dyn_cast<ConstantSDNode>(Extend.getOperand(1).getNode()); |
12632 | if (!Constant) |
12633 | return MVT::Other; |
12634 | |
12635 | uint32_t Mask = Constant->getZExtValue(); |
12636 | |
12637 | if (Mask == UCHAR_MAX) |
12638 | return MVT::i8; |
12639 | else if (Mask == USHRT_MAX) |
12640 | return MVT::i16; |
12641 | else if (Mask == UINT_MAX) |
12642 | return MVT::i32; |
12643 | |
12644 | return MVT::Other; |
12645 | } |
12646 | default: |
12647 | return MVT::Other; |
12648 | } |
12649 | |
12650 | llvm_unreachable("Code path unhandled in calculatePreExtendType!"); |
12651 | } |
12652 | |
12653 | |
12654 | |
// Combine a splat-of-extended-scalar, i.e.
//   shuffle(insert_elt(undef, ext(x), 0), undef, <0,0,...>)
// into ext(splat(x)): build the splat in the narrow type and extend the
// whole vector once, enabling the use of widening multiplies.
static SDValue performCommonVectorExtendCombine(SDValue VectorShuffle,
                                                SelectionDAG &DAG) {
  ShuffleVectorSDNode *ShuffleNode =
      dyn_cast<ShuffleVectorSDNode>(VectorShuffle.getNode());
  if (!ShuffleNode)
    return SDValue();

  // Must be a splat of lane 0.
  if (!ShuffleNode->isSplat() || ShuffleNode->getSplatIndex() != 0)
    return SDValue();

  SDValue InsertVectorElt = VectorShuffle.getOperand(0);

  if (InsertVectorElt.getOpcode() != ISD::INSERT_VECTOR_ELT)
    return SDValue();

  // The insertion must also target lane 0.
  SDValue InsertLane = InsertVectorElt.getOperand(2);
  ConstantSDNode *Constant = dyn_cast<ConstantSDNode>(InsertLane.getNode());

  if (!Constant || Constant->getZExtValue() != 0)
    return SDValue();

  SDValue Extend = InsertVectorElt.getOperand(1);
  unsigned ExtendOpcode = Extend.getOpcode();

  // The inserted scalar must be some form of sign or zero extension
  // (including Assert* nodes and AND masks acting as zero-extends).
  bool IsSExt = ExtendOpcode == ISD::SIGN_EXTEND ||
                ExtendOpcode == ISD::SIGN_EXTEND_INREG ||
                ExtendOpcode == ISD::AssertSext;
  if (!IsSExt && ExtendOpcode != ISD::ZERO_EXTEND &&
      ExtendOpcode != ISD::AssertZext && ExtendOpcode != ISD::AND)
    return SDValue();

  EVT TargetType = VectorShuffle.getValueType();
  EVT PreExtendType = calculatePreExtendType(Extend, DAG);

  // Only handle the standard widening pairs with a known pre-extend type.
  if ((TargetType != MVT::v8i16 && TargetType != MVT::v4i32 &&
       TargetType != MVT::v2i64) ||
      (PreExtendType == MVT::Other))
    return SDValue();

  if (PreExtendType != MVT::i8 && PreExtendType != MVT::i16 &&
      PreExtendType != MVT::i32)
    return SDValue();

  EVT PreExtendVT = TargetType.changeVectorElementType(PreExtendType);

  if (PreExtendVT.getVectorElementCount() != TargetType.getVectorElementCount())
    return SDValue();

  // The extension must exactly double the element width.
  if (TargetType.getScalarSizeInBits() != PreExtendVT.getScalarSizeInBits() * 2)
    return SDValue();

  SDLoc DL(VectorShuffle);

  // Rebuild insert(undef, trunc(x), 0) in the narrow type.
  SDValue InsertVectorNode = DAG.getNode(
      InsertVectorElt.getOpcode(), DL, PreExtendVT, DAG.getUNDEF(PreExtendVT),
      DAG.getAnyExtOrTrunc(Extend.getOperand(0), DL, PreExtendType),
      DAG.getConstant(0, DL, MVT::i64));

  // Zero-initialized mask: splat of lane 0.
  std::vector<int> ShuffleMask(TargetType.getVectorElementCount().getValue());

  SDValue VectorShuffleNode =
      DAG.getVectorShuffle(PreExtendVT, DL, InsertVectorNode,
                           DAG.getUNDEF(PreExtendVT), ShuffleMask);

  // Extend the narrow splat with the signedness of the original extend.
  SDValue ExtendNode = DAG.getNode(IsSExt ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
                                   DL, TargetType, VectorShuffleNode);

  return ExtendNode;
}
12727 | |
12728 | |
12729 | |
// Rewrite mul(dup(ext(x)), dup(ext(y))) (in either operand) so the extend
// is applied after the splat, exposing widening-multiply patterns.
static SDValue performMulVectorExtendCombine(SDNode *Mul, SelectionDAG &DAG) {
  if (!Mul->getValueType(0).isVector())
    return SDValue();

  SDValue Op0 = performCommonVectorExtendCombine(Mul->getOperand(0), DAG);
  SDValue Op1 = performCommonVectorExtendCombine(Mul->getOperand(1), DAG);

  // Nothing matched on either side; leave the node alone.
  if (!Op0 && !Op1)
    return SDValue();

  SDLoc DL(Mul);
  // Rebuild the multiply using the transformed operand(s).
  return DAG.getNode(Mul->getOpcode(), DL, Mul->getValueType(0),
                     Op0 ? Op0 : Mul->getOperand(0),
                     Op1 ? Op1 : Mul->getOperand(1));
}
12747 | |
// Replace a multiply by a constant near a power of two with shift+add/sub:
//   mul x, (2^N + 1) -> add (shl x, N), x
//   mul x, (2^N - 1) -> sub (shl x, N), x
// and the analogous forms for negative constants, optionally followed by a
// final shift for trailing zeros in the constant.
static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const AArch64Subtarget *Subtarget) {
  // Vector multiplies get the extend-splat treatment instead.
  if (SDValue Ext = performMulVectorExtendCombine(N, DAG))
    return Ext;

  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  // Everything below requires a constant RHS.
  if (!isa<ConstantSDNode>(N->getOperand(1)))
    return SDValue();

  SDValue N0 = N->getOperand(0);
  ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(1));
  const APInt &ConstValue = C->getAPIntValue();

  // Keep small multiplies of SVE element counts as-is; they have dedicated
  // forms (the CNT* instructions take a multiplier of 1..16).
  if (IsSVECntIntrinsic(N0) ||
      (N0->getOpcode() == ISD::TRUNCATE &&
       (IsSVECntIntrinsic(N0->getOperand(0)))))
    if (ConstValue.sge(1) && ConstValue.sle(16))
      return SDValue();

  // Strip trailing zeros first: mul x, (C << k) == shl (mul x, C), k.
  unsigned TrailingZeroes = ConstValue.countTrailingZeros();
  if (TrailingZeroes) {
    // Don't break apart an extended multiply: it may fold into a single
    // widening multiply-shift instruction instead.
    if (N0->hasOneUse() && (isSignExtended(N0.getNode(), DAG) ||
                            isZeroExtended(N0.getNode(), DAG)))
      return SDValue();

    // Don't break the shl off if the multiply feeds an add/sub; the whole
    // thing may fold into a multiply-accumulate.
    if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ADD ||
                           N->use_begin()->getOpcode() == ISD::SUB))
      return SDValue();
  }

  // The constant with trailing zeros removed.
  APInt ShiftedConstValue = ConstValue.ashr(TrailingZeroes);

  unsigned ShiftAmt, AddSubOpc;
  // Is the shifted value the LHS operand of the add/sub?
  bool ShiftValUseIsN0 = true;
  // Do we need to negate the result?
  bool NegateResult = false;

  if (ConstValue.isNonNegative()) {
    // (mul x, 2^N + 1) => (add (shl x, N), x)
    // (mul x, 2^N - 1) => (sub (shl x, N), x)
    APInt SCVMinus1 = ShiftedConstValue - 1;
    APInt CVPlus1 = ConstValue + 1;
    if (SCVMinus1.isPowerOf2()) {
      ShiftAmt = SCVMinus1.logBase2();
      AddSubOpc = ISD::ADD;
    } else if (CVPlus1.isPowerOf2()) {
      ShiftAmt = CVPlus1.logBase2();
      AddSubOpc = ISD::SUB;
    } else
      return SDValue();
  } else {
    // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
    // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
    APInt CVNegPlus1 = -ConstValue + 1;
    APInt CVNegMinus1 = -ConstValue - 1;
    if (CVNegPlus1.isPowerOf2()) {
      ShiftAmt = CVNegPlus1.logBase2();
      AddSubOpc = ISD::SUB;
      ShiftValUseIsN0 = false;
    } else if (CVNegMinus1.isPowerOf2()) {
      ShiftAmt = CVNegMinus1.logBase2();
      AddSubOpc = ISD::ADD;
      NegateResult = true;
    } else
      return SDValue();
  }

  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SDValue ShiftedVal = DAG.getNode(ISD::SHL, DL, VT, N0,
                                   DAG.getConstant(ShiftAmt, DL, MVT::i64));

  SDValue AddSubN0 = ShiftValUseIsN0 ? ShiftedVal : N0;
  SDValue AddSubN1 = ShiftValUseIsN0 ? N0 : ShiftedVal;
  SDValue Res = DAG.getNode(AddSubOpc, DL, VT, AddSubN0, AddSubN1);
  assert(!(NegateResult && TrailingZeroes) &&
         "NegateResult and TrailingZeroes cannot both be true for now.");

  // Negate the result when the constant was -(2^N + 1).
  if (NegateResult)
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);

  // Re-apply the trailing zeros of the original constant.
  if (TrailingZeroes)
    return DAG.getNode(ISD::SHL, DL, VT, Res,
                       DAG.getConstant(TrailingZeroes, DL, MVT::i64));
  return Res;
}
12860 | |
// Fold a unary vector op applied to a setcc-masked constant:
//   (op (and (setcc ...), C))  ->  (bitcast (and (setcc ...), bitcast(op C)))
// The setcc produces all-ones/all-zeros lanes, so masking the op's constant
// result is equivalent to op-ing the masked constant. This lets the op on
// the constant fold away at compile time.
static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N,
                                                         SelectionDAG &DAG) {
  // Match: N(and(setcc, build_vector)), with matching total bit widths so
  // the bitcasts below are legal.
  EVT VT = N->getValueType(0);
  if (!VT.isVector() || N->getOperand(0)->getOpcode() != ISD::AND ||
      N->getOperand(0)->getOperand(0)->getOpcode() != ISD::SETCC ||
      VT.getSizeInBits() != N->getOperand(0)->getValueType(0).getSizeInBits())
    return SDValue();

  // The mask operand of the AND must be a constant build vector.
  if (BuildVectorSDNode *BV =
          dyn_cast<BuildVectorSDNode>(N->getOperand(0)->getOperand(1))) {
    if (!BV->isConstant())
      return SDValue();

    SDLoc DL(N);
    EVT IntVT = BV->getValueType(0);

    // Apply the unary op to the constant vector (folds at compile time).
    SDValue SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0));
    // Mask the transformed constant with the setcc result in the integer
    // domain, then cast back to the op's result type.
    SDValue MaskConst = DAG.getNode(ISD::BITCAST, DL, IntVT, SourceConst);
    SDValue NewAnd = DAG.getNode(ISD::AND, DL, IntVT,
                                 N->getOperand(0)->getOperand(0), MaskConst);
    SDValue Res = DAG.getNode(ISD::BITCAST, DL, VT, NewAnd);
    return Res;
  }

  return SDValue();
}
12906 | |
// Combine int-to-fp conversions of loaded integers into an FP-register
// load followed by SITOF/UITOF, avoiding a GPR->FPR transfer.
static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
                                     const AArch64Subtarget *Subtarget) {
  // First try the masked-setcc constant fold.
  if (SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG))
    return Res;

  EVT VT = N->getValueType(0);
  if (VT != MVT::f32 && VT != MVT::f64)
    return SDValue();

  // Only optimize when the source and destination sizes match, so the load
  // can simply be re-typed.
  if (VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits())
    return SDValue();

  // If the result of an integer load is only used by an integer-to-float
  // conversion, use an FP load instead of a GPR load then move.
  SDValue N0 = N->getOperand(0);
  if (Subtarget->hasNEON() && ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      // Do not change the width of a volatile load.
      !cast<LoadSDNode>(N0)->isVolatile()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
                               LN0->getPointerInfo(), LN0->getAlignment(),
                               LN0->getMemOperand()->getFlags());

    // Make sure successors of the original load stay after the new load:
    // redirect the old load's chain users to the new load.
    DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), Load.getValue(1));

    unsigned Opcode =
        (N->getOpcode() == ISD::SINT_TO_FP) ? AArch64ISD::SITOF : AArch64ISD::UITOF;
    return DAG.getNode(Opcode, SDLoc(N), VT, Load);
  }

  return SDValue();
}
12945 | |
12946 | |
12947 | |
// Fold a floating-point multiply by a power of two into the fp-to-int
// conversion: (fp_to_[su]int (fmul x, 2^C)) -> fcvtz[su] x, #C
// using the NEON fixed-point conversion intrinsics.
static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
                                     TargetLowering::DAGCombinerInfo &DCI,
                                     const AArch64Subtarget *Subtarget) {
  if (!Subtarget->hasNEON())
    return SDValue();

  if (!N->getValueType(0).isSimple())
    return SDValue();

  // Source must be a simple vector FMUL.
  SDValue Op = N->getOperand(0);
  if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
      Op.getOpcode() != ISD::FMUL)
    return SDValue();

  // The multiplier must be a constant build vector.
  SDValue ConstVec = Op->getOperand(1);
  if (!isa<BuildVectorSDNode>(ConstVec))
    return SDValue();

  MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
  uint32_t FloatBits = FloatTy.getSizeInBits();
  if (FloatBits != 32 && FloatBits != 64)
    return SDValue();

  MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
  uint32_t IntBits = IntTy.getSizeInBits();
  if (IntBits != 16 && IntBits != 32 && IntBits != 64)
    return SDValue();

  // Avoid conversions where the int type is wider than the float type; a
  // fixed-point conversion cannot widen.
  if (IntBits > FloatBits)
    return SDValue();

  // The multiplier must be a uniform power-of-two splat 2^C with
  // 0 < C <= Bits.
  BitVector UndefElements;
  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
  int32_t Bits = IntBits == 64 ? 64 : 32;
  int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, Bits + 1);
  if (C == -1 || C == 0 || C > Bits)
    return SDValue();

  MVT ResTy;
  unsigned NumLanes = Op.getValueType().getVectorNumElements();
  switch (NumLanes) {
  default:
    return SDValue();
  case 2:
    ResTy = FloatBits == 32 ? MVT::v2i32 : MVT::v2i64;
    break;
  case 4:
    ResTy = FloatBits == 32 ? MVT::v4i32 : MVT::v4i64;
    break;
  }

  // v4i64 is not a legal type; only produce it before legalization is done.
  if (ResTy == MVT::v4i64 && DCI.isBeforeLegalizeOps())
    return SDValue();

  assert((ResTy != MVT::v4i64 || DCI.isBeforeLegalizeOps()) &&
         "Illegal vector type after legalization");

  SDLoc DL(N);
  bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
  unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfp2fxs
                                      : Intrinsic::aarch64_neon_vcvtfp2fxu;
  SDValue FixConv =
      DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ResTy,
                  DAG.getConstant(IntrinsicOpcode, DL, MVT::i32),
                  Op->getOperand(0), DAG.getConstant(C, DL, MVT::i32));
  // Truncate down to the requested narrower integer type if needed.
  if (IntBits < FloatBits)
    FixConv = DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), FixConv);

  return FixConv;
}
13020 | |
13021 | |
13022 | |
// Fold a floating-point divide by a power of two into the int-to-fp
// conversion: (fdiv ([su]int_to_fp x), 2^C) -> [su]cvtf x, #C
// using the NEON fixed-point conversion intrinsics.
static SDValue performFDivCombine(SDNode *N, SelectionDAG &DAG,
                                  TargetLowering::DAGCombinerInfo &DCI,
                                  const AArch64Subtarget *Subtarget) {
  if (!Subtarget->hasNEON())
    return SDValue();

  // Numerator must be a simple vector sint_to_fp/uint_to_fp.
  SDValue Op = N->getOperand(0);
  unsigned Opc = Op->getOpcode();
  if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
      !Op.getOperand(0).getValueType().isSimple() ||
      (Opc != ISD::SINT_TO_FP && Opc != ISD::UINT_TO_FP))
    return SDValue();

  // The divisor must be a constant build vector.
  SDValue ConstVec = N->getOperand(1);
  if (!isa<BuildVectorSDNode>(ConstVec))
    return SDValue();

  MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
  int32_t IntBits = IntTy.getSizeInBits();
  if (IntBits != 16 && IntBits != 32 && IntBits != 64)
    return SDValue();

  MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
  int32_t FloatBits = FloatTy.getSizeInBits();
  if (FloatBits != 32 && FloatBits != 64)
    return SDValue();

  // Avoid conversions where the int type is wider than the float type; the
  // fixed-point conversion cannot narrow the integer input.
  if (IntBits > FloatBits)
    return SDValue();

  // The divisor must be a uniform power-of-two splat 2^C with
  // 0 < C <= FloatBits.
  BitVector UndefElements;
  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
  int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, FloatBits + 1);
  if (C == -1 || C == 0 || C > FloatBits)
    return SDValue();

  MVT ResTy;
  unsigned NumLanes = Op.getValueType().getVectorNumElements();
  switch (NumLanes) {
  default:
    return SDValue();
  case 2:
    ResTy = FloatBits == 32 ? MVT::v2i32 : MVT::v2i64;
    break;
  case 4:
    ResTy = FloatBits == 32 ? MVT::v4i32 : MVT::v4i64;
    break;
  }

  // v4i64 is not a legal type; bail out before legalization is done.
  if (ResTy == MVT::v4i64 && DCI.isBeforeLegalizeOps())
    return SDValue();

  SDLoc DL(N);
  SDValue ConvInput = Op.getOperand(0);
  bool IsSigned = Opc == ISD::SINT_TO_FP;
  // Widen a narrower integer input to the conversion's element width,
  // preserving signedness.
  if (IntBits < FloatBits)
    ConvInput = DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL,
                            ResTy, ConvInput);

  unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfxs2fp
                                      : Intrinsic::aarch64_neon_vcvtfxu2fp;
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
                     DAG.getConstant(IntrinsicOpcode, DL, MVT::i32), ConvInput,
                     DAG.getConstant(C, DL, MVT::i32));
}
13089 | |
13090 | |
13091 | |
13092 | static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount, |
13093 | bool &FromHi) { |
13094 | if (N.getOpcode() == ISD::SHL) |
13095 | FromHi = false; |
13096 | else if (N.getOpcode() == ISD::SRL) |
13097 | FromHi = true; |
13098 | else |
13099 | return false; |
13100 | |
13101 | if (!isa<ConstantSDNode>(N.getOperand(1))) |
13102 | return false; |
13103 | |
13104 | ShiftAmount = N->getConstantOperandVal(1); |
13105 | Src = N->getOperand(0); |
13106 | return true; |
13107 | } |
13108 | |
13109 | |
13110 | |
13111 | |
13112 | |
13113 | |
// An EXTR instruction extracts a register-width chunk of bits from the
// concatenation of two registers. Try to turn
//   (or (shl x, c1), (srl y, c2))  with  c1 + c2 == bitwidth
// into EXTR x, y, c2.
static SDValue tryCombineToEXTR(SDNode *N,
                                TargetLowering::DAGCombinerInfo &DCI) {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  assert(N->getOpcode() == ISD::OR && "Unexpected root");

  if (VT != MVT::i32 && VT != MVT::i64)
    return SDValue();

  // Both OR operands must be constant shifts.
  SDValue LHS;
  uint32_t ShiftLHS = 0;
  bool LHSFromHi = false;
  if (!findEXTRHalf(N->getOperand(0), LHS, ShiftLHS, LHSFromHi))
    return SDValue();

  SDValue RHS;
  uint32_t ShiftRHS = 0;
  bool RHSFromHi = false;
  if (!findEXTRHalf(N->getOperand(1), RHS, ShiftRHS, RHSFromHi))
    return SDValue();

  // If they're both trying to come from the high part of the register,
  // they're not really an EXTR.
  if (LHSFromHi == RHSFromHi)
    return SDValue();

  // The two shifts together must cover the full register width.
  if (ShiftLHS + ShiftRHS != VT.getSizeInBits())
    return SDValue();

  // Canonicalize so that LHS is the shl (high contribution) and RHS the
  // srl; EXTR's immediate is the srl amount.
  if (LHSFromHi) {
    std::swap(LHS, RHS);
    std::swap(ShiftLHS, ShiftRHS);
  }

  return DAG.getNode(AArch64ISD::EXTR, DL, VT, LHS, RHS,
                     DAG.getConstant(ShiftRHS, DL, MVT::i64));
}
13153 | |
// Try to fold (or (and A, M1), (and B, M2)) into an AArch64ISD::BSP
// (bitwise select) when M1 and M2 can be proven to be complementary masks.
// Two forms are recognized: a sub/add pair producing a mask and its
// complement, and a pair of constant build_vectors that are bitwise inverses.
static SDValue tryCombineToBSL(SDNode *N,
                               TargetLowering::DAGCombinerInfo &DCI) {
  EVT VT = N->getValueType(0);
  SelectionDAG &DAG = DCI.DAG;
  SDLoc DL(N);

  if (!VT.isVector())
    return SDValue();

  // BSP only exists for 64- and 128-bit NEON vectors.
  if (!VT.is64BitVector() && !VT.is128BitVector())
    return SDValue();

  // Both OR operands must be ANDs (the masked selects).
  SDValue N0 = N->getOperand(0);
  if (N0.getOpcode() != ISD::AND)
    return SDValue();

  SDValue N1 = N->getOperand(1);
  if (N1.getOpcode() != ISD::AND)
    return SDValue();

  // First form: one AND operand is (sub 0, X) and the other side's is
  // (add X, -1); those two values are bitwise complements of each other,
  // so the OR is a select on the mask (sub 0, X). Try all four operand
  // placements of the sub/add within the two ANDs.
  for (int i = 1; i >= 0; --i) {
    for (int j = 1; j >= 0; --j) {
      SDValue O0 = N0->getOperand(i);
      SDValue O1 = N1->getOperand(j);
      SDValue Sub, Add, SubSibling, AddSibling;

      // Identify which AND holds the SUB and which holds the ADD.
      if (O0.getOpcode() == ISD::SUB && O1.getOpcode() == ISD::ADD) {
        Sub = O0;
        Add = O1;
        SubSibling = N0->getOperand(1 - i);
        AddSibling = N1->getOperand(1 - j);
      } else if (O0.getOpcode() == ISD::ADD && O1.getOpcode() == ISD::SUB) {
        Add = O0;
        Sub = O1;
        AddSibling = N0->getOperand(1 - i);
        SubSibling = N1->getOperand(1 - j);
      } else
        continue;

      // The SUB must be (sub 0, X) ...
      if (!ISD::isBuildVectorAllZeros(Sub.getOperand(0).getNode()))
        continue;

      // ... and the ADD must be (add X, -1).
      if (!ISD::isBuildVectorAllOnes(Add.getOperand(1).getNode()))
        continue;

      // Both must be built from the same X for the masks to be complements.
      if (Sub.getOperand(1) != Add.getOperand(0))
        continue;

      return DAG.getNode(AArch64ISD::BSP, DL, VT, Sub, SubSibling, AddSibling);
    }
  }

  // Second form: both masks are constant build_vectors and every element of
  // one is the bitwise NOT (within the element width) of the corresponding
  // element of the other.
  unsigned Bits = VT.getScalarSizeInBits();
  uint64_t BitMask = Bits == 64 ? -1ULL : ((1ULL << Bits) - 1);
  for (int i = 1; i >= 0; --i)
    for (int j = 1; j >= 0; --j) {
      BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(i));
      BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(j));
      if (!BVN0 || !BVN1)
        continue;

      // Check element-wise complement, masked to the element width.
      bool FoundMatch = true;
      for (unsigned k = 0; k < VT.getVectorNumElements(); ++k) {
        ConstantSDNode *CN0 = dyn_cast<ConstantSDNode>(BVN0->getOperand(k));
        ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(BVN1->getOperand(k));
        if (!CN0 || !CN1 ||
            CN0->getZExtValue() != (BitMask & ~CN1->getZExtValue())) {
          FoundMatch = false;
          break;
        }
      }

      if (FoundMatch)
        return DAG.getNode(AArch64ISD::BSP, DL, VT, SDValue(BVN0, 0),
                           N0->getOperand(1 - i), N1->getOperand(1 - j));
    }

  return SDValue();
}
13243 | |
13244 | static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, |
13245 | const AArch64Subtarget *Subtarget) { |
13246 | |
13247 | SelectionDAG &DAG = DCI.DAG; |
13248 | EVT VT = N->getValueType(0); |
13249 | |
13250 | if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) |
13251 | return SDValue(); |
13252 | |
13253 | if (SDValue Res = tryCombineToEXTR(N, DCI)) |
13254 | return Res; |
13255 | |
13256 | if (SDValue Res = tryCombineToBSL(N, DCI)) |
13257 | return Res; |
13258 | |
13259 | return SDValue(); |
13260 | } |
13261 | |
13262 | static bool isConstantSplatVectorMaskForType(SDNode *N, EVT MemVT) { |
13263 | if (!MemVT.getVectorElementType().isSimple()) |
13264 | return false; |
13265 | |
13266 | uint64_t MaskForTy = 0ull; |
13267 | switch (MemVT.getVectorElementType().getSimpleVT().SimpleTy) { |
13268 | case MVT::i8: |
13269 | MaskForTy = 0xffull; |
13270 | break; |
13271 | case MVT::i16: |
13272 | MaskForTy = 0xffffull; |
13273 | break; |
13274 | case MVT::i32: |
13275 | MaskForTy = 0xffffffffull; |
13276 | break; |
13277 | default: |
13278 | return false; |
13279 | break; |
13280 | } |
13281 | |
13282 | if (N->getOpcode() == AArch64ISD::DUP || N->getOpcode() == ISD::SPLAT_VECTOR) |
13283 | if (auto *Op0 = dyn_cast<ConstantSDNode>(N->getOperand(0))) |
13284 | return Op0->getAPIntValue().getLimitedValue() == MaskForTy; |
13285 | |
13286 | return false; |
13287 | } |
13288 | |
13289 | static SDValue performSVEAndCombine(SDNode *N, |
13290 | TargetLowering::DAGCombinerInfo &DCI) { |
13291 | if (DCI.isBeforeLegalizeOps()) |
13292 | return SDValue(); |
13293 | |
13294 | SelectionDAG &DAG = DCI.DAG; |
13295 | SDValue Src = N->getOperand(0); |
13296 | unsigned Opc = Src->getOpcode(); |
13297 | |
13298 | |
13299 | if (Opc == AArch64ISD::UUNPKHI || Opc == AArch64ISD::UUNPKLO) { |
13300 | SDValue UnpkOp = Src->getOperand(0); |
13301 | SDValue Dup = N->getOperand(1); |
13302 | |
13303 | if (Dup.getOpcode() != AArch64ISD::DUP) |
13304 | return SDValue(); |
13305 | |
13306 | SDLoc DL(N); |
13307 | ConstantSDNode *C = dyn_cast<ConstantSDNode>(Dup->getOperand(0)); |
13308 | uint64_t ExtVal = C->getZExtValue(); |
13309 | |
13310 | |
13311 | |
13312 | EVT EltTy = UnpkOp->getValueType(0).getVectorElementType(); |
13313 | if ((ExtVal == 0xFF && EltTy == MVT::i8) || |
13314 | (ExtVal == 0xFFFF && EltTy == MVT::i16) || |
13315 | (ExtVal == 0xFFFFFFFF && EltTy == MVT::i32)) |
13316 | return Src; |
13317 | |
13318 | |
13319 | APInt Mask = C->getAPIntValue().trunc(EltTy.getSizeInBits()); |
13320 | |
13321 | |
13322 | |
13323 | Dup = DAG.getNode(AArch64ISD::DUP, DL, |
13324 | UnpkOp->getValueType(0), |
13325 | DAG.getConstant(Mask.zextOrTrunc(32), DL, MVT::i32)); |
13326 | |
13327 | SDValue And = DAG.getNode(ISD::AND, DL, |
13328 | UnpkOp->getValueType(0), UnpkOp, Dup); |
13329 | |
13330 | return DAG.getNode(Opc, DL, N->getValueType(0), And); |
13331 | } |
13332 | |
13333 | if (!EnableCombineMGatherIntrinsics) |
13334 | return SDValue(); |
13335 | |
13336 | SDValue Mask = N->getOperand(1); |
13337 | |
13338 | if (!Src.hasOneUse()) |
13339 | return SDValue(); |
13340 | |
13341 | EVT MemVT; |
13342 | |
13343 | |
13344 | |
13345 | switch (Opc) { |
13346 | case AArch64ISD::LD1_MERGE_ZERO: |
13347 | case AArch64ISD::LDNF1_MERGE_ZERO: |
13348 | case AArch64ISD::LDFF1_MERGE_ZERO: |
13349 | MemVT = cast<VTSDNode>(Src->getOperand(3))->getVT(); |
13350 | break; |
13351 | case AArch64ISD::GLD1_MERGE_ZERO: |
13352 | case AArch64ISD::GLD1_SCALED_MERGE_ZERO: |
13353 | case AArch64ISD::GLD1_SXTW_MERGE_ZERO: |
13354 | case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO: |
13355 | case AArch64ISD::GLD1_UXTW_MERGE_ZERO: |
13356 | case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO: |
13357 | case AArch64ISD::GLD1_IMM_MERGE_ZERO: |
13358 | case AArch64ISD::GLDFF1_MERGE_ZERO: |
13359 | case AArch64ISD::GLDFF1_SCALED_MERGE_ZERO: |
13360 | case AArch64ISD::GLDFF1_SXTW_MERGE_ZERO: |
13361 | case AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO: |
13362 | case AArch64ISD::GLDFF1_UXTW_MERGE_ZERO: |
13363 | case AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO: |
13364 | case AArch64ISD::GLDFF1_IMM_MERGE_ZERO: |
13365 | case AArch64ISD::GLDNT1_MERGE_ZERO: |
13366 | MemVT = cast<VTSDNode>(Src->getOperand(4))->getVT(); |
13367 | break; |
13368 | default: |
13369 | return SDValue(); |
13370 | } |
13371 | |
13372 | if (isConstantSplatVectorMaskForType(Mask.getNode(), MemVT)) |
13373 | return Src; |
13374 | |
13375 | return SDValue(); |
13376 | } |
13377 | |
13378 | static SDValue performANDCombine(SDNode *N, |
13379 | TargetLowering::DAGCombinerInfo &DCI) { |
13380 | SelectionDAG &DAG = DCI.DAG; |
13381 | SDValue LHS = N->getOperand(0); |
13382 | EVT VT = N->getValueType(0); |
13383 | if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT)) |
13384 | return SDValue(); |
13385 | |
13386 | if (VT.isScalableVector()) |
13387 | return performSVEAndCombine(N, DCI); |
13388 | |
13389 | |
13390 | |
13391 | if (!(VT.is64BitVector() || VT.is128BitVector())) |
13392 | return SDValue(); |
13393 | |
13394 | BuildVectorSDNode *BVN = |
13395 | dyn_cast<BuildVectorSDNode>(N->getOperand(1).getNode()); |
13396 | if (!BVN) |
13397 | return SDValue(); |
13398 | |
13399 | |
13400 | |
13401 | |
13402 | |
13403 | APInt DefBits(VT.getSizeInBits(), 0); |
13404 | APInt UndefBits(VT.getSizeInBits(), 0); |
13405 | if (resolveBuildVector(BVN, DefBits, UndefBits)) { |
13406 | SDValue NewOp; |
13407 | |
13408 | DefBits = ~DefBits; |
13409 | if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::BICi, SDValue(N, 0), DAG, |
13410 | DefBits, &LHS)) || |
13411 | (NewOp = tryAdvSIMDModImm16(AArch64ISD::BICi, SDValue(N, 0), DAG, |
13412 | DefBits, &LHS))) |
13413 | return NewOp; |
13414 | |
13415 | UndefBits = ~UndefBits; |
13416 | if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::BICi, SDValue(N, 0), DAG, |
13417 | UndefBits, &LHS)) || |
13418 | (NewOp = tryAdvSIMDModImm16(AArch64ISD::BICi, SDValue(N, 0), DAG, |
13419 | UndefBits, &LHS))) |
13420 | return NewOp; |
13421 | } |
13422 | |
13423 | return SDValue(); |
13424 | } |
13425 | |
13426 | static SDValue performSRLCombine(SDNode *N, |
13427 | TargetLowering::DAGCombinerInfo &DCI) { |
13428 | SelectionDAG &DAG = DCI.DAG; |
13429 | EVT VT = N->getValueType(0); |
13430 | if (VT != MVT::i32 && VT != MVT::i64) |
13431 | return SDValue(); |
13432 | |
13433 | |
13434 | |
13435 | |
13436 | SDValue N0 = N->getOperand(0); |
13437 | if (N0.getOpcode() == ISD::BSWAP) { |
13438 | SDLoc DL(N); |
13439 | SDValue N1 = N->getOperand(1); |
13440 | SDValue N00 = N0.getOperand(0); |
13441 | if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) { |
13442 | uint64_t ShiftAmt = C->getZExtValue(); |
13443 | if (VT == MVT::i32 && ShiftAmt == 16 && |
13444 | DAG.MaskedValueIsZero(N00, APInt::getHighBitsSet(32, 16))) |
13445 | return DAG.getNode(ISD::ROTR, DL, VT, N0, N1); |
13446 | if (VT == MVT::i64 && ShiftAmt == 32 && |
13447 | DAG.MaskedValueIsZero(N00, APInt::getHighBitsSet(64, 32))) |
13448 | return DAG.getNode(ISD::ROTR, DL, VT, N0, N1); |
13449 | } |
13450 | } |
13451 | return SDValue(); |
13452 | } |
13453 | |
13454 | |
13455 | |
13456 | |
13457 | |
13458 | |
13459 | |
13460 | |
13461 | |
13462 | |
13463 | |
13464 | |
// Match a halving-add pattern feeding this truncate's operand and select the
// corresponding AArch64 HADD/RHADD node:
//   vlshr (add (ext A), (ext B)), 1            --> [su]hadd A, B
//   vlshr (sub (ext A), (xor (ext B), -1)), 1  --> [su]rhadd A, B
// The sub form works because (xor X, -1) is ~X and A - ~B == A + B + 1 in
// two's complement, i.e. the rounding variant.
static SDValue
performVectorTruncateCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
                             SelectionDAG &DAG) {
  EVT VT = N->getValueType(0);

  // The operand must be a logical shift right ...
  SDValue Shift = N->getOperand(0);
  if (Shift.getOpcode() != AArch64ISD::VLSHR)
    return SDValue();

  // ... by exactly one (the halving step).
  uint64_t ShiftAmount = Shift.getConstantOperandVal(1);
  if (ShiftAmount != 1)
    return SDValue();

  SDValue ExtendOpA, ExtendOpB;
  SDValue ShiftOp0 = Shift.getOperand(0);
  unsigned ShiftOp0Opc = ShiftOp0.getOpcode();
  if (ShiftOp0Opc == ISD::SUB) {
    // Rounding form: the subtrahend must be (xor (ext B), all-ones).
    SDValue Xor = ShiftOp0.getOperand(1);
    if (Xor.getOpcode() != ISD::XOR)
      return SDValue();

    // The XOR mask must be a constant splat ...
    uint64_t C;
    if (!isAllConstantBuildVector(Xor.getOperand(1), C))
      return SDValue();

    // ... of all ones at the result element width (i.e. a bitwise NOT).
    unsigned ElemSizeInBits = VT.getScalarSizeInBits();
    APInt CAsAPInt(ElemSizeInBits, C);
    if (CAsAPInt != APInt::getAllOnesValue(ElemSizeInBits))
      return SDValue();

    ExtendOpA = Xor.getOperand(0);
    ExtendOpB = ShiftOp0.getOperand(0);
  } else if (ShiftOp0Opc == ISD::ADD) {
    // Plain (truncating) form.
    ExtendOpA = ShiftOp0.getOperand(0);
    ExtendOpB = ShiftOp0.getOperand(1);
  } else
    return SDValue();

  // Both inputs must be extended the same way; the kind picks signed vs
  // unsigned halving-add.
  unsigned ExtendOpAOpc = ExtendOpA.getOpcode();
  unsigned ExtendOpBOpc = ExtendOpB.getOpcode();
  if (!(ExtendOpAOpc == ExtendOpBOpc &&
        (ExtendOpAOpc == ISD::ZERO_EXTEND || ExtendOpAOpc == ISD::SIGN_EXTEND)))
    return SDValue();

  // The pre-extension operands must already have the truncate's result type,
  // so the HADD can be emitted directly on them.
  SDValue OpA = ExtendOpA.getOperand(0);
  SDValue OpB = ExtendOpB.getOperand(0);
  EVT OpAVT = OpA.getValueType();
  assert(ExtendOpA.getValueType() == ExtendOpB.getValueType());
  if (!(VT == OpAVT && OpAVT == OpB.getValueType()))
    return SDValue();

  SDLoc DL(N);
  bool IsSignExtend = ExtendOpAOpc == ISD::SIGN_EXTEND;
  bool IsRHADD = ShiftOp0Opc == ISD::SUB;
  unsigned HADDOpc = IsSignExtend
                         ? (IsRHADD ? AArch64ISD::SRHADD : AArch64ISD::SHADD)
                         : (IsRHADD ? AArch64ISD::URHADD : AArch64ISD::UHADD);
  SDValue ResultHADD = DAG.getNode(HADDOpc, DL, VT, OpA, OpB);

  return ResultHADD;
}
13536 | |
13537 | static bool hasPairwiseAdd(unsigned Opcode, EVT VT, bool FullFP16) { |
13538 | switch (Opcode) { |
13539 | case ISD::FADD: |
13540 | return (FullFP16 && VT == MVT::f16) || VT == MVT::f32 || VT == MVT::f64; |
13541 | case ISD::ADD: |
13542 | return VT == MVT::i64; |
13543 | default: |
13544 | return false; |
13545 | } |
13546 | } |
13547 | |
13548 | static SDValue performExtractVectorEltCombine(SDNode *N, SelectionDAG &DAG) { |
13549 | SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); |
13550 | ConstantSDNode *ConstantN1 = dyn_cast<ConstantSDNode>(N1); |
13551 | |
13552 | EVT VT = N->getValueType(0); |
13553 | const bool FullFP16 = |
13554 | static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16(); |
13555 | |
13556 | |
13557 | |
13558 | |
13559 | |
13560 | |
13561 | |
13562 | |
13563 | if (ConstantN1 && ConstantN1->getZExtValue() == 0 && |
13564 | hasPairwiseAdd(N0->getOpcode(), VT, FullFP16)) { |
13565 | SDLoc DL(N0); |
13566 | SDValue N00 = N0->getOperand(0); |
13567 | SDValue N01 = N0->getOperand(1); |
13568 | |
13569 | ShuffleVectorSDNode *Shuffle = dyn_cast<ShuffleVectorSDNode>(N01); |
13570 | SDValue Other = N00; |
13571 | |
13572 | |
13573 | if (!Shuffle) { |
13574 | Shuffle = dyn_cast<ShuffleVectorSDNode>(N00); |
13575 | Other = N01; |
13576 | } |
13577 | |
13578 | if (Shuffle && Shuffle->getMaskElt(0) == 1 && |
13579 | Other == Shuffle->getOperand(0)) { |
13580 | return DAG.getNode(N0->getOpcode(), DL, VT, |
13581 | DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Other, |
13582 | DAG.getConstant(0, DL, MVT::i64)), |
13583 | DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Other, |
13584 | DAG.getConstant(1, DL, MVT::i64))); |
13585 | } |
13586 | } |
13587 | |
13588 | return SDValue(); |
13589 | } |
13590 | |
// DAG combines for ISD::CONCAT_VECTORS. Several independent patterns are
// tried in turn; see the comments on each below.
static SDValue performConcatVectorsCombine(SDNode *N,
                                           TargetLowering::DAGCombinerInfo &DCI,
                                           SelectionDAG &DAG) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
  unsigned N0Opc = N0->getOpcode(), N1Opc = N1->getOpcode();

  // Pattern 1: concat of two truncates whose sources are v2i64/v4i32 and
  // shrink the element width by 4x. Instead of truncating twice through an
  // (often illegal) intermediate type, bitcast both sources to a half-width
  // type, take the even-indexed sub-elements with one shuffle (Mask[i] = 2i),
  // and truncate the shuffle result once.
  if (N->getNumOperands() == 2 && N0Opc == ISD::TRUNCATE &&
      N1Opc == ISD::TRUNCATE) {
    SDValue N00 = N0->getOperand(0);
    SDValue N10 = N1->getOperand(0);
    EVT N00VT = N00.getValueType();

    if (N00VT == N10.getValueType() &&
        (N00VT == MVT::v2i64 || N00VT == MVT::v4i32) &&
        N00VT.getScalarSizeInBits() == 4 * VT.getScalarSizeInBits()) {
      MVT MidVT = (N00VT == MVT::v2i64 ? MVT::v4i32 : MVT::v8i16);
      SmallVector<int, 8> Mask(MidVT.getVectorNumElements());
      for (size_t i = 0; i < Mask.size(); ++i)
        Mask[i] = i * 2;
      return DAG.getNode(ISD::TRUNCATE, dl, VT,
                         DAG.getVectorShuffle(
                             MidVT, dl,
                             DAG.getNode(ISD::BITCAST, dl, MidVT, N00),
                             DAG.getNode(ISD::BITCAST, dl, MidVT, N10), Mask));
    }
  }

  // The remaining combines run only after operation legalization, when the
  // node shapes below are stable.
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  // Pattern 2: concat of the low and high halves of two [SU][R]HADDs that
  // each operate on extract_subvector halves of the same full-width sources.
  // Folds to a single full-width HADD of the sources:
  //   concat(hadd(lo(A),lo(B)), hadd(hi(A),hi(B))) --> hadd(A, B)
  if (N->getNumOperands() == 2 && N0Opc == N1Opc &&
      (N0Opc == AArch64ISD::URHADD || N0Opc == AArch64ISD::SRHADD ||
       N0Opc == AArch64ISD::UHADD || N0Opc == AArch64ISD::SHADD)) {
    SDValue N00 = N0->getOperand(0);
    SDValue N01 = N0->getOperand(1);
    SDValue N10 = N1->getOperand(0);
    SDValue N11 = N1->getOperand(1);

    EVT N00VT = N00.getValueType();
    EVT N10VT = N10.getValueType();

    if (N00->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
        N01->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
        N10->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
        N11->getOpcode() == ISD::EXTRACT_SUBVECTOR && N00VT == N10VT) {
      SDValue N00Source = N00->getOperand(0);
      SDValue N01Source = N01->getOperand(0);
      SDValue N10Source = N10->getOperand(0);
      SDValue N11Source = N11->getOperand(0);

      // Same sources on both sides, and the sources already have the
      // concat's result type.
      if (N00Source == N10Source && N01Source == N11Source &&
          N00Source.getValueType() == VT && N01Source.getValueType() == VT) {
        assert(N0.getValueType() == N1.getValueType());

        uint64_t N00Index = N00.getConstantOperandVal(1);
        uint64_t N01Index = N01.getConstantOperandVal(1);
        uint64_t N10Index = N10.getConstantOperandVal(1);
        uint64_t N11Index = N11.getConstantOperandVal(1);

        // N0 must use the low halves and N1 the high halves.
        if (N00Index == N01Index && N10Index == N11Index && N00Index == 0 &&
            N10Index == N00VT.getVectorNumElements())
          return DAG.getNode(N0Opc, dl, VT, N00Source, N01Source);
      }
    }
  }

  // Pattern 3: concat of a 2-element (64-bit scalar) vector with itself.
  // Widen the operand and duplicate lane 0 across the result instead of
  // concatenating.
  if (N->getNumOperands() == 2 && N0 == N1 && VT.getVectorNumElements() == 2) {
    assert(VT.getScalarSizeInBits() == 64);
    return DAG.getNode(AArch64ISD::DUPLANE64, dl, VT, WidenVector(N0, DAG),
                       DAG.getConstant(0, dl, MVT::i64));
  }

  // Pattern 4: concat(x, bitcast(y)). Canonicalize by performing the concat
  // in y's (vector) type and bitcasting the result, so the bitcast does not
  // block other concat combines. Only two-operand concats are handled.
  if (N->getNumOperands() != 2 || N1Opc != ISD::BITCAST)
    return SDValue();
  SDValue RHS = N1->getOperand(0);
  MVT RHSTy = RHS.getValueType().getSimpleVT();

  // The bitcast source must itself be a vector for the reassociation.
  if (!RHSTy.isVector())
    return SDValue();

  LLVM_DEBUG(
      dbgs() << "aarch64-lower: concat_vectors bitcast simplification\n");

  MVT ConcatTy = MVT::getVectorVT(RHSTy.getVectorElementType(),
                                  RHSTy.getVectorNumElements() * 2);
  return DAG.getNode(ISD::BITCAST, dl, VT,
                     DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatTy,
                                 DAG.getNode(ISD::BITCAST, dl, RHSTy, N0),
                                 RHS));
}
13719 | |
13720 | static SDValue |
13721 | performInsertSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, |
13722 | SelectionDAG &DAG) { |
13723 | SDValue Vec = N->getOperand(0); |
13724 | SDValue SubVec = N->getOperand(1); |
13725 | uint64_t IdxVal = N->getConstantOperandVal(2); |
13726 | EVT VecVT = Vec.getValueType(); |
13727 | EVT SubVT = SubVec.getValueType(); |
13728 | |
13729 | |
13730 | if (!VecVT.isFixedLengthVector() || |
13731 | !DAG.getTargetLoweringInfo().isTypeLegal(VecVT) || |
13732 | !DAG.getTargetLoweringInfo().isTypeLegal(SubVT)) |
13733 | return SDValue(); |
13734 | |
13735 | |
13736 | if (IdxVal == 0 && Vec.isUndef()) |
13737 | return SDValue(); |
13738 | |
13739 | |
13740 | unsigned NumSubElts = SubVT.getVectorNumElements(); |
13741 | if ((SubVT.getSizeInBits() * 2) != VecVT.getSizeInBits() || |
13742 | (IdxVal != 0 && IdxVal != NumSubElts)) |
13743 | return SDValue(); |
13744 | |
13745 | |
13746 | |
13747 | |
13748 | SDLoc DL(N); |
13749 | SDValue Lo, Hi; |
13750 | if (IdxVal == 0) { |
13751 | Lo = SubVec; |
13752 | Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec, |
13753 | DAG.getVectorIdxConstant(NumSubElts, DL)); |
13754 | } else { |
13755 | Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec, |
13756 | DAG.getVectorIdxConstant(0, DL)); |
13757 | Hi = SubVec; |
13758 | } |
13759 | return DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Lo, Hi); |
13760 | } |
13761 | |
// For an INTRINSIC_WO_CHAIN fixed-point conversion whose scalar input is an
// extract_vector_elt, perform the conversion on the whole vector and extract
// the converted lane instead, keeping the value in vector registers.
// N's operands are (intrinsic-id, value, shift); the lane index is preserved.
static SDValue tryCombineFixedPointConvert(SDNode *N,
                                           TargetLowering::DAGCombinerInfo &DCI,
                                           SelectionDAG &DAG) {
  // Wait until after everything is legalized so the node shapes are stable.
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  // Only fire when the converted value is pulled out of a vector.
  SDValue Op1 = N->getOperand(1);
  if (Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    SDValue IID = N->getOperand(0);
    SDValue Shift = N->getOperand(2);
    SDValue Vec = Op1.getOperand(0);
    SDValue Lane = Op1.getOperand(1);
    EVT ResTy = N->getValueType(0);
    EVT VecResTy;
    SDLoc DL(N);

    // Only 128-bit integer source vectors are expected here; pick the
    // floating-point result type of matching element count/width.
    assert(Vec.getValueSizeInBits() == 128 &&
           "unexpected vector size on extract_vector_elt!");
    if (Vec.getValueType() == MVT::v4i32)
      VecResTy = MVT::v4f32;
    else if (Vec.getValueType() == MVT::v2i64)
      VecResTy = MVT::v2f64;
    else
      llvm_unreachable("unexpected vector type!");

    // Convert the whole vector, then extract the originally requested lane.
    SDValue Convert =
        DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VecResTy, IID, Vec, Shift);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResTy, Convert, Lane);
  }
  return SDValue();
}
13808 | |
13809 | |
13810 | |
13811 | |
13812 | |
13813 | |
13814 | |
13815 | |
13816 | |
13817 | |
13818 | |
13819 | |
13820 | |
13821 | |
13822 | |
13823 | |
13824 | static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG) { |
13825 | switch (N.getOpcode()) { |
13826 | case AArch64ISD::DUP: |
13827 | case AArch64ISD::DUPLANE8: |
13828 | case AArch64ISD::DUPLANE16: |
13829 | case AArch64ISD::DUPLANE32: |
13830 | case AArch64ISD::DUPLANE64: |
13831 | case AArch64ISD::MOVI: |
13832 | case AArch64ISD::MOVIshift: |
13833 | case AArch64ISD::MOVIedit: |
13834 | case AArch64ISD::MOVImsl: |
13835 | case AArch64ISD::MVNIshift: |
13836 | case AArch64ISD::MVNImsl: |
13837 | break; |
13838 | default: |
13839 | |
13840 | |
13841 | |
13842 | return SDValue(); |
13843 | } |
13844 | |
13845 | MVT NarrowTy = N.getSimpleValueType(); |
13846 | if (!NarrowTy.is64BitVector()) |
13847 | return SDValue(); |
13848 | |
13849 | MVT ElementTy = NarrowTy.getVectorElementType(); |
13850 | unsigned NumElems = NarrowTy.getVectorNumElements(); |
13851 | MVT NewVT = MVT::getVectorVT(ElementTy, NumElems * 2); |
13852 | |
13853 | SDLoc dl(N); |
13854 | return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NarrowTy, |
13855 | DAG.getNode(N->getOpcode(), dl, NewVT, N->ops()), |
13856 | DAG.getConstant(NumElems, dl, MVT::i64)); |
13857 | } |
13858 | |
13859 | static bool isEssentiallyExtractHighSubvector(SDValue N) { |
13860 | if (N.getOpcode() == ISD::BITCAST) |
13861 | N = N.getOperand(0); |
13862 | if (N.getOpcode() != ISD::EXTRACT_SUBVECTOR) |
13863 | return false; |
13864 | return cast<ConstantSDNode>(N.getOperand(1))->getAPIntValue() == |
13865 | N.getOperand(0).getValueType().getVectorNumElements() / 2; |
13866 | } |
13867 | |
13868 | |
// Describes a generic ISD::SETCC: the two compared operands and the
// IR-level condition code.
struct GenericSetCCInfo {
  const SDValue *Opnd0; // Left-hand compared value.
  const SDValue *Opnd1; // Right-hand compared value.
  ISD::CondCode CC;     // Comparison predicate.
};
13874 | |
13875 | |
// Describes an already-lowered AArch64 comparison: the flag-producing
// compare node and the AArch64 condition code tested against it.
struct AArch64SetCCInfo {
  const SDValue *Cmp;     // Node producing the NZCV flags.
  AArch64CC::CondCode CC; // Condition to test.
};
13880 | |
13881 | |
// A setcc description in either generic or AArch64-lowered form. Which
// member is active is tracked externally by SetCCInfoAndKind::IsAArch64.
union SetCCInfo {
  GenericSetCCInfo Generic;
  AArch64SetCCInfo AArch64;
};
13886 | |
13887 | |
13888 | |
13889 | |
// Tagged wrapper for SetCCInfo: IsAArch64 selects the active union member
// (AArch64 when true, Generic otherwise).
struct SetCCInfoAndKind {
  SetCCInfo Info;
  bool IsAArch64;
};
13894 | |
13895 | |
13896 | |
13897 | |
13898 | |
13899 | |
13900 | |
// Returns true when Op is a boolean-producing comparison, filling SetCCInfo
// with its description. Two forms are recognized: a generic ISD::SETCC, or
// an AArch64 CSEL selecting between the constants 1 and 0 (in either order;
// the condition is inverted to normalize it to "1 when true").
static bool isSetCC(SDValue Op, SetCCInfoAndKind &SetCCInfo) {
  // Generic setcc: record the operands and condition directly.
  if (Op.getOpcode() == ISD::SETCC) {
    SetCCInfo.Info.Generic.Opnd0 = &Op.getOperand(0);
    SetCCInfo.Info.Generic.Opnd1 = &Op.getOperand(1);
    SetCCInfo.Info.Generic.CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
    SetCCInfo.IsAArch64 = false;
    return true;
  }

  // Otherwise it must be a lowered CSEL; operands are
  // (true-value, false-value, condition, flags).
  if (Op.getOpcode() != AArch64ISD::CSEL)
    return false;

  // Record the flag-producing compare and the condition code.
  SetCCInfo.Info.AArch64.Cmp = &Op.getOperand(3);
  SetCCInfo.IsAArch64 = true;
  SetCCInfo.Info.AArch64.CC = static_cast<AArch64CC::CondCode>(
      cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());

  // Only a CSEL between two constants can encode a boolean.
  ConstantSDNode *TValue = dyn_cast<ConstantSDNode>(Op.getOperand(0));
  ConstantSDNode *FValue = dyn_cast<ConstantSDNode>(Op.getOperand(1));

  // Both selected values must be constants.
  if (!TValue || !FValue)
    return false;

  // If it is (csel 0, 1, cc), flip the values and invert the condition so
  // the normalized form is (csel 1, 0, cc').
  if (!TValue->isOne()) {
    std::swap(TValue, FValue);
    SetCCInfo.Info.AArch64.CC =
        AArch64CC::getInvertedCondCode(SetCCInfo.Info.AArch64.CC);
  }
  return TValue->isOne() && FValue->isNullValue();
}
13942 | |
13943 | |
13944 | static bool isSetCCOrZExtSetCC(const SDValue& Op, SetCCInfoAndKind &Info) { |
13945 | if (isSetCC(Op, Info)) |
13946 | return true; |
13947 | return ((Op.getOpcode() == ISD::ZERO_EXTEND) && |
13948 | isSetCC(Op->getOperand(0), Info)); |
13949 | } |
13950 | |
13951 | |
13952 | |
13953 | |
13954 | |
13955 | |
13956 | |
// Fold (add X, (setcc ...)) into (csel X, (add X, 1), !cc, cmp): the +1 is
// only taken when the comparison holds, selected via the inverted condition.
static SDValue performSetccAddFolding(SDNode *Op, SelectionDAG &DAG) {
  assert(Op && Op->getOpcode() == ISD::ADD && "Unexpected operation!");
  SDValue LHS = Op->getOperand(0);
  SDValue RHS = Op->getOperand(1);
  SetCCInfoAndKind InfoAndKind;

  // If both operands are comparisons, the fold is ambiguous; bail out.
  if (isSetCCOrZExtSetCC(LHS, InfoAndKind) &&
      isSetCCOrZExtSetCC(RHS, InfoAndKind))
    return SDValue();

  // Normalize so the comparison (and its description in InfoAndKind) is in
  // LHS and the value being incremented is in RHS.
  if (!isSetCCOrZExtSetCC(LHS, InfoAndKind)) {
    std::swap(LHS, RHS);
    if (!isSetCCOrZExtSetCC(LHS, InfoAndKind))
      return SDValue();
  }

  // Only i32/i64 comparisons can feed an AArch64 compare+CSEL.
  EVT CmpVT = InfoAndKind.IsAArch64
                  ? InfoAndKind.Info.AArch64.Cmp->getOperand(0).getValueType()
                  : InfoAndKind.Info.Generic.Opnd0->getValueType();
  if (CmpVT != MVT::i32 && CmpVT != MVT::i64)
    return SDValue();

  // Obtain the flag-producing compare and the INVERTED condition code: the
  // CSEL below picks the un-incremented value when the setcc is false.
  SDValue CCVal;
  SDValue Cmp;
  SDLoc dl(Op);
  if (InfoAndKind.IsAArch64) {
    CCVal = DAG.getConstant(
        AArch64CC::getInvertedCondCode(InfoAndKind.Info.AArch64.CC), dl,
        MVT::i32);
    Cmp = *InfoAndKind.Info.AArch64.Cmp;
  } else
    Cmp = getAArch64Cmp(
        *InfoAndKind.Info.Generic.Opnd0, *InfoAndKind.Info.Generic.Opnd1,
        ISD::getSetCCInverse(InfoAndKind.Info.Generic.CC, CmpVT), CCVal, DAG,
        dl);

  // csel RHS, RHS+1, !cc  ==  RHS + setcc(cc).
  EVT VT = Op->getValueType(0);
  LHS = DAG.getNode(ISD::ADD, dl, VT, RHS, DAG.getConstant(1, dl, VT));
  return DAG.getNode(AArch64ISD::CSEL, dl, VT, RHS, LHS, CCVal, Cmp);
}
14002 | |
14003 | |
14004 | static SDValue performUADDVCombine(SDNode *N, SelectionDAG &DAG) { |
14005 | EVT VT = N->getValueType(0); |
14006 | |
14007 | if (N->getOpcode() != ISD::ADD || !VT.isScalarInteger()) |
14008 | return SDValue(); |
14009 | |
14010 | SDValue LHS = N->getOperand(0); |
14011 | SDValue RHS = N->getOperand(1); |
14012 | if (LHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT || |
14013 | RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT || LHS.getValueType() != VT) |
14014 | return SDValue(); |
14015 | |
14016 | auto *LHSN1 = dyn_cast<ConstantSDNode>(LHS->getOperand(1)); |
14017 | auto *RHSN1 = dyn_cast<ConstantSDNode>(RHS->getOperand(1)); |
14018 | if (!LHSN1 || LHSN1 != RHSN1 || !RHSN1->isNullValue()) |
14019 | return SDValue(); |
14020 | |
14021 | SDValue Op1 = LHS->getOperand(0); |
14022 | SDValue Op2 = RHS->getOperand(0); |
14023 | EVT OpVT1 = Op1.getValueType(); |
14024 | EVT OpVT2 = Op2.getValueType(); |
14025 | if (Op1.getOpcode() != AArch64ISD::UADDV || OpVT1 != OpVT2 || |
14026 | Op2.getOpcode() != AArch64ISD::UADDV || |
14027 | OpVT1.getVectorElementType() != VT) |
14028 | return SDValue(); |
14029 | |
14030 | SDValue Val1 = Op1.getOperand(0); |
14031 | SDValue Val2 = Op2.getOperand(0); |
14032 | EVT ValVT = Val1->getValueType(0); |
14033 | SDLoc DL(N); |
14034 | SDValue AddVal = DAG.getNode(ISD::ADD, DL, ValVT, Val1, Val2); |
14035 | return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, |
14036 | DAG.getNode(AArch64ISD::UADDV, DL, ValVT, AddVal), |
14037 | DAG.getConstant(0, DL, MVT::i64)); |
14038 | } |
14039 | |
14040 | |
14041 | static SDValue performAddDotCombine(SDNode *N, SelectionDAG &DAG) { |
14042 | EVT VT = N->getValueType(0); |
14043 | if (N->getOpcode() != ISD::ADD) |
14044 | return SDValue(); |
14045 | |
14046 | SDValue Dot = N->getOperand(0); |
14047 | SDValue A = N->getOperand(1); |
14048 | |
14049 | auto isZeroDot = [](SDValue Dot) { |
14050 | return (Dot.getOpcode() == AArch64ISD::UDOT || |
14051 | Dot.getOpcode() == AArch64ISD::SDOT) && |
14052 | isZerosVector(Dot.getOperand(0).getNode()); |
14053 | }; |
14054 | if (!isZeroDot(Dot)) |
14055 | std::swap(Dot, A); |
14056 | if (!isZeroDot(Dot)) |
14057 | return SDValue(); |
14058 | |
14059 | return DAG.getNode(Dot.getOpcode(), SDLoc(N), VT, A, Dot.getOperand(1), |
14060 | Dot.getOperand(2)); |
14061 | } |
14062 | |
14063 | |
14064 | |
14065 | |
14066 | |
14067 | |
14068 | |
14069 | |
14070 | |
14071 | |
14072 | |
14073 | |
// Combine for 128-bit add/sub of two identically-extended 64-bit values:
// when one input is an extract of a vector's high half and the other is a
// DUP-like splat, rebuild the splat as an extract-high too, so instruction
// selection can use the "2"-suffixed long-form instructions. Non-128-bit
// ADDs fall through to the setcc+add fold.
static SDValue performAddSubLongCombine(SDNode *N,
                                        TargetLowering::DAGCombinerInfo &DCI,
                                        SelectionDAG &DAG) {
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  MVT VT = N->getSimpleValueType(0);
  if (!VT.is128BitVector()) {
    if (N->getOpcode() == ISD::ADD)
      return performSetccAddFolding(N, DAG);
    return SDValue();
  }

  // Both operands must be the same kind of extension.
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  if ((LHS.getOpcode() != ISD::ZERO_EXTEND &&
       LHS.getOpcode() != ISD::SIGN_EXTEND) ||
      LHS.getOpcode() != RHS.getOpcode())
    return SDValue();

  unsigned ExtType = LHS.getOpcode();

  // Whichever side extracts the high half forces the other side's splat to
  // be rebuilt as an extract-high; the extension is then re-applied.
  if (isEssentiallyExtractHighSubvector(LHS.getOperand(0))) {
    RHS = tryExtendDUPToExtractHigh(RHS.getOperand(0), DAG);
    if (!RHS.getNode())
      return SDValue();

    RHS = DAG.getNode(ExtType, SDLoc(N), VT, RHS);
  } else if (isEssentiallyExtractHighSubvector(RHS.getOperand(0))) {
    LHS = tryExtendDUPToExtractHigh(LHS.getOperand(0), DAG);
    if (!LHS.getNode())
      return SDValue();

    LHS = DAG.getNode(ExtType, SDLoc(N), VT, LHS);
  }

  return DAG.getNode(N->getOpcode(), SDLoc(N), VT, LHS, RHS);
}
14115 | |
14116 | static SDValue performAddSubCombine(SDNode *N, |
14117 | TargetLowering::DAGCombinerInfo &DCI, |
14118 | SelectionDAG &DAG) { |
14119 | |
14120 | if (SDValue Val = performUADDVCombine(N, DAG)) |
14121 | return Val; |
14122 | if (SDValue Val = performAddDotCombine(N, DAG)) |
14123 | return Val; |
14124 | |
14125 | return performAddSubLongCombine(N, DCI, DAG); |
14126 | } |
14127 | |
14128 | |
14129 | |
14130 | |
14131 | |
14132 | |
14133 | |
14134 | |
// For a "long" operation (either a plain node when IID is not_intrinsic, or
// an INTRINSIC_WO_CHAIN whose vector operands start at index 1): if one
// 64-bit input extracts the high half of a vector and the other is a
// DUP-like splat, rebuild the splat as an extract-high so the high-half
// instruction form can be selected.
static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N,
                                       TargetLowering::DAGCombinerInfo &DCI,
                                       SelectionDAG &DAG) {
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  // Intrinsic nodes carry the intrinsic ID in operand 0, shifting the two
  // vector operands to indices 1 and 2.
  SDValue LHS = N->getOperand((IID == Intrinsic::not_intrinsic) ? 0 : 1);
  SDValue RHS = N->getOperand((IID == Intrinsic::not_intrinsic) ? 1 : 2);
  assert(LHS.getValueType().is64BitVector() &&
         RHS.getValueType().is64BitVector() &&
         "unexpected shape for long operation");

  // If one side is an extract-high, widen the other side's splat to match;
  // bail out when that widening is not possible.
  if (isEssentiallyExtractHighSubvector(LHS)) {
    RHS = tryExtendDUPToExtractHigh(RHS, DAG);
    if (!RHS.getNode())
      return SDValue();
  } else if (isEssentiallyExtractHighSubvector(RHS)) {
    LHS = tryExtendDUPToExtractHigh(LHS, DAG);
    if (!LHS.getNode())
      return SDValue();
  }

  if (IID == Intrinsic::not_intrinsic)
    return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), LHS, RHS);

  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0),
                     N->getOperand(0), LHS, RHS);
}
14166 | |
/// Try to fold a NEON shift intrinsic with a constant shift amount into the
/// corresponding immediate-form target node.  Returns SDValue() when the
/// shift amount is not a constant (splat) or is out of range for the
/// immediate encoding.
static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) {
  MVT ElemTy = N->getSimpleValueType(0).getScalarType();
  unsigned ElemBits = ElemTy.getSizeInBits();

  // Extract the constant shift amount: either a constant splat build_vector
  // or a scalar constant.  Anything else cannot use the immediate forms.
  int64_t ShiftAmount;
  if (BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(2))) {
    APInt SplatValue, SplatUndef;
    unsigned SplatBitSize;
    bool HasAnyUndefs;
    if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
                              HasAnyUndefs, ElemBits) ||
        SplatBitSize != ElemBits)
      return SDValue();

    ShiftAmount = SplatValue.getSExtValue();
  } else if (ConstantSDNode *CVN = dyn_cast<ConstantSDNode>(N->getOperand(2))) {
    ShiftAmount = CVN->getSExtValue();
  } else
    return SDValue();

  // Map each intrinsic to its immediate-form node.  The rounding-shift-left
  // intrinsics (srshl/urshl) only have immediate *right*-shift forms, so a
  // negative shift amount maps onto SRSHR_I/URSHR_I below.
  unsigned Opcode;
  bool IsRightShift;
  switch (IID) {
  default:
    llvm_unreachable("Unknown shift intrinsic");
  case Intrinsic::aarch64_neon_sqshl:
    Opcode = AArch64ISD::SQSHL_I;
    IsRightShift = false;
    break;
  case Intrinsic::aarch64_neon_uqshl:
    Opcode = AArch64ISD::UQSHL_I;
    IsRightShift = false;
    break;
  case Intrinsic::aarch64_neon_srshl:
    Opcode = AArch64ISD::SRSHR_I;
    IsRightShift = true;
    break;
  case Intrinsic::aarch64_neon_urshl:
    Opcode = AArch64ISD::URSHR_I;
    IsRightShift = true;
    break;
  case Intrinsic::aarch64_neon_sqshlu:
    Opcode = AArch64ISD::SQSHLU_I;
    IsRightShift = false;
    break;
  case Intrinsic::aarch64_neon_sshl:
  case Intrinsic::aarch64_neon_ushl:
    // VSHL is a plain (non-saturating, non-rounding) left shift; both the
    // signed and unsigned intrinsics map to the same node for a left shift
    // by immediate.
    Opcode = AArch64ISD::VSHL;
    IsRightShift = false;
    break;
  }

  // Right shifts encode amounts in [1, ElemBits] (given here as negative
  // values); left shifts encode [0, ElemBits-1].
  if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits) {
    SDLoc dl(N);
    return DAG.getNode(Opcode, dl, N->getValueType(0), N->getOperand(1),
                       DAG.getConstant(-ShiftAmount, dl, MVT::i32));
  } else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount < ElemBits) {
    SDLoc dl(N);
    return DAG.getNode(Opcode, dl, N->getValueType(0), N->getOperand(1),
                       DAG.getConstant(ShiftAmount, dl, MVT::i32));
  }

  return SDValue();
}
14234 | |
14235 | |
14236 | |
14237 | |
14238 | static SDValue tryCombineCRC32(unsigned Mask, SDNode *N, SelectionDAG &DAG) { |
14239 | SDValue AndN = N->getOperand(2); |
14240 | if (AndN.getOpcode() != ISD::AND) |
14241 | return SDValue(); |
14242 | |
14243 | ConstantSDNode *CMask = dyn_cast<ConstantSDNode>(AndN.getOperand(1)); |
14244 | if (!CMask || CMask->getZExtValue() != Mask) |
14245 | return SDValue(); |
14246 | |
14247 | return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), MVT::i32, |
14248 | N->getOperand(0), N->getOperand(1), AndN.getOperand(0)); |
14249 | } |
14250 | |
14251 | static SDValue combineAcrossLanesIntrinsic(unsigned Opc, SDNode *N, |
14252 | SelectionDAG &DAG) { |
14253 | SDLoc dl(N); |
14254 | return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0), |
14255 | DAG.getNode(Opc, dl, |
14256 | N->getOperand(1).getSimpleValueType(), |
14257 | N->getOperand(1)), |
14258 | DAG.getConstant(0, dl, MVT::i64)); |
14259 | } |
14260 | |
14261 | static SDValue LowerSVEIntrinsicIndex(SDNode *N, SelectionDAG &DAG) { |
14262 | SDLoc DL(N); |
14263 | SDValue Op1 = N->getOperand(1); |
14264 | SDValue Op2 = N->getOperand(2); |
14265 | EVT ScalarTy = Op2.getValueType(); |
14266 | if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16)) |
14267 | ScalarTy = MVT::i32; |
14268 | |
14269 | |
14270 | SDValue StepVector = DAG.getStepVector(DL, N->getValueType(0)); |
14271 | SDValue Step = DAG.getNode(ISD::SPLAT_VECTOR, DL, N->getValueType(0), Op2); |
14272 | SDValue Mul = DAG.getNode(ISD::MUL, DL, N->getValueType(0), StepVector, Step); |
14273 | SDValue Base = DAG.getNode(ISD::SPLAT_VECTOR, DL, N->getValueType(0), Op1); |
14274 | return DAG.getNode(ISD::ADD, DL, N->getValueType(0), Mul, Base); |
14275 | } |
14276 | |
14277 | static SDValue LowerSVEIntrinsicDUP(SDNode *N, SelectionDAG &DAG) { |
14278 | SDLoc dl(N); |
14279 | SDValue Scalar = N->getOperand(3); |
14280 | EVT ScalarTy = Scalar.getValueType(); |
14281 | |
14282 | if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16)) |
14283 | Scalar = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Scalar); |
14284 | |
14285 | SDValue Passthru = N->getOperand(1); |
14286 | SDValue Pred = N->getOperand(2); |
14287 | return DAG.getNode(AArch64ISD::DUP_MERGE_PASSTHRU, dl, N->getValueType(0), |
14288 | Pred, Scalar, Passthru); |
14289 | } |
14290 | |
14291 | static SDValue LowerSVEIntrinsicEXT(SDNode *N, SelectionDAG &DAG) { |
14292 | SDLoc dl(N); |
14293 | LLVMContext &Ctx = *DAG.getContext(); |
14294 | EVT VT = N->getValueType(0); |
14295 | |
14296 | assert(VT.isScalableVector() && "Expected a scalable vector."); |
14297 | |
14298 | |
14299 | if (VT.getSizeInBits().getKnownMinSize() != AArch64::SVEBitsPerBlock) |
14300 | return SDValue(); |
14301 | |
14302 | unsigned ElemSize = VT.getVectorElementType().getSizeInBits() / 8; |
14303 | unsigned ByteSize = VT.getSizeInBits().getKnownMinSize() / 8; |
14304 | EVT ByteVT = |
14305 | EVT::getVectorVT(Ctx, MVT::i8, ElementCount::getScalable(ByteSize)); |
14306 | |
14307 | |
14308 | SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, ByteVT, N->getOperand(1)); |
14309 | SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, ByteVT, N->getOperand(2)); |
14310 | SDValue Op2 = DAG.getNode(ISD::MUL, dl, MVT::i32, N->getOperand(3), |
14311 | DAG.getConstant(ElemSize, dl, MVT::i32)); |
14312 | |
14313 | SDValue EXT = DAG.getNode(AArch64ISD::EXT, dl, ByteVT, Op0, Op1, Op2); |
14314 | return DAG.getNode(ISD::BITCAST, dl, VT, EXT); |
14315 | } |
14316 | |
/// Try to replace an SVE wide-compare intrinsic (comparison against a
/// splatted scalar) with a SETCC_MERGE_ZERO against a splatted immediate,
/// when the splatted value fits the instruction's immediate range.
static SDValue tryConvertSVEWideCompare(SDNode *N, ISD::CondCode CC,
                                        TargetLowering::DAGCombinerInfo &DCI,
                                        SelectionDAG &DAG) {
  if (DCI.isBeforeLegalize())
    return SDValue();

  SDValue Comparator = N->getOperand(3);
  if (Comparator.getOpcode() == AArch64ISD::DUP ||
      Comparator.getOpcode() == ISD::SPLAT_VECTOR) {
    unsigned IID = getIntrinsicID(N);
    EVT VT = N->getValueType(0);
    EVT CmpVT = N->getOperand(2).getValueType();
    SDValue Pred = N->getOperand(1);
    SDValue Imm;
    SDLoc DL(N);

    switch (IID) {
    default:
      llvm_unreachable("Called with wrong intrinsic!");
      break;

    // Signed comparisons accept immediates in [-16, 15].
    case Intrinsic::aarch64_sve_cmpeq_wide:
    case Intrinsic::aarch64_sve_cmpne_wide:
    case Intrinsic::aarch64_sve_cmpge_wide:
    case Intrinsic::aarch64_sve_cmpgt_wide:
    case Intrinsic::aarch64_sve_cmplt_wide:
    case Intrinsic::aarch64_sve_cmple_wide: {
      if (auto *CN = dyn_cast<ConstantSDNode>(Comparator.getOperand(0))) {
        int64_t ImmVal = CN->getSExtValue();
        if (ImmVal >= -16 && ImmVal <= 15)
          Imm = DAG.getConstant(ImmVal, DL, MVT::i32);
        else
          return SDValue();
      }
      break;
    }

    // Unsigned comparisons accept immediates in [0, 127].
    case Intrinsic::aarch64_sve_cmphs_wide:
    case Intrinsic::aarch64_sve_cmphi_wide:
    case Intrinsic::aarch64_sve_cmplo_wide:
    case Intrinsic::aarch64_sve_cmpls_wide: {
      if (auto *CN = dyn_cast<ConstantSDNode>(Comparator.getOperand(0))) {
        uint64_t ImmVal = CN->getZExtValue();
        if (ImmVal <= 127)
          Imm = DAG.getConstant(ImmVal, DL, MVT::i32);
        else
          return SDValue();
      }
      break;
    }
    }

    // The splatted value was not a constant (or not in range): bail out.
    if (!Imm)
      return SDValue();

    SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, DL, CmpVT, Imm);
    return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, VT, Pred,
                       N->getOperand(2), Splat, DAG.getCondCode(CC));
  }

  return SDValue();
}
14380 | |
14381 | static SDValue getPTest(SelectionDAG &DAG, EVT VT, SDValue Pg, SDValue Op, |
14382 | AArch64CC::CondCode Cond) { |
14383 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
14384 | |
14385 | SDLoc DL(Op); |
14386 | assert(Op.getValueType().isScalableVector() && |
14387 | TLI.isTypeLegal(Op.getValueType()) && |
14388 | "Expected legal scalable vector type!"); |
14389 | |
14390 | |
14391 | EVT OutVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); |
14392 | SDValue TVal = DAG.getConstant(1, DL, OutVT); |
14393 | SDValue FVal = DAG.getConstant(0, DL, OutVT); |
14394 | |
14395 | |
14396 | SDValue Test = DAG.getNode(AArch64ISD::PTEST, DL, MVT::Other, Pg, Op); |
14397 | |
14398 | |
14399 | |
14400 | SDValue CC = DAG.getConstant(getInvertedCondCode(Cond), DL, MVT::i32); |
14401 | SDValue Res = DAG.getNode(AArch64ISD::CSEL, DL, OutVT, FVal, TVal, CC, Test); |
14402 | return DAG.getZExtOrTrunc(Res, DL, VT); |
14403 | } |
14404 | |
14405 | static SDValue combineSVEReductionInt(SDNode *N, unsigned Opc, |
14406 | SelectionDAG &DAG) { |
14407 | SDLoc DL(N); |
14408 | |
14409 | SDValue Pred = N->getOperand(1); |
14410 | SDValue VecToReduce = N->getOperand(2); |
14411 | |
14412 | |
14413 | |
14414 | EVT ReduceVT = getPackedSVEVectorVT(N->getValueType(0)); |
14415 | SDValue Reduce = DAG.getNode(Opc, DL, ReduceVT, Pred, VecToReduce); |
14416 | |
14417 | |
14418 | |
14419 | SDValue Zero = DAG.getConstant(0, DL, MVT::i64); |
14420 | return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce, |
14421 | Zero); |
14422 | } |
14423 | |
14424 | static SDValue combineSVEReductionFP(SDNode *N, unsigned Opc, |
14425 | SelectionDAG &DAG) { |
14426 | SDLoc DL(N); |
14427 | |
14428 | SDValue Pred = N->getOperand(1); |
14429 | SDValue VecToReduce = N->getOperand(2); |
14430 | |
14431 | EVT ReduceVT = VecToReduce.getValueType(); |
14432 | SDValue Reduce = DAG.getNode(Opc, DL, ReduceVT, Pred, VecToReduce); |
14433 | |
14434 | |
14435 | |
14436 | SDValue Zero = DAG.getConstant(0, DL, MVT::i64); |
14437 | return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce, |
14438 | Zero); |
14439 | } |
14440 | |
14441 | static SDValue combineSVEReductionOrderedFP(SDNode *N, unsigned Opc, |
14442 | SelectionDAG &DAG) { |
14443 | SDLoc DL(N); |
14444 | |
14445 | SDValue Pred = N->getOperand(1); |
14446 | SDValue InitVal = N->getOperand(2); |
14447 | SDValue VecToReduce = N->getOperand(3); |
14448 | EVT ReduceVT = VecToReduce.getValueType(); |
14449 | |
14450 | |
14451 | |
14452 | SDValue Zero = DAG.getConstant(0, DL, MVT::i64); |
14453 | InitVal = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ReduceVT, |
14454 | DAG.getUNDEF(ReduceVT), InitVal, Zero); |
14455 | |
14456 | SDValue Reduce = DAG.getNode(Opc, DL, ReduceVT, Pred, InitVal, VecToReduce); |
14457 | |
14458 | |
14459 | |
14460 | return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce, |
14461 | Zero); |
14462 | } |
14463 | |
14464 | static bool isAllActivePredicate(SDValue N) { |
14465 | unsigned NumElts = N.getValueType().getVectorMinNumElements(); |
14466 | |
14467 | |
14468 | while (N.getOpcode() == AArch64ISD::REINTERPRET_CAST) { |
14469 | N = N.getOperand(0); |
14470 | |
14471 | |
14472 | if (N.getValueType().getVectorMinNumElements() < NumElts) |
14473 | return false; |
14474 | } |
14475 | |
14476 | |
14477 | |
14478 | |
14479 | if (N.getOpcode() == AArch64ISD::PTRUE && |
14480 | N.getConstantOperandVal(0) == AArch64SVEPredPattern::all) |
14481 | return N.getValueType().getVectorMinNumElements() >= NumElts; |
14482 | |
14483 | return false; |
14484 | } |
14485 | |
14486 | |
14487 | |
14488 | |
14489 | static SDValue convertMergedOpToPredOp(SDNode *N, unsigned Opc, |
14490 | SelectionDAG &DAG, |
14491 | bool UnpredOp = false) { |
14492 | assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Expected intrinsic!"); |
14493 | assert(N->getNumOperands() == 4 && "Expected 3 operand intrinsic!"); |
14494 | SDValue Pg = N->getOperand(1); |
14495 | |
14496 | |
14497 | if (isAllActivePredicate(Pg)) { |
14498 | if (UnpredOp) |
14499 | return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), N->getOperand(2), |
14500 | N->getOperand(3)); |
14501 | else |
14502 | return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), Pg, |
14503 | N->getOperand(2), N->getOperand(3)); |
14504 | } |
14505 | |
14506 | |
14507 | return SDValue(); |
14508 | } |
14509 | |
/// Central DAG-combine dispatch for AArch64 intrinsics: maps NEON and SVE
/// intrinsics onto generic ISD or AArch64ISD nodes so later combines and
/// isel can reason about them.  Returns SDValue() when no combine applies.
static SDValue performIntrinsicCombine(SDNode *N,
                                       TargetLowering::DAGCombinerInfo &DCI,
                                       const AArch64Subtarget *Subtarget) {
  SelectionDAG &DAG = DCI.DAG;
  unsigned IID = getIntrinsicID(N);
  switch (IID) {
  default:
    break;
  case Intrinsic::aarch64_neon_vcvtfxs2fp:
  case Intrinsic::aarch64_neon_vcvtfxu2fp:
    return tryCombineFixedPointConvert(N, DCI, DAG);
  // NEON across-lanes reductions -> target node + lane-0 extract.
  case Intrinsic::aarch64_neon_saddv:
    return combineAcrossLanesIntrinsic(AArch64ISD::SADDV, N, DAG);
  case Intrinsic::aarch64_neon_uaddv:
    return combineAcrossLanesIntrinsic(AArch64ISD::UADDV, N, DAG);
  case Intrinsic::aarch64_neon_sminv:
    return combineAcrossLanesIntrinsic(AArch64ISD::SMINV, N, DAG);
  case Intrinsic::aarch64_neon_uminv:
    return combineAcrossLanesIntrinsic(AArch64ISD::UMINV, N, DAG);
  case Intrinsic::aarch64_neon_smaxv:
    return combineAcrossLanesIntrinsic(AArch64ISD::SMAXV, N, DAG);
  case Intrinsic::aarch64_neon_umaxv:
    return combineAcrossLanesIntrinsic(AArch64ISD::UMAXV, N, DAG);
  // NEON FP min/max intrinsics map directly onto the generic ISD nodes.
  case Intrinsic::aarch64_neon_fmax:
    return DAG.getNode(ISD::FMAXIMUM, SDLoc(N), N->getValueType(0),
                       N->getOperand(1), N->getOperand(2));
  case Intrinsic::aarch64_neon_fmin:
    return DAG.getNode(ISD::FMINIMUM, SDLoc(N), N->getValueType(0),
                       N->getOperand(1), N->getOperand(2));
  case Intrinsic::aarch64_neon_fmaxnm:
    return DAG.getNode(ISD::FMAXNUM, SDLoc(N), N->getValueType(0),
                       N->getOperand(1), N->getOperand(2));
  case Intrinsic::aarch64_neon_fminnm:
    return DAG.getNode(ISD::FMINNUM, SDLoc(N), N->getValueType(0),
                       N->getOperand(1), N->getOperand(2));
  // Widening multiplies: try to use the "high" instruction forms.
  case Intrinsic::aarch64_neon_smull:
  case Intrinsic::aarch64_neon_umull:
  case Intrinsic::aarch64_neon_pmull:
  case Intrinsic::aarch64_neon_sqdmull:
    return tryCombineLongOpWithDup(IID, N, DCI, DAG);
  // Register-shift intrinsics with a constant amount -> immediate forms.
  case Intrinsic::aarch64_neon_sqshl:
  case Intrinsic::aarch64_neon_uqshl:
  case Intrinsic::aarch64_neon_sqshlu:
  case Intrinsic::aarch64_neon_srshl:
  case Intrinsic::aarch64_neon_urshl:
  case Intrinsic::aarch64_neon_sshl:
  case Intrinsic::aarch64_neon_ushl:
    return tryCombineShiftImm(IID, N, DAG);
  // Drop redundant masks feeding the narrow CRC variants.
  case Intrinsic::aarch64_crc32b:
  case Intrinsic::aarch64_crc32cb:
    return tryCombineCRC32(0xff, N, DAG);
  case Intrinsic::aarch64_crc32h:
  case Intrinsic::aarch64_crc32ch:
    return tryCombineCRC32(0xffff, N, DAG);
  case Intrinsic::aarch64_sve_saddv:
    // For i64 elements the signed and unsigned additive reductions agree,
    // and only the unsigned form is used.
    if (N->getOperand(2)->getValueType(0).getVectorElementType() == MVT::i64)
      return combineSVEReductionInt(N, AArch64ISD::UADDV_PRED, DAG);
    else
      return combineSVEReductionInt(N, AArch64ISD::SADDV_PRED, DAG);
  case Intrinsic::aarch64_sve_uaddv:
    return combineSVEReductionInt(N, AArch64ISD::UADDV_PRED, DAG);
  case Intrinsic::aarch64_sve_smaxv:
    return combineSVEReductionInt(N, AArch64ISD::SMAXV_PRED, DAG);
  case Intrinsic::aarch64_sve_umaxv:
    return combineSVEReductionInt(N, AArch64ISD::UMAXV_PRED, DAG);
  case Intrinsic::aarch64_sve_sminv:
    return combineSVEReductionInt(N, AArch64ISD::SMINV_PRED, DAG);
  case Intrinsic::aarch64_sve_uminv:
    return combineSVEReductionInt(N, AArch64ISD::UMINV_PRED, DAG);
  case Intrinsic::aarch64_sve_orv:
    return combineSVEReductionInt(N, AArch64ISD::ORV_PRED, DAG);
  case Intrinsic::aarch64_sve_eorv:
    return combineSVEReductionInt(N, AArch64ISD::EORV_PRED, DAG);
  case Intrinsic::aarch64_sve_andv:
    return combineSVEReductionInt(N, AArch64ISD::ANDV_PRED, DAG);
  case Intrinsic::aarch64_sve_index:
    return LowerSVEIntrinsicIndex(N, DAG);
  case Intrinsic::aarch64_sve_dup:
    return LowerSVEIntrinsicDUP(N, DAG);
  case Intrinsic::aarch64_sve_dup_x:
    return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), N->getValueType(0),
                       N->getOperand(1));
  case Intrinsic::aarch64_sve_ext:
    return LowerSVEIntrinsicEXT(N, DAG);
  // Merged-form binary ops: fold to predicated nodes when all lanes are
  // active (see convertMergedOpToPredOp).
  case Intrinsic::aarch64_sve_mul:
    return convertMergedOpToPredOp(N, AArch64ISD::MUL_PRED, DAG);
  case Intrinsic::aarch64_sve_smulh:
    return convertMergedOpToPredOp(N, AArch64ISD::MULHS_PRED, DAG);
  case Intrinsic::aarch64_sve_umulh:
    return convertMergedOpToPredOp(N, AArch64ISD::MULHU_PRED, DAG);
  case Intrinsic::aarch64_sve_smin:
    return convertMergedOpToPredOp(N, AArch64ISD::SMIN_PRED, DAG);
  case Intrinsic::aarch64_sve_umin:
    return convertMergedOpToPredOp(N, AArch64ISD::UMIN_PRED, DAG);
  case Intrinsic::aarch64_sve_smax:
    return convertMergedOpToPredOp(N, AArch64ISD::SMAX_PRED, DAG);
  case Intrinsic::aarch64_sve_umax:
    return convertMergedOpToPredOp(N, AArch64ISD::UMAX_PRED, DAG);
  case Intrinsic::aarch64_sve_lsl:
    return convertMergedOpToPredOp(N, AArch64ISD::SHL_PRED, DAG);
  case Intrinsic::aarch64_sve_lsr:
    return convertMergedOpToPredOp(N, AArch64ISD::SRL_PRED, DAG);
  case Intrinsic::aarch64_sve_asr:
    return convertMergedOpToPredOp(N, AArch64ISD::SRA_PRED, DAG);
  case Intrinsic::aarch64_sve_fadd:
    return convertMergedOpToPredOp(N, AArch64ISD::FADD_PRED, DAG);
  case Intrinsic::aarch64_sve_fsub:
    return convertMergedOpToPredOp(N, AArch64ISD::FSUB_PRED, DAG);
  case Intrinsic::aarch64_sve_fmul:
    return convertMergedOpToPredOp(N, AArch64ISD::FMUL_PRED, DAG);
  // These fold all the way to generic unpredicated ISD nodes.
  case Intrinsic::aarch64_sve_add:
    return convertMergedOpToPredOp(N, ISD::ADD, DAG, true);
  case Intrinsic::aarch64_sve_sub:
    return convertMergedOpToPredOp(N, ISD::SUB, DAG, true);
  case Intrinsic::aarch64_sve_and:
    return convertMergedOpToPredOp(N, ISD::AND, DAG, true);
  case Intrinsic::aarch64_sve_bic:
    return convertMergedOpToPredOp(N, AArch64ISD::BIC, DAG, true);
  case Intrinsic::aarch64_sve_eor:
    return convertMergedOpToPredOp(N, ISD::XOR, DAG, true);
  case Intrinsic::aarch64_sve_orr:
    return convertMergedOpToPredOp(N, ISD::OR, DAG, true);
  case Intrinsic::aarch64_sve_sqadd:
    return convertMergedOpToPredOp(N, ISD::SADDSAT, DAG, true);
  case Intrinsic::aarch64_sve_sqsub:
    return convertMergedOpToPredOp(N, ISD::SSUBSAT, DAG, true);
  case Intrinsic::aarch64_sve_uqadd:
    return convertMergedOpToPredOp(N, ISD::UADDSAT, DAG, true);
  case Intrinsic::aarch64_sve_uqsub:
    return convertMergedOpToPredOp(N, ISD::USUBSAT, DAG, true);
  // The _x (don't-care predicate) saturating ops need no predicate check.
  case Intrinsic::aarch64_sve_sqadd_x:
    return DAG.getNode(ISD::SADDSAT, SDLoc(N), N->getValueType(0),
                       N->getOperand(1), N->getOperand(2));
  case Intrinsic::aarch64_sve_sqsub_x:
    return DAG.getNode(ISD::SSUBSAT, SDLoc(N), N->getValueType(0),
                       N->getOperand(1), N->getOperand(2));
  case Intrinsic::aarch64_sve_uqadd_x:
    return DAG.getNode(ISD::UADDSAT, SDLoc(N), N->getValueType(0),
                       N->getOperand(1), N->getOperand(2));
  case Intrinsic::aarch64_sve_uqsub_x:
    return DAG.getNode(ISD::USUBSAT, SDLoc(N), N->getValueType(0),
                       N->getOperand(1), N->getOperand(2));
  // Integer-only unsigned compares; FP operands keep the intrinsic form.
  case Intrinsic::aarch64_sve_cmphs:
    if (!N->getOperand(2).getValueType().isFloatingPoint())
      return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
                         N->getValueType(0), N->getOperand(1), N->getOperand(2),
                         N->getOperand(3), DAG.getCondCode(ISD::SETUGE));
    break;
  case Intrinsic::aarch64_sve_cmphi:
    if (!N->getOperand(2).getValueType().isFloatingPoint())
      return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
                         N->getValueType(0), N->getOperand(1), N->getOperand(2),
                         N->getOperand(3), DAG.getCondCode(ISD::SETUGT));
    break;
  case Intrinsic::aarch64_sve_fcmpge:
  case Intrinsic::aarch64_sve_cmpge:
    return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
                       N->getValueType(0), N->getOperand(1), N->getOperand(2),
                       N->getOperand(3), DAG.getCondCode(ISD::SETGE));
    break;
  case Intrinsic::aarch64_sve_fcmpgt:
  case Intrinsic::aarch64_sve_cmpgt:
    return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
                       N->getValueType(0), N->getOperand(1), N->getOperand(2),
                       N->getOperand(3), DAG.getCondCode(ISD::SETGT));
    break;
  case Intrinsic::aarch64_sve_fcmpeq:
  case Intrinsic::aarch64_sve_cmpeq:
    return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
                       N->getValueType(0), N->getOperand(1), N->getOperand(2),
                       N->getOperand(3), DAG.getCondCode(ISD::SETEQ));
    break;
  case Intrinsic::aarch64_sve_fcmpne:
  case Intrinsic::aarch64_sve_cmpne:
    return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
                       N->getValueType(0), N->getOperand(1), N->getOperand(2),
                       N->getOperand(3), DAG.getCondCode(ISD::SETNE));
    break;
  case Intrinsic::aarch64_sve_fcmpuo:
    return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
                       N->getValueType(0), N->getOperand(1), N->getOperand(2),
                       N->getOperand(3), DAG.getCondCode(ISD::SETUO));
    break;
  case Intrinsic::aarch64_sve_fadda:
    return combineSVEReductionOrderedFP(N, AArch64ISD::FADDA_PRED, DAG);
  case Intrinsic::aarch64_sve_faddv:
    return combineSVEReductionFP(N, AArch64ISD::FADDV_PRED, DAG);
  case Intrinsic::aarch64_sve_fmaxnmv:
    return combineSVEReductionFP(N, AArch64ISD::FMAXNMV_PRED, DAG);
  case Intrinsic::aarch64_sve_fmaxv:
    return combineSVEReductionFP(N, AArch64ISD::FMAXV_PRED, DAG);
  case Intrinsic::aarch64_sve_fminnmv:
    return combineSVEReductionFP(N, AArch64ISD::FMINNMV_PRED, DAG);
  case Intrinsic::aarch64_sve_fminv:
    return combineSVEReductionFP(N, AArch64ISD::FMINV_PRED, DAG);
  case Intrinsic::aarch64_sve_sel:
    return DAG.getNode(ISD::VSELECT, SDLoc(N), N->getValueType(0),
                       N->getOperand(1), N->getOperand(2), N->getOperand(3));
  // Wide compares against a splatted 64-bit scalar.
  case Intrinsic::aarch64_sve_cmpeq_wide:
    return tryConvertSVEWideCompare(N, ISD::SETEQ, DCI, DAG);
  case Intrinsic::aarch64_sve_cmpne_wide:
    return tryConvertSVEWideCompare(N, ISD::SETNE, DCI, DAG);
  case Intrinsic::aarch64_sve_cmpge_wide:
    return tryConvertSVEWideCompare(N, ISD::SETGE, DCI, DAG);
  case Intrinsic::aarch64_sve_cmpgt_wide:
    return tryConvertSVEWideCompare(N, ISD::SETGT, DCI, DAG);
  case Intrinsic::aarch64_sve_cmplt_wide:
    return tryConvertSVEWideCompare(N, ISD::SETLT, DCI, DAG);
  case Intrinsic::aarch64_sve_cmple_wide:
    return tryConvertSVEWideCompare(N, ISD::SETLE, DCI, DAG);
  case Intrinsic::aarch64_sve_cmphs_wide:
    return tryConvertSVEWideCompare(N, ISD::SETUGE, DCI, DAG);
  case Intrinsic::aarch64_sve_cmphi_wide:
    return tryConvertSVEWideCompare(N, ISD::SETUGT, DCI, DAG);
  case Intrinsic::aarch64_sve_cmplo_wide:
    return tryConvertSVEWideCompare(N, ISD::SETULT, DCI, DAG);
  case Intrinsic::aarch64_sve_cmpls_wide:
    return tryConvertSVEWideCompare(N, ISD::SETULE, DCI, DAG);
  case Intrinsic::aarch64_sve_ptest_any:
    return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
                    AArch64CC::ANY_ACTIVE);
  case Intrinsic::aarch64_sve_ptest_first:
    return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
                    AArch64CC::FIRST_ACTIVE);
  case Intrinsic::aarch64_sve_ptest_last:
    return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
                    AArch64CC::LAST_ACTIVE);
  }
  return SDValue();
}
14741 | |
14742 | static SDValue performExtendCombine(SDNode *N, |
14743 | TargetLowering::DAGCombinerInfo &DCI, |
14744 | SelectionDAG &DAG) { |
14745 | |
14746 | |
14747 | |
14748 | |
14749 | if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ZERO_EXTEND && |
14750 | (N->getOperand(0).getOpcode() == ISD::ABDU || |
14751 | N->getOperand(0).getOpcode() == ISD::ABDS)) { |
14752 | SDNode *ABDNode = N->getOperand(0).getNode(); |
14753 | SDValue NewABD = |
14754 | tryCombineLongOpWithDup(Intrinsic::not_intrinsic, ABDNode, DCI, DAG); |
14755 | if (!NewABD.getNode()) |
14756 | return SDValue(); |
14757 | |
14758 | return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), NewABD); |
14759 | } |
14760 | return SDValue(); |
14761 | } |
14762 | |
/// Split a vector store of a splatted value into NumVecElts scalar stores of
/// \p SplatVal at consecutive element offsets, chaining each store to the
/// previous one.  Returns the final store in the chain.
static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St,
                               SDValue SplatVal, unsigned NumVecElts) {
  assert(!St.isTruncatingStore() && "cannot split truncating vector store");
  unsigned OrigAlignment = St.getAlignment();
  unsigned EltOffset = SplatVal.getValueType().getSizeInBits() / 8;

  SDLoc DL(&St);
  SDValue BasePtr = St.getBasePtr();
  uint64_t BaseOffset = 0;

  const MachinePointerInfo &PtrInfo = St.getPointerInfo();
  // First store uses the original base pointer unchanged (offset 0).
  SDValue NewST1 =
      DAG.getStore(St.getChain(), DL, SplatVal, BasePtr, PtrInfo,
                   OrigAlignment, St.getMemOperand()->getFlags());

  // If the base is (add ptr, C), peel the constant off so the remaining
  // stores can fold it into a single immediate (BaseOffset + Offset).
  if (BasePtr->getOpcode() == ISD::ADD &&
      isa<ConstantSDNode>(BasePtr->getOperand(1))) {
    BaseOffset = cast<ConstantSDNode>(BasePtr->getOperand(1))->getSExtValue();
    BasePtr = BasePtr->getOperand(0);
  }

  // Emit the remaining NumVecElts-1 stores at increasing element offsets,
  // shrinking the alignment as the offset requires.
  unsigned Offset = EltOffset;
  while (--NumVecElts) {
    unsigned Alignment = MinAlign(OrigAlignment, Offset);
    SDValue OffsetPtr =
        DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
                    DAG.getConstant(BaseOffset + Offset, DL, MVT::i64));
    NewST1 = DAG.getStore(NewST1.getValue(0), DL, SplatVal, OffsetPtr,
                          PtrInfo.getWithOffset(Offset), Alignment,
                          St.getMemOperand()->getFlags());
    Offset += EltOffset;
  }
  return NewST1;
}
14802 | |
14803 | |
14804 | |
/// Return the full-width SVE "container" vector type for \p ContentTy: the
/// packed integer type with the same (minimum) lane count that fills one
/// SVE register block.
static MVT getSVEContainerType(EVT ContentTy) {
  assert(ContentTy.isSimple() && "No SVE containers for extended types");

  switch (ContentTy.getSimpleVT().SimpleTy) {
  default:
    llvm_unreachable("No known SVE container for this MVT type");
  // 2-lane types widen to nxv2i64.
  case MVT::nxv2i8:
  case MVT::nxv2i16:
  case MVT::nxv2i32:
  case MVT::nxv2i64:
  case MVT::nxv2f32:
  case MVT::nxv2f64:
    return MVT::nxv2i64;
  // 4-lane types widen to nxv4i32.
  case MVT::nxv4i8:
  case MVT::nxv4i16:
  case MVT::nxv4i32:
  case MVT::nxv4f32:
    return MVT::nxv4i32;
  // 8-lane types widen to nxv8i16.
  case MVT::nxv8i8:
  case MVT::nxv8i16:
  case MVT::nxv8f16:
  case MVT::nxv8bf16:
    return MVT::nxv8i16;
  // nxv16i8 is already packed.
  case MVT::nxv16i8:
    return MVT::nxv16i8;
  }
}
14832 | |
14833 | static SDValue performLD1Combine(SDNode *N, SelectionDAG &DAG, unsigned Opc) { |
14834 | SDLoc DL(N); |
14835 | EVT VT = N->getValueType(0); |
14836 | |
14837 | if (VT.getSizeInBits().getKnownMinSize() > AArch64::SVEBitsPerBlock) |
14838 | return SDValue(); |
14839 | |
14840 | EVT ContainerVT = VT; |
14841 | if (ContainerVT.isInteger()) |
14842 | ContainerVT = getSVEContainerType(ContainerVT); |
14843 | |
14844 | SDVTList VTs = DAG.getVTList(ContainerVT, MVT::Other); |
14845 | SDValue Ops[] = { N->getOperand(0), |
14846 | N->getOperand(2), |
14847 | N->getOperand(3), |
14848 | DAG.getValueType(VT) }; |
14849 | |
14850 | SDValue Load = DAG.getNode(Opc, DL, VTs, Ops); |
14851 | SDValue LoadChain = SDValue(Load.getNode(), 1); |
14852 | |
14853 | if (ContainerVT.isInteger() && (VT != ContainerVT)) |
14854 | Load = DAG.getNode(ISD::TRUNCATE, DL, VT, Load.getValue(0)); |
14855 | |
14856 | return DAG.getMergeValues({ Load, LoadChain }, DL); |
14857 | } |
14858 | |
/// Lower an SVE non-temporal-load intrinsic to a masked load, loading FP
/// types as integers and bitcasting the result back.
static SDValue performLDNT1Combine(SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  EVT PtrTy = N->getOperand(3).getValueType();

  // nxv8bf16 requires the BF16 feature.
  if (VT == MVT::nxv8bf16 &&
      !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
    return SDValue();

  EVT LoadVT = VT;
  if (VT.isFloatingPoint())
    LoadVT = VT.changeTypeToInteger();

  auto *MINode = cast<MemIntrinsicSDNode>(N);
  // Zero passthru: masked-off lanes read as zero.
  SDValue PassThru = DAG.getConstant(0, DL, LoadVT);
  // Operand 3 is the base pointer, operand 2 the predicate (mask); the
  // offset operand is undef (no offset).
  SDValue L = DAG.getMaskedLoad(LoadVT, DL, MINode->getChain(),
                                MINode->getOperand(3), DAG.getUNDEF(PtrTy),
                                MINode->getOperand(2), PassThru,
                                MINode->getMemoryVT(), MINode->getMemOperand(),
                                ISD::UNINDEXED, ISD::NON_EXTLOAD, false);

  // For FP results, cast the integer load back and merge with its chain.
  if (VT.isFloatingPoint()) {
    SDValue Ops[] = { DAG.getNode(ISD::BITCAST, DL, VT, L), L.getValue(1) };
    return DAG.getMergeValues(Ops, DL);
  }

  return L;
}
14887 | |
// Lower an SVE ld1rq/ld1ro (load-and-replicate quadword / octword) intrinsic
// to the matching AArch64ISD node.  The template parameter selects between
// the two variants and is validated at compile time.
template <unsigned Opcode>
static SDValue performLD1ReplicateCombine(SDNode *N, SelectionDAG &DAG) {
  static_assert(Opcode == AArch64ISD::LD1RQ_MERGE_ZERO ||
                    Opcode == AArch64ISD::LD1RO_MERGE_ZERO,
                "Unsupported opcode.");
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  // nxv8bf16 is only a valid result type when the subtarget has BF16.
  if (VT == MVT::nxv8bf16 &&
      !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
    return SDValue();

  // Load on the equivalent integer type; FP results are bitcast back below.
  EVT LoadVT = VT;
  if (VT.isFloatingPoint())
    LoadVT = VT.changeTypeToInteger();

  // Node operands: chain, predicate (intrinsic op 2), base (intrinsic op 3).
  SDValue Ops[] = {N->getOperand(0), N->getOperand(2), N->getOperand(3)};
  SDValue Load = DAG.getNode(Opcode, DL, {LoadVT, MVT::Other}, Ops);
  SDValue LoadChain = SDValue(Load.getNode(), 1);

  if (VT.isFloatingPoint())
    Load = DAG.getNode(ISD::BITCAST, DL, VT, Load.getValue(0));

  return DAG.getMergeValues({Load, LoadChain}, DL);
}
14912 | |
14913 | static SDValue performST1Combine(SDNode *N, SelectionDAG &DAG) { |
14914 | SDLoc DL(N); |
14915 | SDValue Data = N->getOperand(2); |
14916 | EVT DataVT = Data.getValueType(); |
14917 | EVT HwSrcVt = getSVEContainerType(DataVT); |
14918 | SDValue InputVT = DAG.getValueType(DataVT); |
14919 | |
14920 | if (DataVT == MVT::nxv8bf16 && |
14921 | !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16()) |
14922 | return SDValue(); |
14923 | |
14924 | if (DataVT.isFloatingPoint()) |
14925 | InputVT = DAG.getValueType(HwSrcVt); |
14926 | |
14927 | SDValue SrcNew; |
14928 | if (Data.getValueType().isFloatingPoint()) |
14929 | SrcNew = DAG.getNode(ISD::BITCAST, DL, HwSrcVt, Data); |
14930 | else |
14931 | SrcNew = DAG.getNode(ISD::ANY_EXTEND, DL, HwSrcVt, Data); |
14932 | |
14933 | SDValue Ops[] = { N->getOperand(0), |
14934 | SrcNew, |
14935 | N->getOperand(4), |
14936 | N->getOperand(3), |
14937 | InputVT |
14938 | }; |
14939 | |
14940 | return DAG.getNode(AArch64ISD::ST1_PRED, DL, N->getValueType(0), Ops); |
14941 | } |
14942 | |
// Lower an SVE non-temporal-store intrinsic to a masked store.  The intrinsic
// operands are: (chain, intrinsic-id, data, predicate, base pointer).
static SDValue performSTNT1Combine(SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);

  SDValue Data = N->getOperand(2);
  EVT DataVT = Data.getValueType();
  EVT PtrTy = N->getOperand(4).getValueType();

  // nxv8bf16 is only a valid data type when the subtarget has BF16.
  if (DataVT == MVT::nxv8bf16 &&
      !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
    return SDValue();

  // Store floating-point data through its equivalent integer type.
  if (DataVT.isFloatingPoint())
    Data = DAG.getNode(ISD::BITCAST, DL, DataVT.changeTypeToInteger(), Data);

  auto *MINode = cast<MemIntrinsicSDNode>(N);
  // Masked store: base = operand 4, no offset, mask = operand 3; neither
  // truncating nor compressing.
  return DAG.getMaskedStore(MINode->getChain(), DL, Data, MINode->getOperand(4),
                            DAG.getUNDEF(PtrTy), MINode->getOperand(3),
                            MINode->getMemoryVT(), MINode->getMemOperand(),
                            ISD::UNINDEXED, false, false);
}
14963 | |
14964 | |
14965 | |
14966 | |
14967 | |
14968 | |
14969 | |
14970 | |
14971 | |
14972 | |
14973 | |
14974 | |
14975 | |
14976 | |
14977 | |
14978 | |
/// Replace a splat of zeros to a vector store by scalar stores of WZR/XZR.
/// The load-store optimizer pass will merge them into store-pair stores.
/// This should be better than a movi to create the vector zero followed by a
/// vector store if the zero constant is not re-used, since one instruction
/// and one register live range will be removed.
static SDValue replaceZeroVectorStore(SelectionDAG &DAG, StoreSDNode &St) {
  SDValue StVal = St.getValue();
  EVT VT = StVal.getValueType();

  // Avoid scalarizing zero splat stores for scalable vectors.
  if (VT.isScalableVector())
    return SDValue();

  // It is beneficial to scalarize a zero splat store for 2 or 3 i64 elements
  // or 2, 3 or 4 i32 elements.
  int NumVecElts = VT.getVectorNumElements();
  if (!(((NumVecElts == 2 || NumVecElts == 3) &&
         VT.getVectorElementType().getSizeInBits() == 64) ||
        ((NumVecElts == 2 || NumVecElts == 3 || NumVecElts == 4) &&
         VT.getVectorElementType().getSizeInBits() == 32)))
    return SDValue();

  if (StVal.getOpcode() != ISD::BUILD_VECTOR)
    return SDValue();

  // If the zero constant has more than one use then the vector store could be
  // better since the constant mov will be amortized and stp q instructions
  // should be able to be formed.
  if (!StVal.hasOneUse())
    return SDValue();

  // If the store is truncating then it's going down to i16 or smaller, which
  // means it can be implemented in a single store anyway.
  if (St.isTruncatingStore())
    return SDValue();

  // If the immediate offset of the address operand is too large for the stp
  // instruction, then bail out.
  if (DAG.isBaseWithConstantOffset(St.getBasePtr())) {
    int64_t Offset = St.getBasePtr()->getConstantOperandVal(1);
    if (Offset < -512 || Offset > 504)
      return SDValue();
  }

  // Every element of the build_vector must be the (integer or FP) zero.
  for (int I = 0; I < NumVecElts; ++I) {
    SDValue EltVal = StVal.getOperand(I);
    if (!isNullConstant(EltVal) && !isNullFPConstant(EltVal))
      return SDValue();
  }

  // Use a CopyFromReg WZR/XZR here to prevent
  // DAGCombiner::MergeConsecutiveStores from undoing this transformation.
  SDLoc DL(&St);
  unsigned ZeroReg;
  EVT ZeroVT;
  if (VT.getVectorElementType().getSizeInBits() == 32) {
    ZeroReg = AArch64::WZR;
    ZeroVT = MVT::i32;
  } else {
    ZeroReg = AArch64::XZR;
    ZeroVT = MVT::i64;
  }
  SDValue SplatVal =
      DAG.getCopyFromReg(DAG.getEntryNode(), DL, ZeroReg, ZeroVT);
  return splitStoreSplat(DAG, St, SplatVal, NumVecElts);
}
15040 | |
15041 | |
15042 | |
15043 | |
15044 | |
15045 | |
/// Replace a store of a vector built by inserting the same scalar into every
/// lane with NumVecElts scalar stores of that value; the load-store optimizer
/// can then merge them into store-pair instructions, avoiding the lane
/// inserts needed to materialize the vector.
static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode &St) {
  SDValue StVal = St.getValue();
  EVT VT = StVal.getValueType();

  // NOTE(review): FP splats are skipped — presumably because the scalar
  // lives in an FP/SIMD register where the insert chain is cheap; confirm.
  if (VT.isFloatingPoint())
    return SDValue();

  // We can express a splat as store pair(s) for 2 or 4 elements.
  unsigned NumVecElts = VT.getVectorNumElements();
  if (NumVecElts != 4 && NumVecElts != 2)
    return SDValue();

  // If the store is truncating then it's going down to i16 or smaller, which
  // means it can be implemented in a single store anyway.
  if (St.isTruncatingStore())
    return SDValue();

  // Check that this is a splat: walk the insert_vector_elt chain and make
  // sure each of the relevant vector element locations is inserted to,
  // i.e. 0 and 1 for v2i64 and 0, 1, 2, 3 for v4i32.
  std::bitset<4> IndexNotInserted((1 << NumVecElts) - 1);
  SDValue SplatVal;
  for (unsigned I = 0; I < NumVecElts; ++I) {
    // Check for insert vector elements.
    if (StVal.getOpcode() != ISD::INSERT_VECTOR_ELT)
      return SDValue();

    // Check that the same value is inserted at each vector element.
    if (I == 0)
      SplatVal = StVal.getOperand(1);
    else if (StVal.getOperand(1) != SplatVal)
      return SDValue();

    // Check the insert element index is a constant lane within the vector.
    ConstantSDNode *CIndex = dyn_cast<ConstantSDNode>(StVal.getOperand(2));
    if (!CIndex)
      return SDValue();
    uint64_t IndexVal = CIndex->getZExtValue();
    if (IndexVal >= NumVecElts)
      return SDValue();
    IndexNotInserted.reset(IndexVal);

    // Move to the vector the element was inserted into.
    StVal = StVal.getOperand(0);
  }
  // Check that all vector element locations were inserted to.
  if (IndexNotInserted.any())
    return SDValue();

  return splitStoreSplat(DAG, St, SplatVal, NumVecElts);
}
15098 | |
// Try to improve a vector store: scalarize zero/splat stores, and split slow
// misaligned 128-bit stores into two 64-bit halves.
static SDValue splitStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
                           SelectionDAG &DAG,
                           const AArch64Subtarget *Subtarget) {

  StoreSDNode *S = cast<StoreSDNode>(N);
  if (S->isVolatile() || S->isIndexed())
    return SDValue();

  SDValue StVal = S->getValue();
  EVT VT = StVal.getValueType();

  if (!VT.isFixedLengthVector())
    return SDValue();

  // If we get a splat of zeros, convert this vector store to a store of
  // scalars. They will be merged into store pairs of xzr thereby removing one
  // instruction and one register.
  if (SDValue ReplacedZeroSplat = replaceZeroVectorStore(DAG, *S))
    return ReplacedZeroSplat;

  // FIXME: The logic for deciding if an unaligned store should be split
  // should belong in TLI.allowsMisalignedMemoryAccesses(), and there should
  // be a call to that function here.

  if (!Subtarget->isMisaligned128StoreSlow())
    return SDValue();

  // Don't split at -Oz.
  if (DAG.getMachineFunction().getFunction().hasMinSize())
    return SDValue();

  // Don't split v2i64 vectors. Memcpy lowering produces those and splitting
  // those up regresses performance on micro-benchmarks and olden/bh.
  if (VT.getVectorNumElements() < 2 || VT == MVT::v2i64)
    return SDValue();

  // Only split 128-bit stores that are not already sufficiently aligned.
  // NOTE(review): alignments of 1 and 2 are also left alone — presumably
  // splitting those into two still-misaligned 64-bit stores does not pay
  // off; confirm the original performance rationale before changing.
  if (VT.getSizeInBits() != 128 || S->getAlignment() >= 16 ||
      S->getAlignment() <= 2)
    return SDValue();

  // If we get a splat of a scalar, convert this vector store to a store of
  // scalars. They will be merged into store pairs, removing the lane inserts.
  if (SDValue ReplacedSplat = replaceSplatVectorStore(DAG, *S))
    return ReplacedSplat;

  SDLoc DL(S);

  // Split VT into two halves and emit two 8-byte stores.
  EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
  unsigned NumElts = HalfVT.getVectorNumElements();
  SDValue SubVector0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
                                   DAG.getConstant(0, DL, MVT::i64));
  SDValue SubVector1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
                                   DAG.getConstant(NumElts, DL, MVT::i64));
  SDValue BasePtr = S->getBasePtr();
  SDValue NewST1 =
      DAG.getStore(S->getChain(), DL, SubVector0, BasePtr, S->getPointerInfo(),
                   S->getAlignment(), S->getMemOperand()->getFlags());
  SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
                                  DAG.getConstant(8, DL, MVT::i64));
  return DAG.getStore(NewST1.getValue(0), DL, SubVector1, OffsetPtr,
                      S->getPointerInfo(), S->getAlignment(),
                      S->getMemOperand()->getFlags());
}
15169 | |
15170 | static SDValue performSpliceCombine(SDNode *N, SelectionDAG &DAG) { |
15171 | assert(N->getOpcode() == AArch64ISD::SPLICE && "Unexepected Opcode!"); |
15172 | |
15173 | |
15174 | if (N->getOperand(2).isUndef()) |
15175 | return N->getOperand(1); |
15176 | |
15177 | return SDValue(); |
15178 | } |
15179 | |
// Fold away a UUNPKLO/UUNPKHI that sits between two UZP1 nodes: the outer
// uzp1 keeps only the low (truncated) half of each unpacked element, which is
// exactly what the inner uzp1 already produced.
static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  EVT ResVT = N->getValueType(0);

  // uzp1(unpklo(uzp1(x, y)), z) => uzp1(x, z)
  if (Op0.getOpcode() == AArch64ISD::UUNPKLO) {
    if (Op0.getOperand(0).getOpcode() == AArch64ISD::UZP1) {
      SDValue X = Op0.getOperand(0).getOperand(0);
      return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, X, Op1);
    }
  }

  // uzp1(x, unpkhi(uzp1(y, z))) => uzp1(x, z)
  if (Op1.getOpcode() == AArch64ISD::UUNPKHI) {
    if (Op1.getOperand(0).getOpcode() == AArch64ISD::UZP1) {
      SDValue Z = Op1.getOperand(0).getOperand(1);
      return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, Op0, Z);
    }
  }

  return SDValue();
}
15204 | |
// If the offset operand of an SVE gather load is a predicated sign- or
// zero-extend-in-reg from i32, fold the extension into the load by switching
// to the corresponding extending (sxtw/uxtw) gather addressing form.
static SDValue performGLD1Combine(SDNode *N, SelectionDAG &DAG) {
  unsigned Opc = N->getOpcode();

  assert(((Opc >= AArch64ISD::GLD1_MERGE_ZERO && // unsigned gather loads
           Opc <= AArch64ISD::GLD1_IMM_MERGE_ZERO) ||
          (Opc >= AArch64ISD::GLD1S_MERGE_ZERO && // signed gather loads
           Opc <= AArch64ISD::GLD1S_IMM_MERGE_ZERO)) &&
         "Invalid opcode.");

  // Decompose the opcode into the properties that matter for the rewrite.
  const bool Scaled = Opc == AArch64ISD::GLD1_SCALED_MERGE_ZERO ||
                      Opc == AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
  const bool Signed = Opc == AArch64ISD::GLD1S_MERGE_ZERO ||
                      Opc == AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
  const bool Extended = Opc == AArch64ISD::GLD1_SXTW_MERGE_ZERO ||
                        Opc == AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO ||
                        Opc == AArch64ISD::GLD1_UXTW_MERGE_ZERO ||
                        Opc == AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO;

  SDLoc DL(N);
  SDValue Chain = N->getOperand(0);
  SDValue Pg = N->getOperand(1);
  SDValue Base = N->getOperand(2);
  SDValue Offset = N->getOperand(3);
  SDValue Ty = N->getOperand(4);

  EVT ResVT = N->getValueType(0);

  const auto OffsetOpc = Offset.getOpcode();
  const bool OffsetIsZExt =
      OffsetOpc == AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU;
  const bool OffsetIsSExt =
      OffsetOpc == AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU;

  // Fold sign/zero extensions of vector offsets into GLD1 nodes where
  // possible (not if the gather already extends its offset).
  if (!Extended && (OffsetIsSExt || OffsetIsZExt)) {
    SDValue ExtPg = Offset.getOperand(0);
    VTSDNode *ExtFrom = cast<VTSDNode>(Offset.getOperand(2).getNode());
    EVT ExtFromEVT = ExtFrom->getVT().getVectorElementType();

    // If the predicate of the sign-/zero-extended offset is the same as the
    // predicate used by this load, and the extension was from a 32-bit
    // width, then the extension can be folded into the gather's own
    // sxtw/uxtw addressing mode.
    if (ExtPg == Pg && ExtFromEVT == MVT::i32) {
      SDValue UnextendedOffset = Offset.getOperand(1);

      unsigned NewOpc = getGatherVecOpcode(Scaled, OffsetIsSExt, true);
      if (Signed)
        NewOpc = getSignExtendedGatherOpcode(NewOpc);

      return DAG.getNode(NewOpc, DL, {ResVT, MVT::Other},
                         {Chain, Pg, Base, UnextendedOffset, Ty});
    }
  }

  return SDValue();
}
15261 | |
15262 | |
15263 | |
/// Optimize a vector shift instruction and its operand if the shifted-out
/// bits are not used.
static SDValue performVectorShiftCombine(SDNode *N,
                                         const AArch64TargetLowering &TLI,
                                         TargetLowering::DAGCombinerInfo &DCI) {
  assert(N->getOpcode() == AArch64ISD::VASHR ||
         N->getOpcode() == AArch64ISD::VLSHR);

  SDValue Op = N->getOperand(0);
  unsigned OpScalarSize = Op.getScalarValueSizeInBits();

  unsigned ShiftImm = N->getConstantOperandVal(1);
  assert(OpScalarSize > ShiftImm && "Invalid shift imm");

  // The low ShiftImm bits of the operand are shifted out and never observed;
  // only demand the remaining high bits so the operand can be simplified.
  APInt ShiftedOutBits = APInt::getLowBitsSet(OpScalarSize, ShiftImm);
  APInt DemandedMask = ~ShiftedOutBits;

  // Returning SDValue(N, 0) signals the combiner that N was updated in place.
  if (TLI.SimplifyDemandedBits(Op, DemandedMask, DCI))
    return SDValue(N, 0);

  return SDValue();
}
15284 | |
15285 | |
15286 | |
/// Target-specific DAG combine function for post-increment LD1 (lane) and
/// post-increment LD1R.
static SDValue performPostLD1Combine(SDNode *N,
                                     TargetLowering::DAGCombinerInfo &DCI,
                                     bool IsLaneOp) {
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  EVT VT = N->getValueType(0);

  if (VT.isScalableVector())
    return SDValue();

  unsigned LoadIdx = IsLaneOp ? 1 : 0;
  SDNode *LD = N->getOperand(LoadIdx).getNode();
  // If it is not a LOAD, we cannot do this combine.
  if (LD->getOpcode() != ISD::LOAD)
    return SDValue();

  // The vector lane must be a constant in the LD1LANE opcode.
  SDValue Lane;
  if (IsLaneOp) {
    Lane = N->getOperand(2);
    auto *LaneC = dyn_cast<ConstantSDNode>(Lane);
    if (!LaneC || LaneC->getZExtValue() >= VT.getVectorNumElements())
      return SDValue();
  }

  LoadSDNode *LoadSDN = cast<LoadSDNode>(LD);
  EVT MemVT = LoadSDN->getMemoryVT();
  // The memory access must be of the vector element type.
  if (MemVT != VT.getVectorElementType())
    return SDValue();

  // Check if there are other uses. If so, do not combine as it will introduce
  // an extra load.
  for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end(); UI != UE;
       ++UI) {
    if (UI.getUse().getResNo() == 1) // Ignore uses of the chain result.
      continue;
    if (*UI != N)
      return SDValue();
  }

  SDValue Addr = LD->getOperand(1);
  SDValue Vector = N->getOperand(0);
  // Search for a use of the address operand that is an increment.
  for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), UE =
       Addr.getNode()->use_end(); UI != UE; ++UI) {
    SDNode *User = *UI;
    if (User->getOpcode() != ISD::ADD
        || UI.getUse().getResNo() != Addr.getResNo())
      continue;

    // If the increment is a constant, it must match the memory ref size.
    SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
    if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
      uint32_t IncVal = CInc->getZExtValue();
      unsigned NumBytes = VT.getScalarSizeInBits() / 8;
      if (IncVal != NumBytes)
        continue;
      // XZR encodes the "post-increment by register" form's dummy operand.
      Inc = DAG.getRegister(AArch64::XZR, MVT::i64);
    }

    // To avoid cycle construction, make sure that neither the load nor the
    // add are predecessors of each other or of the Vector operand.
    SmallPtrSet<const SDNode *, 32> Visited;
    SmallVector<const SDNode *, 16> Worklist;
    Visited.insert(Addr.getNode());
    Worklist.push_back(User);
    Worklist.push_back(LD);
    Worklist.push_back(Vector.getNode());
    if (SDNode::hasPredecessorHelper(LD, Visited, Worklist) ||
        SDNode::hasPredecessorHelper(User, Visited, Worklist))
      continue;

    SmallVector<SDValue, 8> Ops;
    Ops.push_back(LD->getOperand(0)); // Chain
    if (IsLaneOp) {
      Ops.push_back(Vector); // The vector to be inserted into
      Ops.push_back(Lane);   // The lane to insert at
    }
    Ops.push_back(Addr);
    Ops.push_back(Inc);

    EVT Tys[3] = { VT, MVT::i64, MVT::Other }; // result, write-back, chain
    SDVTList SDTys = DAG.getVTList(Tys);
    unsigned NewOp = IsLaneOp ? AArch64ISD::LD1LANEpost : AArch64ISD::LD1DUPpost;
    SDValue UpdN = DAG.getMemIntrinsicNode(NewOp, SDLoc(N), SDTys, Ops,
                                           MemVT,
                                           LoadSDN->getMemOperand());

    // Update the uses.
    SDValue NewResults[] = {
      SDValue(LD, 0),            // The loaded value (kept alive via combine)
      SDValue(UpdN.getNode(), 2) // Chain
    };
    DCI.CombineTo(LD, NewResults);
    DCI.CombineTo(N, SDValue(UpdN.getNode(), 0));    // Dup/inserted result
    DCI.CombineTo(User, SDValue(UpdN.getNode(), 1)); // Write-back register

    break;
  }
  return SDValue();
}
15391 | |
15392 | |
15393 | |
15394 | static bool performTBISimplification(SDValue Addr, |
15395 | TargetLowering::DAGCombinerInfo &DCI, |
15396 | SelectionDAG &DAG) { |
15397 | APInt DemandedMask = APInt::getLowBitsSet(64, 56); |
15398 | KnownBits Known; |
15399 | TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), |
15400 | !DCI.isBeforeLegalizeOps()); |
15401 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
15402 | if (TLI.SimplifyDemandedBits(Addr, DemandedMask, Known, TLO)) { |
15403 | DCI.CommitTargetLoweringOpt(TLO); |
15404 | return true; |
15405 | } |
15406 | return false; |
15407 | } |
15408 | |
15409 | static SDValue foldTruncStoreOfExt(SelectionDAG &DAG, SDNode *N) { |
15410 | assert((N->getOpcode() == ISD::STORE || N->getOpcode() == ISD::MSTORE) && |
15411 | "Expected STORE dag node in input!"); |
15412 | |
15413 | if (auto Store = dyn_cast<StoreSDNode>(N)) { |
15414 | if (!Store->isTruncatingStore() || Store->isIndexed()) |
15415 | return SDValue(); |
15416 | SDValue Ext = Store->getValue(); |
15417 | auto ExtOpCode = Ext.getOpcode(); |
15418 | if (ExtOpCode != ISD::ZERO_EXTEND && ExtOpCode != ISD::SIGN_EXTEND && |
15419 | ExtOpCode != ISD::ANY_EXTEND) |
15420 | return SDValue(); |
15421 | SDValue Orig = Ext->getOperand(0); |
15422 | if (Store->getMemoryVT() != Orig.getValueType()) |
15423 | return SDValue(); |
15424 | return DAG.getStore(Store->getChain(), SDLoc(Store), Orig, |
15425 | Store->getBasePtr(), Store->getMemOperand()); |
15426 | } |
15427 | |
15428 | return SDValue(); |
15429 | } |
15430 | |
// Top-level DAG combine for STORE nodes: try store splitting/scalarizing,
// TBI address simplification, and trunc-store-of-extend folding, in order.
static SDValue performSTORECombine(SDNode *N,
                                   TargetLowering::DAGCombinerInfo &DCI,
                                   SelectionDAG &DAG,
                                   const AArch64Subtarget *Subtarget) {
  // Split slow misaligned 128-bit stores / scalarize zero- and splat-stores.
  if (SDValue Split = splitStores(N, DCI, DAG, Subtarget))
    return Split;

  // With TBI the top byte of the address (operand 2) is ignored by hardware;
  // let SimplifyDemandedBits drop any computation of it.
  if (Subtarget->supportsAddressTopByteIgnored() &&
      performTBISimplification(N->getOperand(2), DCI, DAG))
    return SDValue(N, 0);

  // trunc-store(extend x) of x's own type is just a store of x.
  if (SDValue Store = foldTruncStoreOfExt(DAG, N))
    return Store;

  return SDValue();
}
15447 | |
15448 | |
15449 | |
/// Target-specific DAG combine function for NEON load/store intrinsics to
/// merge base address updates into the post-indexed forms.
static SDValue performNEONPostLDSTCombine(SDNode *N,
                                          TargetLowering::DAGCombinerInfo &DCI,
                                          SelectionDAG &DAG) {
  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
    return SDValue();

  unsigned AddrOpIdx = N->getNumOperands() - 1;
  SDValue Addr = N->getOperand(AddrOpIdx);

  // Search for a use of the address operand that is an increment.
  for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
       UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
    SDNode *User = *UI;
    if (User->getOpcode() != ISD::ADD ||
        UI.getUse().getResNo() != Addr.getResNo())
      continue;

    // Check that the add is independent of the load/store. Otherwise, folding
    // it would create a cycle.
    SmallPtrSet<const SDNode *, 32> Visited;
    SmallVector<const SDNode *, 16> Worklist;
    Visited.insert(Addr.getNode());
    Worklist.push_back(N);
    Worklist.push_back(User);
    if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
        SDNode::hasPredecessorHelper(User, Visited, Worklist))
      continue;

    // Find the new opcode for the updating load/store.
    bool IsStore = false;
    bool IsLaneOp = false;
    bool IsDupOp = false;
    unsigned NewOpc = 0;
    unsigned NumVecs = 0;
    unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
    switch (IntNo) {
    default: llvm_unreachable("unexpected intrinsic for Neon base update");
    case Intrinsic::aarch64_neon_ld2: NewOpc = AArch64ISD::LD2post;
      NumVecs = 2; break;
    case Intrinsic::aarch64_neon_ld3: NewOpc = AArch64ISD::LD3post;
      NumVecs = 3; break;
    case Intrinsic::aarch64_neon_ld4: NewOpc = AArch64ISD::LD4post;
      NumVecs = 4; break;
    case Intrinsic::aarch64_neon_st2: NewOpc = AArch64ISD::ST2post;
      NumVecs = 2; IsStore = true; break;
    case Intrinsic::aarch64_neon_st3: NewOpc = AArch64ISD::ST3post;
      NumVecs = 3; IsStore = true; break;
    case Intrinsic::aarch64_neon_st4: NewOpc = AArch64ISD::ST4post;
      NumVecs = 4; IsStore = true; break;
    case Intrinsic::aarch64_neon_ld1x2: NewOpc = AArch64ISD::LD1x2post;
      NumVecs = 2; break;
    case Intrinsic::aarch64_neon_ld1x3: NewOpc = AArch64ISD::LD1x3post;
      NumVecs = 3; break;
    case Intrinsic::aarch64_neon_ld1x4: NewOpc = AArch64ISD::LD1x4post;
      NumVecs = 4; break;
    case Intrinsic::aarch64_neon_st1x2: NewOpc = AArch64ISD::ST1x2post;
      NumVecs = 2; IsStore = true; break;
    case Intrinsic::aarch64_neon_st1x3: NewOpc = AArch64ISD::ST1x3post;
      NumVecs = 3; IsStore = true; break;
    case Intrinsic::aarch64_neon_st1x4: NewOpc = AArch64ISD::ST1x4post;
      NumVecs = 4; IsStore = true; break;
    case Intrinsic::aarch64_neon_ld2r: NewOpc = AArch64ISD::LD2DUPpost;
      NumVecs = 2; IsDupOp = true; break;
    case Intrinsic::aarch64_neon_ld3r: NewOpc = AArch64ISD::LD3DUPpost;
      NumVecs = 3; IsDupOp = true; break;
    case Intrinsic::aarch64_neon_ld4r: NewOpc = AArch64ISD::LD4DUPpost;
      NumVecs = 4; IsDupOp = true; break;
    case Intrinsic::aarch64_neon_ld2lane: NewOpc = AArch64ISD::LD2LANEpost;
      NumVecs = 2; IsLaneOp = true; break;
    case Intrinsic::aarch64_neon_ld3lane: NewOpc = AArch64ISD::LD3LANEpost;
      NumVecs = 3; IsLaneOp = true; break;
    case Intrinsic::aarch64_neon_ld4lane: NewOpc = AArch64ISD::LD4LANEpost;
      NumVecs = 4; IsLaneOp = true; break;
    case Intrinsic::aarch64_neon_st2lane: NewOpc = AArch64ISD::ST2LANEpost;
      NumVecs = 2; IsStore = true; IsLaneOp = true; break;
    case Intrinsic::aarch64_neon_st3lane: NewOpc = AArch64ISD::ST3LANEpost;
      NumVecs = 3; IsStore = true; IsLaneOp = true; break;
    case Intrinsic::aarch64_neon_st4lane: NewOpc = AArch64ISD::ST4LANEpost;
      NumVecs = 4; IsStore = true; IsLaneOp = true; break;
    }

    // The per-register vector type: stores carry it as operand 2, loads
    // produce it as result 0.
    EVT VecTy;
    if (IsStore)
      VecTy = N->getOperand(2).getValueType();
    else
      VecTy = N->getValueType(0);

    // If the increment is a constant, it must match the memory ref size.
    SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
    if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
      uint32_t IncVal = CInc->getZExtValue();
      unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
      if (IsLaneOp || IsDupOp)
        NumBytes /= VecTy.getVectorNumElements();
      if (IncVal != NumBytes)
        continue;
      // XZR marks the "post-increment by immediate" form.
      Inc = DAG.getRegister(AArch64::XZR, MVT::i64);
    }
    // Create the new updating load/store node.
    SmallVector<SDValue, 8> Ops;
    Ops.push_back(N->getOperand(0)); // Incoming chain
    // Load-lane and store operations take the vector list as input operands.
    if (IsLaneOp || IsStore)
      for (unsigned i = 2; i < AddrOpIdx; ++i)
        Ops.push_back(N->getOperand(i));
    Ops.push_back(Addr); // Base register
    Ops.push_back(Inc);

    // Result types: NumResultVecs vectors (loads only), the write-back
    // register, and the chain.
    EVT Tys[6];
    unsigned NumResultVecs = (IsStore ? 0 : NumVecs);
    unsigned n;
    for (n = 0; n < NumResultVecs; ++n)
      Tys[n] = VecTy;
    Tys[n++] = MVT::i64;  // Type of write back register
    Tys[n] = MVT::Other;  // Type of the chain
    SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs + 2));

    MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
    SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, Ops,
                                           MemInt->getMemoryVT(),
                                           MemInt->getMemOperand());

    // Update the uses.
    std::vector<SDValue> NewResults;
    for (unsigned i = 0; i < NumResultVecs; ++i) {
      NewResults.push_back(SDValue(UpdN.getNode(), i));
    }
    NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1)); // chain
    DCI.CombineTo(N, NewResults);
    DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));

    break;
  }
  return SDValue();
}
15585 | |
15586 | |
15587 | |
// Determine whether \p V is known to carry at most \p width significant bits
// (width is 8 or 16 at the call sites).  Returns true on success and sets
// \p ExtType to describe how the wider register value extends the narrow
// value: taken from a narrow load directly, SEXTLOAD/ZEXTLOAD for
// AssertSext/AssertZext of the narrow type, and NON_EXTLOAD for a constant
// whose magnitude fits in (width - 1) bits.
static
bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType) {
  ExtType = ISD::NON_EXTLOAD;
  switch(V.getNode()->getOpcode()) {
  default:
    return false;
  case ISD::LOAD: {
    // A load of exactly `width` bits: use its own extension kind.
    LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
    if ((LoadNode->getMemoryVT() == MVT::i8 && width == 8)
        || (LoadNode->getMemoryVT() == MVT::i16 && width == 16)) {
      ExtType = LoadNode->getExtensionType();
      return true;
    }
    return false;
  }
  case ISD::AssertSext: {
    // Value asserted to be sign-extended from `width` bits.
    VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
    if ((TypeNode->getVT() == MVT::i8 && width == 8)
        || (TypeNode->getVT() == MVT::i16 && width == 16)) {
      ExtType = ISD::SEXTLOAD;
      return true;
    }
    return false;
  }
  case ISD::AssertZext: {
    // Value asserted to be zero-extended from `width` bits.
    VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
    if ((TypeNode->getVT() == MVT::i8 && width == 8)
        || (TypeNode->getVT() == MVT::i16 && width == 16)) {
      ExtType = ISD::ZEXTLOAD;
      return true;
    }
    return false;
  }
  case ISD::Constant:
  case ISD::TargetConstant: {
    // A constant qualifies when its magnitude fits in (width - 1) bits, so
    // it is unchanged by either extension from `width` bits.
    return std::abs(cast<ConstantSDNode>(V.getNode())->getSExtValue()) <
           1LL << (width - 1);
  }
  }

  // Unreachable: every case above (including default) returns.
  return true;
}
15630 | |
15631 | |
15632 | |
15633 | |
15634 | |
15635 | |
15636 | |
15637 | |
15638 | |
15639 | |
15640 | |
15641 | |
15642 | |
15643 | |
15644 | |
15645 | |
15646 | |
15647 | |
15648 | |
15649 | |
15650 | |
15651 | |
15652 | |
15653 | |
15654 | |
15655 | |
15656 | |
15657 | |
15658 | |
15659 | |
15660 | |
15661 | |
15662 | |
15663 | |
15664 | |
15665 | |
15666 | |
15667 | |
15668 | |
15669 | |
15670 | |
15671 | |
15672 | |
15673 | |
15674 | |
15675 | |
15676 | |
15677 | |
15678 | |
15679 | |
15680 | |
15681 | |
15682 | |
15683 | |
15684 | |
15685 | |
15686 | |
15687 | |
15688 | |
15689 | |
15690 | |
15691 | |
// Decide whether the condition code \p CC yields the same verdict for
//   SUBS(AND(ADD(x, AddConstant), mask), CompConstant)
// and for the maskless
//   SUBS(ADD(x, AddConstant), CompConstant)
// where `mask` is the all-ones mask of \p width bits (8 or 16) and every
// input value is known to fit in `width` bits when extended as described by
// \p ExtType (see checkValueWidth).  If it does, the AND is redundant.
static bool isEquivalentMaskless(unsigned CC, unsigned width,
                                 ISD::LoadExtType ExtType, int AddConstant,
                                 int CompConstant) {
  // By writing the comparisons below only in terms of symbolic values and
  // well-known constants (0, -1, MaxUInt) they apply to every bit width.
  int MaxUInt = (1 << width);

  // For the purposes of these comparisons, sign-extending the inputs is
  // equivalent to zero-extending them and displacing the add by half the
  // integer size.  Adjusting AddConstant here avoids having to write a
  // second set of equations for sign-extended inputs.
  if (ExtType == ISD::SEXTLOAD)
    AddConstant -= (1 << (width-1));

  switch(CC) {
  case AArch64CC::LE:
  case AArch64CC::GT:
    if ((AddConstant == 0) ||
        (CompConstant == MaxUInt - 1 && AddConstant < 0) ||
        (AddConstant >= 0 && CompConstant < 0) ||
        (AddConstant <= 0 && CompConstant <= 0 && CompConstant < AddConstant))
      return true;
    break;
  case AArch64CC::LT:
  case AArch64CC::GE:
    if ((AddConstant == 0) ||
        (AddConstant >= 0 && CompConstant <= 0) ||
        (AddConstant <= 0 && CompConstant <= 0 && CompConstant <= AddConstant))
      return true;
    break;
  case AArch64CC::HI:
  case AArch64CC::LS:
    if ((AddConstant >= 0 && CompConstant < 0) ||
       (AddConstant <= 0 && CompConstant >= -1 &&
        CompConstant < AddConstant + MaxUInt))
      return true;
   break;
  case AArch64CC::PL:
  case AArch64CC::MI:
    if ((AddConstant == 0) ||
        (AddConstant > 0 && CompConstant <= 0) ||
        (AddConstant < 0 && CompConstant <= AddConstant))
      return true;
    break;
  case AArch64CC::LO:
  case AArch64CC::HS:
    if ((AddConstant >= 0 && CompConstant <= 0) ||
        (AddConstant <= 0 && CompConstant >= 0 &&
         CompConstant <= AddConstant + MaxUInt))
      return true;
    break;
  case AArch64CC::EQ:
  case AArch64CC::NE:
    if ((AddConstant > 0 && CompConstant < 0) ||
        (AddConstant < 0 && CompConstant >= 0 &&
         CompConstant < AddConstant + MaxUInt) ||
        (AddConstant >= 0 && CompConstant >= 0 &&
         CompConstant >= AddConstant) ||
        (AddConstant <= 0 && CompConstant < 0 && CompConstant < AddConstant))
      return true;
    break;
  case AArch64CC::VS:
  case AArch64CC::VC:
  case AArch64CC::AL:
  case AArch64CC::NV:
    // These conditions do not depend on the comparison operands at all.
    return true;
  case AArch64CC::Invalid:
    break;
  }

  return false;
}
15766 | |
/// Try to prove that the AND mask feeding the flag-setting SUBS used by a
/// conditional node is redundant and remove it.  Matches
///   SUBS(AND(ADD(x, c1), 0xff or 0xffff), c2)
/// and, when isEquivalentMaskless shows the narrow inputs make the mask a
/// no-op for this condition code, rewrites the SUBS to use the ADD directly.
/// \p CCIndex and \p CmpIndex locate the condition-code and flags operands
/// within \p N (their positions differ between users such as CSEL/BRCOND).
static
SDValue performCONDCombine(SDNode *N,
                           TargetLowering::DAGCombinerInfo &DCI,
                           SelectionDAG &DAG, unsigned CCIndex,
                           unsigned CmpIndex) {
  unsigned CC = cast<ConstantSDNode>(N->getOperand(CCIndex))->getSExtValue();
  SDNode *SubsNode = N->getOperand(CmpIndex).getNode();
  unsigned CondOpcode = SubsNode->getOpcode();

  if (CondOpcode != AArch64ISD::SUBS)
    return SDValue();

  // There is a SUBS feeding this condition. Is it fed by a mask we can
  // use?
  SDNode *AndNode = SubsNode->getOperand(0).getNode();
  unsigned MaskBits = 0;

  if (AndNode->getOpcode() != ISD::AND)
    return SDValue();

  // Only the 8-bit (0xff) and 16-bit (0xffff) masks are candidates.
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndNode->getOperand(1))) {
    uint32_t CNV = CN->getZExtValue();
    if (CNV == 255)
      MaskBits = 8;
    else if (CNV == 65535)
      MaskBits = 16;
  }

  if (!MaskBits)
    return SDValue();

  SDValue AddValue = AndNode->getOperand(0);

  if (AddValue.getOpcode() != ISD::ADD)
    return SDValue();

  // The basic DAG structure is correct; grab the inputs and validate them.

  SDValue AddInputValue1 = AddValue.getNode()->getOperand(0);
  SDValue AddInputValue2 = AddValue.getNode()->getOperand(1);
  SDValue SubsInputValue = SubsNode->getOperand(1);

  // Both the added amount and the value compared against must be constants
  // for the equivalence reasoning below.
  if (!isa<ConstantSDNode>(AddInputValue2.getNode()) ||
      !isa<ConstantSDNode>(SubsInputValue.getNode()))
    return SDValue();

  ISD::LoadExtType ExtType;

  // All three values must be known to fit in MaskBits bits; otherwise
  // removing the mask could change the comparison result.
  if (!checkValueWidth(SubsInputValue, MaskBits, ExtType) ||
      !checkValueWidth(AddInputValue2, MaskBits, ExtType) ||
      !checkValueWidth(AddInputValue1, MaskBits, ExtType) )
    return SDValue();

  if(!isEquivalentMaskless(CC, MaskBits, ExtType,
                cast<ConstantSDNode>(AddInputValue2.getNode())->getSExtValue(),
                cast<ConstantSDNode>(SubsInputValue.getNode())->getSExtValue()))
    return SDValue();

  // The AND is not necessary, remove it.

  SDVTList VTs = DAG.getVTList(SubsNode->getValueType(0),
                               SubsNode->getValueType(1));
  SDValue Ops[] = { AddValue, SubsNode->getOperand(1) };

  SDValue NewValue = DAG.getNode(CondOpcode, SDLoc(SubsNode), VTs, Ops);
  DAG.ReplaceAllUsesWith(SubsNode, NewValue.getNode());

  return SDValue(N, 0);
}
15840 | |
15841 | |
// Optimize a compare-with-zero followed by a conditional branch into
// CBZ/CBNZ.
static SDValue performBRCONDCombine(SDNode *N,
                                    TargetLowering::DAGCombinerInfo &DCI,
                                    SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
  // instructions will not be produced, as they are conditional branch
  // instructions that do not set flags.
  if (MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
    return SDValue();

  // First give performCONDCombine a chance to shrink the SUBS feeding the
  // condition; it returns the (possibly updated) node.
  if (SDValue NV = performCONDCombine(N, DCI, DAG, 2, 3))
    N = NV.getNode();
  SDValue Chain = N->getOperand(0);
  SDValue Dest = N->getOperand(1);
  SDValue CCVal = N->getOperand(2);
  SDValue Cmp = N->getOperand(3);

  assert(isa<ConstantSDNode>(CCVal) && "Expected a ConstantSDNode here!");
  unsigned CC = cast<ConstantSDNode>(CCVal)->getZExtValue();
  // Only EQ/NE against zero can become CBZ/CBNZ.
  if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
    return SDValue();

  unsigned CmpOpc = Cmp.getOpcode();
  if (CmpOpc != AArch64ISD::ADDS && CmpOpc != AArch64ISD::SUBS)
    return SDValue();

  // Only attempt folding if there is only one use of the flags and no use
  // of the value result of the compare.
  if (!Cmp->hasNUsesOfValue(0, 0) || !Cmp->hasNUsesOfValue(1, 1))
    return SDValue();

  SDValue LHS = Cmp.getOperand(0);
  SDValue RHS = Cmp.getOperand(1);

  assert(LHS.getValueType() == RHS.getValueType() &&
         "Expected the value type to be the same for both operands!");
  if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
    return SDValue();

  // Canonicalize the zero to the RHS.
  if (isNullConstant(LHS))
    std::swap(LHS, RHS);

  if (!isNullConstant(RHS))
    return SDValue();

  // NOTE(review): shifts feeding the compare are deliberately skipped —
  // presumably those forms are matched better elsewhere (e.g. as TBZ/TBNZ
  // or flag-setting shifts); confirm before changing.
  if (LHS.getOpcode() == ISD::SHL || LHS.getOpcode() == ISD::SRA ||
      LHS.getOpcode() == ISD::SRL)
    return SDValue();

  // Fold the compare into the branch instruction.
  SDValue BR;
  if (CC == AArch64CC::EQ)
    BR = DAG.getNode(AArch64ISD::CBZ, SDLoc(N), MVT::Other, Chain, LHS, Dest);
  else
    BR = DAG.getNode(AArch64ISD::CBNZ, SDLoc(N), MVT::Other, Chain, LHS, Dest);

  // Do not add new nodes to the DAG combiner worklist.
  DCI.CombineTo(N, BR, false);

  return SDValue();
}
15903 | |
15904 | |
15905 | static SDValue performCSELCombine(SDNode *N, |
15906 | TargetLowering::DAGCombinerInfo &DCI, |
15907 | SelectionDAG &DAG) { |
15908 | |
15909 | if (N->getOperand(0) == N->getOperand(1)) |
15910 | return N->getOperand(0); |
15911 | |
15912 | return performCONDCombine(N, DCI, DAG, 2, 3); |
15913 | } |
15914 | |
// Fold: setcc (csel 0, 1, cond, X), 1, ne ==> csel 0, 1, !cond, X
// The outer "!= 1" test on a 0/1-producing CSEL is absorbed by inverting the
// CSEL's condition.
static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) {
  assert(N->getOpcode() == ISD::SETCC && "Unexpected opcode!");
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();

  // Match (setcc (csel 0, 1, cc, X), 1, ne), with the CSEL having one use.
  if (Cond == ISD::SETNE && isOneConstant(RHS) &&
      LHS->getOpcode() == AArch64ISD::CSEL &&
      isNullConstant(LHS->getOperand(0)) && isOneConstant(LHS->getOperand(1)) &&
      LHS->hasOneUse()) {
    SDLoc DL(N);

    // Invert the CSEL's AArch64 condition code (operand 2).
    auto *OpCC = cast<ConstantSDNode>(LHS.getOperand(2));
    auto OldCond = static_cast<AArch64CC::CondCode>(OpCC->getZExtValue());
    auto NewCond = getInvertedCondCode(OldCond);

    // Rebuild csel 0, 1, !cond, X with the same flags operand.
    SDValue CSEL =
        DAG.getNode(AArch64ISD::CSEL, DL, LHS.getValueType(), LHS.getOperand(0),
                    LHS.getOperand(1), DAG.getConstant(NewCond, DL, MVT::i32),
                    LHS.getOperand(3));
    // Match the SETCC's result type.
    return DAG.getZExtOrTrunc(CSEL, DL, N->getValueType(0));
  }

  return SDValue();
}
15943 | |
static SDValue performSetccMergeZeroCombine(SDNode *N, SelectionDAG &DAG) {
  assert(N->getOpcode() == AArch64ISD::SETCC_MERGE_ZERO &&
         "Unexpected opcode!");

  SDValue Pred = N->getOperand(0);
  SDValue LHS = N->getOperand(1);
  SDValue RHS = N->getOperand(2);
  ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(3))->get();

  // setcc_merge_zero pred
  //   (sign_extend (setcc_merge_zero ... pred ...)), 0, ne
  //   => inner setcc_merge_zero
  // The sign-extended inner predicate compared not-equal to zero under the
  // same governing predicate is just the inner predicate itself.
  if (Cond == ISD::SETNE && isZerosVector(RHS.getNode()) &&
      LHS->getOpcode() == ISD::SIGN_EXTEND &&
      LHS->getOperand(0)->getValueType(0) == N->getValueType(0) &&
      LHS->getOperand(0)->getOpcode() == AArch64ISD::SETCC_MERGE_ZERO &&
      LHS->getOperand(0)->getOperand(0) == Pred)
    return LHS->getOperand(0);

  return SDValue();
}
15964 | |
15965 | |
15966 | |
15967 | |
15968 | |
// Walk through the operand of a TBZ/TBNZ-style bit test, looking through
// truncates, extends, and bitwise ops with constants. Updates \p Bit to the
// equivalent bit index in the returned value, and flips \p Invert when an XOR
// with a set tested bit inverts the branch sense.
static SDValue getTestBitOperand(SDValue Op, unsigned &Bit, bool &Invert,
                                 SelectionDAG &DAG) {
  // Only look through single-use nodes; otherwise the original value must
  // stay live anyway.
  if (!Op->hasOneUse())
    return Op;

  // (tbz (trunc x), b) -> (tbz x, b)
  // Safe while the tested bit still exists in the truncated type; this mainly
  // enables the cases below to fire through a truncate.
  if (Op->getOpcode() == ISD::TRUNCATE &&
      Bit < Op->getValueType(0).getSizeInBits()) {
    return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
  }

  // (tbz (any_ext x), b) -> (tbz x, b) when the tested bit is not one of the
  // (undefined) extended bits.
  if (Op->getOpcode() == ISD::ANY_EXTEND &&
      Bit < Op->getOperand(0).getValueSizeInBits()) {
    return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
  }

  // All remaining patterns are binary ops with a constant RHS.
  if (Op->getNumOperands() != 2)
    return Op;

  auto *C = dyn_cast<ConstantSDNode>(Op->getOperand(1));
  if (!C)
    return Op;

  switch (Op->getOpcode()) {
  default:
    return Op;

  // (tbz (and x, m), b) -> (tbz x, b) when bit b of the mask is set (the AND
  // doesn't change the tested bit).
  case ISD::AND:
    if ((C->getZExtValue() >> Bit) & 1)
      return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
    return Op;

  // (tbz (shl x, c), b) -> (tbz x, b-c) when b >= c and the source bit is in
  // range.
  case ISD::SHL:
    if (C->getZExtValue() <= Bit &&
        (Bit - C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) {
      Bit = Bit - C->getZExtValue();
      return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
    }
    return Op;

  // (tbz (sra x, c), b) -> (tbz x, b+c), clamped to the sign bit when b+c
  // runs past the type (sra replicates the sign bit).
  case ISD::SRA:
    Bit = Bit + C->getZExtValue();
    if (Bit >= Op->getValueType(0).getSizeInBits())
      Bit = Op->getValueType(0).getSizeInBits() - 1;
    return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);

  // (tbz (srl x, c), b) -> (tbz x, b+c) when b+c stays within the type.
  case ISD::SRL:
    if ((Bit + C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) {
      Bit = Bit + C->getZExtValue();
      return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
    }
    return Op;

  // (tbz (xor x, m), b) -> (tb[n]z x, b): if bit b of m is set, the XOR flips
  // the tested bit, so flip the branch sense.
  case ISD::XOR:
    if ((C->getZExtValue() >> Bit) & 1)
      Invert = !Invert;
    return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
  }
}
16040 | |
16041 | |
// Optimize TBZ/TBNZ by tracing the tested operand through truncates, shifts,
// masks and xors (see getTestBitOperand) and re-targeting the tested bit.
static SDValue performTBZCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 SelectionDAG &DAG) {
  unsigned Bit = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
  bool Invert = false;
  SDValue TestSrc = N->getOperand(1);
  SDValue NewTestSrc = getTestBitOperand(TestSrc, Bit, Invert, DAG);

  // Nothing simplified; keep the original node.
  if (TestSrc == NewTestSrc)
    return SDValue();

  // If an XOR along the way flipped the tested bit, swap TBZ <-> TBNZ.
  unsigned NewOpc = N->getOpcode();
  if (Invert) {
    if (NewOpc == AArch64ISD::TBZ)
      NewOpc = AArch64ISD::TBNZ;
    else {
      assert(NewOpc == AArch64ISD::TBNZ);
      NewOpc = AArch64ISD::TBZ;
    }
  }

  SDLoc DL(N);
  // Rebuild the branch on the simplified source and adjusted bit index.
  return DAG.getNode(NewOpc, DL, MVT::Other, N->getOperand(0), NewTestSrc,
                     DAG.getConstant(Bit, DL, MVT::i64), N->getOperand(3));
}
16067 | |
16068 | |
16069 | |
16070 | |
16071 | |
16072 | |
static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  EVT CCVT = N0.getValueType();

  // Check for sign pattern (VSELECT setgt, iN lhs, -1, 1, -1) and transform
  // it into (OR (ASR lhs, N-1), 1), which requires fewer instructions for
  // the supported NEON types.
  SDValue SetCC = N->getOperand(0);
  if (SetCC.getOpcode() == ISD::SETCC &&
      SetCC.getOperand(2) == DAG.getCondCode(ISD::SETGT)) {
    SDValue CmpLHS = SetCC.getOperand(0);
    EVT VT = CmpLHS.getValueType();
    SDNode *CmpRHS = SetCC.getOperand(1).getNode();
    SDNode *SplatLHS = N->getOperand(1).getNode();
    SDNode *SplatRHS = N->getOperand(2).getNode();
    APInt SplatLHSVal;
    // Requires: select operands have the compared type, a supported fixed
    // NEON type, true-arm a splat of 1, compare RHS and false-arm splats of
    // all-ones (-1).
    if (CmpLHS.getValueType() == N->getOperand(1).getValueType() &&
        VT.isSimple() &&
        is_contained(
            makeArrayRef({MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
                          MVT::v2i32, MVT::v4i32, MVT::v2i64}),
            VT.getSimpleVT().SimpleTy) &&
        ISD::isConstantSplatVector(SplatLHS, SplatLHSVal) &&
        SplatLHSVal.isOneValue() && ISD::isConstantSplatVectorAllOnes(CmpRHS) &&
        ISD::isConstantSplatVectorAllOnes(SplatRHS)) {
      unsigned NumElts = VT.getVectorNumElements();
      // Splat of (element bits - 1) shift amounts.
      SmallVector<SDValue, 8> Ops(
          NumElts, DAG.getConstant(VT.getScalarSizeInBits() - 1, SDLoc(N),
                                   VT.getScalarType()));
      SDValue Val = DAG.getBuildVector(VT, SDLoc(N), Ops);

      // ASR by (bits-1) yields 0 for x > -1 and -1 otherwise; OR with the
      // splat-of-1 true arm produces the selected result directly.
      auto Shift = DAG.getNode(ISD::SRA, SDLoc(N), VT, CmpLHS, Val);
      auto Or = DAG.getNode(ISD::OR, SDLoc(N), VT, Shift, N->getOperand(1));
      return Or;
    }
  }

  // Remaining combine: widen a single-element i1 setcc condition to match the
  // compared operand width.
  if (N0.getOpcode() != ISD::SETCC ||
      CCVT.getVectorElementCount() != ElementCount::getFixed(1) ||
      CCVT.getVectorElementType() != MVT::i1)
    return SDValue();

  EVT ResVT = N->getValueType(0);
  EVT CmpVT = N0.getOperand(0).getValueType();

  // Only combine when the result type is of the same size as the compared
  // operands.
  if (ResVT.getSizeInBits() != CmpVT.getSizeInBits())
    return SDValue();

  SDValue IfTrue = N->getOperand(1);
  SDValue IfFalse = N->getOperand(2);
  // Re-emit the setcc with an integer-element result type so the mask is
  // full-width, then select with it.
  SetCC = DAG.getSetCC(SDLoc(N), CmpVT.changeVectorElementTypeToInteger(),
                       N0.getOperand(0), N0.getOperand(1),
                       cast<CondCodeSDNode>(N0.getOperand(2))->get());
  return DAG.getNode(ISD::VSELECT, SDLoc(N), ResVT, SetCC,
                     IfTrue, IfFalse);
}
16130 | |
16131 | |
16132 | |
16133 | |
16134 | |
/// A vector select with a scalar SETCC condition is best performed with the
/// compare-mask instructions rather than going via NZCV: perform the compare
/// in a vector lane, splat (DUP) the resulting mask across all lanes, and
/// select with it.
static SDValue performSelectCombine(SDNode *N,
                                    TargetLowering::DAGCombinerInfo &DCI) {
  SelectionDAG &DAG = DCI.DAG;
  SDValue N0 = N->getOperand(0);
  EVT ResVT = N->getValueType(0);

  if (N0.getOpcode() != ISD::SETCC)
    return SDValue();

  // Scalable-vector results are not handled by this fixed-width DUP scheme.
  if (ResVT.isScalableVector())
    return SDValue();

  // Make sure the SETCC result is either i1 (initial DAG) or i32 (the
  // lowered scalar setcc type).
  assert((N0.getValueType() == MVT::i1 || N0.getValueType() == MVT::i32) &&
         "Scalar-SETCC feeding SELECT has unexpected result type!");

  // The compared operand's scalar type determines the vector lane width we
  // will compare in.
  EVT SrcVT = N0.getOperand(0).getValueType();

  // Don't bother with i1 comparands: there are no legal vectors of i1 to
  // compare in, so the transform would be pointless.
  if (SrcVT == MVT::i1)
    return SDValue();

  // If NumMaskElts == 0, the comparand is wider than each result lane, so no
  // whole number of mask lanes covers the result; bail out.
  int NumMaskElts = ResVT.getSizeInBits() / SrcVT.getSizeInBits();
  if (!ResVT.isVector() || NumMaskElts == 0)
    return SDValue();

  SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumMaskElts);
  EVT CCVT = SrcVT.changeVectorElementTypeToInteger();

  // Also bail out if the vector CCVT isn't the same size as ResVT -- the
  // bitcast of the mask below would not be size-preserving. (Presumably
  // reachable when the division above truncated; confirm.)
  if (CCVT.getSizeInBits() != ResVT.getSizeInBits())
    return SDValue();

  // Make sure we didn't create illegal types, if we're not supposed to.
  assert(DCI.isBeforeLegalize() ||
         DAG.getTargetLoweringInfo().isTypeLegal(SrcVT));

  // First perform a vector comparison, where lane 0 is the one we're
  // interested in.
  SDLoc DL(N0);
  SDValue LHS =
      DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(0));
  SDValue RHS =
      DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(1));
  SDValue SetCC = DAG.getNode(ISD::SETCC, DL, CCVT, LHS, RHS, N0.getOperand(2));

  // Now duplicate the comparison mask we want across all other lanes.
  SmallVector<int, 8> DUPMask(CCVT.getVectorNumElements(), 0);
  SDValue Mask = DAG.getVectorShuffle(CCVT, DL, SetCC, SetCC, DUPMask);
  Mask = DAG.getNode(ISD::BITCAST, DL,
                     ResVT.changeVectorElementTypeToInteger(), Mask);

  return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N->getOperand(2));
}
16197 | |
16198 | |
16199 | static SDValue performNVCASTCombine(SDNode *N) { |
16200 | if (N->getValueType(0) == N->getOperand(0).getValueType()) |
16201 | return N->getOperand(0); |
16202 | |
16203 | return SDValue(); |
16204 | } |
16205 | |
16206 | |
16207 | |
16208 | |
// If every user of this global address is an ADD of a constant, fold the
// smallest such constant into the global address node and subtract it back
// out, so each ADD folds to a smaller in-range addend.
static SDValue performGlobalAddressCombine(SDNode *N, SelectionDAG &DAG,
                                           const AArch64Subtarget *Subtarget,
                                           const TargetMachine &TM) {
  auto *GN = cast<GlobalAddressSDNode>(N);
  // Only direct (MO_NO_FLAG) references are safe to rewrite this way.
  if (Subtarget->ClassifyGlobalReference(GN->getGlobal(), TM) !=
      AArch64II::MO_NO_FLAG)
    return SDValue();

  // Scan all users: each must be an ADD with a constant operand. Track the
  // minimum constant so every user's effective addend stays non-negative.
  uint64_t MinOffset = -1ull;
  for (SDNode *N : GN->uses()) {
    if (N->getOpcode() != ISD::ADD)
      return SDValue();
    auto *C = dyn_cast<ConstantSDNode>(N->getOperand(0));
    if (!C)
      C = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!C)
      return SDValue();
    MinOffset = std::min(MinOffset, C->getZExtValue());
  }
  uint64_t Offset = MinOffset + GN->getOffset();

  // Require that the new offset is larger than the existing one. Otherwise,
  // we can end up oscillating between two possible DAGs, for example,
  // (add (add globaladdr + 10, -1), 1) and (add globaladdr + 9, 1).
  if (Offset <= uint64_t(GN->getOffset()))
    return SDValue();

  // Keep the folded offset below 2^21. NOTE(review): presumably this matches
  // the addend range supported for the ADRP/ADD addressing of a global --
  // confirm against the small code model relocation limits.
  if (Offset >= (1 << 21))
    return SDValue();

  // The folded offset must also stay inside the referenced object, so the
  // resulting symbol+offset remains a valid address under the code model.
  const GlobalValue *GV = GN->getGlobal();
  Type *T = GV->getValueType();
  if (!T->isSized() ||
      Offset > GV->getParent()->getDataLayout().getTypeAllocSize(T))
    return SDValue();

  SDLoc DL(GN);
  // (globaladdr + Offset) - MinOffset is identical to the original address;
  // users' ADD constants (each >= MinOffset) now fold against the SUB.
  SDValue Result = DAG.getGlobalAddress(GV, DL, MVT::i64, Offset);
  return DAG.getNode(ISD::SUB, DL, MVT::i64, Result,
                     DAG.getConstant(MinOffset, DL, MVT::i64));
}
16258 | |
16259 | |
16260 | |
16261 | static SDValue getScaledOffsetForBitWidth(SelectionDAG &DAG, SDValue Offset, |
16262 | SDLoc DL, unsigned BitWidth) { |
16263 | assert(Offset.getValueType().isScalableVector() && |
16264 | "This method is only for scalable vectors of offsets"); |
16265 | |
16266 | SDValue Shift = DAG.getConstant(Log2_32(BitWidth / 8), DL, MVT::i64); |
16267 | SDValue SplatShift = DAG.getNode(ISD::SPLAT_VECTOR, DL, MVT::nxv2i64, Shift); |
16268 | |
16269 | return DAG.getNode(ISD::SHL, DL, MVT::nxv2i64, Offset, SplatShift); |
16270 | } |
16271 | |
16272 | |
16273 | |
16274 | |
16275 | |
16276 | |
16277 | |
16278 | |
/// Check if an immediate byte offset is usable with the SVE
/// vector-plus-immediate addressing mode: it must be a multiple of the
/// element size, and the scaled index must lie in [0, 31].
inline static bool isValidImmForSVEVecImmAddrMode(unsigned OffsetInBytes,
                                                  unsigned ScalarSizeInBytes) {
  return (OffsetInBytes % ScalarSizeInBytes == 0) &&
         (OffsetInBytes / ScalarSizeInBytes <= 31);
}
16291 | |
16292 | |
16293 | |
16294 | |
16295 | |
16296 | |
16297 | |
16298 | |
// SDValue wrapper: true iff Offset is a constant node whose value satisfies
// the SVE vector-plus-immediate addressing constraints (multiple of the
// element size, scaled index in [0, 31]).
static bool isValidImmForSVEVecImmAddrMode(SDValue Offset,
                                           unsigned ScalarSizeInBytes) {
  ConstantSDNode *OffsetConst = dyn_cast<ConstantSDNode>(Offset.getNode());
  return OffsetConst && isValidImmForSVEVecImmAddrMode(
                            OffsetConst->getZExtValue(), ScalarSizeInBytes);
}
16305 | |
// Lower an SVE scatter-store intrinsic node to the matching AArch64ISD
// scatter opcode, legalizing the base/offset operands for the selected
// addressing mode.
static SDValue performScatterStoreCombine(SDNode *N, SelectionDAG &DAG,
                                          unsigned Opcode,
                                          bool OnlyPackedOffsets = true) {
  const SDValue Src = N->getOperand(2);
  const EVT SrcVT = Src->getValueType(0);
  assert(SrcVT.isScalableVector() &&
         "Scatter stores are only possible for SVE vectors");

  SDLoc DL(N);
  MVT SrcElVT = SrcVT.getVectorElementType().getSimpleVT();

  // Make sure that the source data will fit into one SVE register.
  if (SrcVT.getSizeInBits().getKnownMinSize() > AArch64::SVEBitsPerBlock)
    return SDValue();

  // For FP data, only packed single and double precision vectors are
  // supported here.
  if (SrcElVT.isFloatingPoint())
    if ((SrcVT != MVT::nxv4f32) && (SrcVT != MVT::nxv2f64))
      return SDValue();

  // Depending on the addressing mode, this is either a pointer or a vector
  // of pointers (that fits into one register).
  SDValue Base = N->getOperand(4);
  // Depending on the addressing mode, this is either a single offset or a
  // vector of offsets (that fits into one register).
  SDValue Offset = N->getOperand(5);

  // For "scalar + vector of indices", scale the indices into byte offsets
  // up front: the non-temporal scatter has no index-taking form.
  if (Opcode == AArch64ISD::SSTNT1_INDEX_PRED) {
    Offset =
        getScaledOffsetForBitWidth(DAG, Offset, DL, SrcElVT.getSizeInBits());
    Opcode = AArch64ISD::SSTNT1_PRED;
  }

  // The non-temporal scatter only has a "vector base + scalar offset" form,
  // so if the vector currently sits in the offset operand, swap it into the
  // base position.
  if (Opcode == AArch64ISD::SSTNT1_PRED && Offset.getValueType().isVector())
    std::swap(Base, Offset);

  // SST1_IMM requires that the offset is an immediate that is:
  //    * a multiple of #SizeInBytes,
  //    * in the range [0, 31 x #SizeInBytes],
  // where #SizeInBytes is the size in bytes of the stored items. For
  // immediates outside that range (or non-immediates), switch to the
  // register-offset form (UXTW for a 32-bit vector base) and swap operands so
  // the vector becomes the base.
  if (Opcode == AArch64ISD::SST1_IMM_PRED) {
    if (!isValidImmForSVEVecImmAddrMode(Offset,
                                        SrcVT.getScalarSizeInBits() / 8)) {
      if (MVT::nxv4i32 == Base.getValueType().getSimpleVT().SimpleTy)
        Opcode = AArch64ISD::SST1_UXTW_PRED;
      else
        Opcode = AArch64ISD::SST1_PRED;

      std::swap(Base, Offset);
    }
  }

  auto &TLI = DAG.getTargetLoweringInfo();
  if (!TLI.isTypeLegal(Base.getValueType()))
    return SDValue();

  // Some scatter-store variants allow an unpacked nxv2i32 offset vector;
  // widen it to nxv2i64 so the operand type is legal.
  if (!OnlyPackedOffsets &&
      Offset.getValueType().getSimpleVT().SimpleTy == MVT::nxv2i32)
    Offset = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::nxv2i64, Offset).getValue(0);

  if (!TLI.isTypeLegal(Offset.getValueType()))
    return SDValue();

  // Source value type that is representable in hardware.
  EVT HwSrcVt = getSVEContainerType(SrcVT);

  // Keep the original type of the input data to retrieve the correct memory
  // VT during instruction selection; for FP values use the integer container
  // type instead.
  SDValue InputVT = DAG.getValueType(SrcVT);
  if (SrcVT.isFloatingPoint())
    InputVT = DAG.getValueType(HwSrcVt);

  SDVTList VTs = DAG.getVTList(MVT::Other);
  SDValue SrcNew;

  // Bring the data into the hardware container type: bitcast for FP,
  // any-extend for narrower integers.
  if (Src.getValueType().isFloatingPoint())
    SrcNew = DAG.getNode(ISD::BITCAST, DL, HwSrcVt, Src);
  else
    SrcNew = DAG.getNode(ISD::ANY_EXTEND, DL, HwSrcVt, Src);

  SDValue Ops[] = {N->getOperand(0), // Chain
                   SrcNew,
                   N->getOperand(3), // Pg
                   Base,
                   Offset,
                   InputVT};

  return DAG.getNode(Opcode, DL, VTs, Ops);
}
16409 | |
// Lower an SVE gather-load intrinsic node to the matching AArch64ISD gather
// opcode, legalizing the base/offset operands for the selected addressing
// mode and converting the result back to the requested type.
static SDValue performGatherLoadCombine(SDNode *N, SelectionDAG &DAG,
                                        unsigned Opcode,
                                        bool OnlyPackedOffsets = true) {
  const EVT RetVT = N->getValueType(0);
  assert(RetVT.isScalableVector() &&
         "Gather loads are only possible for SVE vectors");

  SDLoc DL(N);

  // Make sure that the loaded data will fit into one SVE register.
  if (RetVT.getSizeInBits().getKnownMinSize() > AArch64::SVEBitsPerBlock)
    return SDValue();

  // Depending on the addressing mode, this is either a pointer or a vector
  // of pointers (that fits into one register).
  SDValue Base = N->getOperand(3);
  // Depending on the addressing mode, this is either a single offset or a
  // vector of offsets (that fits into one register).
  SDValue Offset = N->getOperand(4);

  // For "scalar + vector of indices", scale the indices into byte offsets
  // up front: the non-temporal gather has no index-taking form.
  if (Opcode == AArch64ISD::GLDNT1_INDEX_MERGE_ZERO) {
    Offset = getScaledOffsetForBitWidth(DAG, Offset, DL,
                                        RetVT.getScalarSizeInBits());
    Opcode = AArch64ISD::GLDNT1_MERGE_ZERO;
  }

  // The non-temporal gather only has a "vector base + scalar offset" form,
  // so if the vector currently sits in the offset operand, swap it into the
  // base position.
  if (Opcode == AArch64ISD::GLDNT1_MERGE_ZERO &&
      Offset.getValueType().isVector())
    std::swap(Base, Offset);

  // GLD{FF}1_IMM requires that the offset is an immediate that is:
  //    * a multiple of #SizeInBytes,
  //    * in the range [0, 31 x #SizeInBytes],
  // where #SizeInBytes is the size in bytes of the loaded items. For
  // immediates outside that range (or non-immediates), switch to the
  // register-offset form (UXTW for a 32-bit vector base) and swap operands
  // so the vector becomes the base.
  if (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO ||
      Opcode == AArch64ISD::GLDFF1_IMM_MERGE_ZERO) {
    if (!isValidImmForSVEVecImmAddrMode(Offset,
                                        RetVT.getScalarSizeInBits() / 8)) {
      if (MVT::nxv4i32 == Base.getValueType().getSimpleVT().SimpleTy)
        Opcode = (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO)
                     ? AArch64ISD::GLD1_UXTW_MERGE_ZERO
                     : AArch64ISD::GLDFF1_UXTW_MERGE_ZERO;
      else
        Opcode = (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO)
                     ? AArch64ISD::GLD1_MERGE_ZERO
                     : AArch64ISD::GLDFF1_MERGE_ZERO;

      std::swap(Base, Offset);
    }
  }

  auto &TLI = DAG.getTargetLoweringInfo();
  if (!TLI.isTypeLegal(Base.getValueType()))
    return SDValue();

  // Some gather-load variants allow an unpacked nxv2i32 offset vector;
  // widen it to nxv2i64 so the operand type is legal.
  if (!OnlyPackedOffsets &&
      Offset.getValueType().getSimpleVT().SimpleTy == MVT::nxv2i32)
    Offset = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::nxv2i64, Offset).getValue(0);

  // Return value type that is representable in hardware.
  EVT HwRetVt = getSVEContainerType(RetVT);

  // Keep the original output value type around for instruction selection;
  // for FP values use the integer container type instead.
  SDValue OutVT = DAG.getValueType(RetVT);
  if (RetVT.isFloatingPoint())
    OutVT = DAG.getValueType(HwRetVt);

  SDVTList VTs = DAG.getVTList(HwRetVt, MVT::Other);
  SDValue Ops[] = {N->getOperand(0), // Chain
                   N->getOperand(2), // Pg
                   Base, Offset, OutVT};

  SDValue Load = DAG.getNode(Opcode, DL, VTs, Ops);
  SDValue LoadChain = SDValue(Load.getNode(), 1);

  // Narrow an integer result back from the container type if needed.
  if (RetVT.isInteger() && (RetVT != HwRetVt))
    Load = DAG.getNode(ISD::TRUNCATE, DL, RetVT, Load.getValue(0));

  // If the original return type was FP, bitcast the integer container result
  // back to it.
  if (RetVT.isFloatingPoint())
    Load = DAG.getNode(ISD::BITCAST, DL, RetVT, Load.getValue(0));

  return DAG.getMergeValues({Load, LoadChain}, DL);
}
16510 | |
// Fold SIGN_EXTEND_INREG into its source when the source is an unsigned SVE
// unpack (convert to the signed unpack) or an SVE load (convert to the
// sign-extending load variant).
static SDValue
performSignExtendInRegCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
                              SelectionDAG &DAG) {
  SDLoc DL(N);
  SDValue Src = N->getOperand(0);
  unsigned Opc = Src->getOpcode();

  // Sign extend of an unsigned unpack -> signed unpack.
  if (Opc == AArch64ISD::UUNPKHI || Opc == AArch64ISD::UUNPKLO) {

    unsigned SOpc = Opc == AArch64ISD::UUNPKHI ? AArch64ISD::SUNPKHI
                                               : AArch64ISD::SUNPKLO;

    // Push the sign_extend_inreg to the operand of the unpack so chains of
    // unpacks can all become signed, e.g.:
    //   4i32 sign_extend_inreg (4i32 uunpkhi (8i16 uunpkhi (16i8 x)), from 4i8)
    //   -> 4i32 sunpkhi (8i16 sign_extend_inreg (8i16 uunpkhi (16i8 x)),
    //                    from 8i8)
    //   -> 4i32 sunpkhi (8i16 sunpkhi (16i8 x))
    SDValue ExtOp = Src->getOperand(0);
    auto VT = cast<VTSDNode>(N->getOperand(1))->getVT();
    EVT EltTy = VT.getVectorElementType();
    (void)EltTy;

    assert((EltTy == MVT::i8 || EltTy == MVT::i16 || EltTy == MVT::i32) &&
           "Sign extending from an invalid type");

    // The pushed-down extend covers twice as many (half-width) elements.
    EVT ExtVT = VT.getDoubleNumVectorElementsVT(*DAG.getContext());

    SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ExtOp.getValueType(),
                              ExtOp, DAG.getValueType(ExtVT));

    return DAG.getNode(SOpc, DL, N->getValueType(0), Ext);
  }

  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  if (!EnableCombineMGatherIntrinsics)
    return SDValue();

  // SVE load nodes (e.g. AArch64ISD::GLD1) are straightforward candidates
  // for this combine: map each zero/any-extending load opcode to its
  // sign-extending counterpart. Bail out for all other nodes. MemVTOpNum is
  // the operand index holding the memory VT for each load family.
  unsigned NewOpc;
  unsigned MemVTOpNum = 4;
  switch (Opc) {
  case AArch64ISD::LD1_MERGE_ZERO:
    NewOpc = AArch64ISD::LD1S_MERGE_ZERO;
    MemVTOpNum = 3;
    break;
  case AArch64ISD::LDNF1_MERGE_ZERO:
    NewOpc = AArch64ISD::LDNF1S_MERGE_ZERO;
    MemVTOpNum = 3;
    break;
  case AArch64ISD::LDFF1_MERGE_ZERO:
    NewOpc = AArch64ISD::LDFF1S_MERGE_ZERO;
    MemVTOpNum = 3;
    break;
  case AArch64ISD::GLD1_MERGE_ZERO:
    NewOpc = AArch64ISD::GLD1S_MERGE_ZERO;
    break;
  case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
    NewOpc = AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
    break;
  case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
    NewOpc = AArch64ISD::GLD1S_SXTW_MERGE_ZERO;
    break;
  case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
    NewOpc = AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO;
    break;
  case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
    NewOpc = AArch64ISD::GLD1S_UXTW_MERGE_ZERO;
    break;
  case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
    NewOpc = AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO;
    break;
  case AArch64ISD::GLD1_IMM_MERGE_ZERO:
    NewOpc = AArch64ISD::GLD1S_IMM_MERGE_ZERO;
    break;
  case AArch64ISD::GLDFF1_MERGE_ZERO:
    NewOpc = AArch64ISD::GLDFF1S_MERGE_ZERO;
    break;
  case AArch64ISD::GLDFF1_SCALED_MERGE_ZERO:
    NewOpc = AArch64ISD::GLDFF1S_SCALED_MERGE_ZERO;
    break;
  case AArch64ISD::GLDFF1_SXTW_MERGE_ZERO:
    NewOpc = AArch64ISD::GLDFF1S_SXTW_MERGE_ZERO;
    break;
  case AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO:
    NewOpc = AArch64ISD::GLDFF1S_SXTW_SCALED_MERGE_ZERO;
    break;
  case AArch64ISD::GLDFF1_UXTW_MERGE_ZERO:
    NewOpc = AArch64ISD::GLDFF1S_UXTW_MERGE_ZERO;
    break;
  case AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO:
    NewOpc = AArch64ISD::GLDFF1S_UXTW_SCALED_MERGE_ZERO;
    break;
  case AArch64ISD::GLDFF1_IMM_MERGE_ZERO:
    NewOpc = AArch64ISD::GLDFF1S_IMM_MERGE_ZERO;
    break;
  case AArch64ISD::GLDNT1_MERGE_ZERO:
    NewOpc = AArch64ISD::GLDNT1S_MERGE_ZERO;
    break;
  default:
    return SDValue();
  }

  EVT SignExtSrcVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  EVT SrcMemVT = cast<VTSDNode>(Src->getOperand(MemVTOpNum))->getVT();

  // The extension must match the loaded memory type exactly, and the load
  // must have no other users (we replace it wholesale below).
  if ((SignExtSrcVT != SrcMemVT) || !Src.hasOneUse())
    return SDValue();

  EVT DstVT = N->getValueType(0);
  SDVTList VTs = DAG.getVTList(DstVT, MVT::Other);

  // Rebuild the load with the same operands but the signed opcode.
  SmallVector<SDValue, 5> Ops;
  for (unsigned I = 0; I < Src->getNumOperands(); ++I)
    Ops.push_back(Src->getOperand(I));

  SDValue ExtLoad = DAG.getNode(NewOpc, SDLoc(N), VTs, Ops);
  DCI.CombineTo(N, ExtLoad);
  DCI.CombineTo(Src.getNode(), ExtLoad, ExtLoad.getValue(1));

  // Return N so it doesn't get rechecked.
  return SDValue(N, 0);
}
16640 | |
16641 | |
16642 | |
16643 | |
16644 | static SDValue legalizeSVEGatherPrefetchOffsVec(SDNode *N, SelectionDAG &DAG) { |
16645 | const unsigned OffsetPos = 4; |
16646 | SDValue Offset = N->getOperand(OffsetPos); |
16647 | |
16648 | |
16649 | if (Offset.getValueType().getSimpleVT().SimpleTy != MVT::nxv2i32) |
16650 | return SDValue(); |
16651 | |
16652 | |
16653 | SDLoc DL(N); |
16654 | Offset = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::nxv2i64, Offset); |
16655 | SmallVector<SDValue, 5> Ops(N->op_begin(), N->op_end()); |
16656 | |
16657 | Ops[OffsetPos] = Offset; |
16658 | |
16659 | return DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::Other), Ops); |
16660 | } |
16661 | |
16662 | |
16663 | |
16664 | |
16665 | |
16666 | |
/// Combine a node carrying an `aarch64_sve_prf<T>_gather_scalar_offset`
/// intrinsic into one using `aarch64_sve_prfb_gather_uxtw_index` when the
/// scalar offset is not a valid immediate for the vector-plus-immediate
/// addressing mode of the gather prefetch instruction.
static SDValue combineSVEPrefetchVecBaseImmOff(SDNode *N, SelectionDAG &DAG,
                                               unsigned ScalarSizeInBytes) {
  const unsigned ImmPos = 4, OffsetPos = 3;
  // No need to combine the node if the immediate is already valid.
  if (isValidImmForSVEVecImmAddrMode(N->getOperand(ImmPos), ScalarSizeInBytes))
    return SDValue();

  // ...otherwise swap the offset base with the offset...
  SmallVector<SDValue, 5> Ops(N->op_begin(), N->op_end());
  std::swap(Ops[ImmPos], Ops[OffsetPos]);

  // ...and remap the intrinsic ID (operand 1) to
  // `aarch64_sve_prfb_gather_uxtw_index`.
  SDLoc DL(N);
  Ops[1] = DAG.getConstant(Intrinsic::aarch64_sve_prfb_gather_uxtw_index, DL,
                           MVT::i64);

  return DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::Other), Ops);
}
16685 | |
16686 | |
16687 | |
16688 | static bool isLanes1toNKnownZero(SDValue Op) { |
16689 | switch (Op.getOpcode()) { |
16690 | default: |
16691 | return false; |
16692 | case AArch64ISD::ANDV_PRED: |
16693 | case AArch64ISD::EORV_PRED: |
16694 | case AArch64ISD::FADDA_PRED: |
16695 | case AArch64ISD::FADDV_PRED: |
16696 | case AArch64ISD::FMAXNMV_PRED: |
16697 | case AArch64ISD::FMAXV_PRED: |
16698 | case AArch64ISD::FMINNMV_PRED: |
16699 | case AArch64ISD::FMINV_PRED: |
16700 | case AArch64ISD::ORV_PRED: |
16701 | case AArch64ISD::SADDV_PRED: |
16702 | case AArch64ISD::SMAXV_PRED: |
16703 | case AArch64ISD::SMINV_PRED: |
16704 | case AArch64ISD::UADDV_PRED: |
16705 | case AArch64ISD::UMAXV_PRED: |
16706 | case AArch64ISD::UMINV_PRED: |
16707 | return true; |
16708 | } |
16709 | } |
16710 | |
// Fold (insert_vector_elt zero_vec, (extract_vector_elt V, 0), 0) -> V when
// lanes 1..N of V are already known to be zero: the explicit re-zeroing is
// redundant.
static SDValue removeRedundantInsertVectorElt(SDNode *N) {
  assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT && "Unexpected node!");
  SDValue InsertVec = N->getOperand(0);
  SDValue InsertElt = N->getOperand(1);
  SDValue InsertIdx = N->getOperand(2);

  // We only care about inserts into the first element...
  if (!isNullConstant(InsertIdx))
    return SDValue();

  // ...of a zero'd vector.
  if (!ISD::isConstantSplatVectorAllZeros(InsertVec.getNode()))
    return SDValue();

  // ...where the inserted data was previously extracted...
  if (InsertElt.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
    return SDValue();

  SDValue ExtractVec = InsertElt.getOperand(0);
  SDValue ExtractIdx = InsertElt.getOperand(1);

  // ...from the first element of a vector.
  if (!isNullConstant(ExtractIdx))
    return SDValue();

  // The fold is only an identity if the types agree exactly (no implicit
  // conversion is going on).
  if (N->getValueType(0) != ExtractVec.getValueType())
    return SDValue();

  // And only if the non-lane-0 elements of the source are already zero.
  if (!isLanes1toNKnownZero(ExtractVec))
    return SDValue();

  // The explicit zeroing is redundant; use the source vector directly.
  return ExtractVec;
}
16746 | |
16747 | static SDValue |
16748 | performInsertVectorEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { |
16749 | if (SDValue Res = removeRedundantInsertVectorElt(N)) |
16750 | return Res; |
16751 | |
16752 | return performPostLD1Combine(N, DCI, true); |
16753 | } |
16754 | |
// Handle a floating-point VECTOR_SPLICE by performing it in an equivalent
// packed integer type: bitcast/extend the operands, splice as integers, then
// truncate/bitcast the result back.
SDValue performSVESpliceCombine(SDNode *N, SelectionDAG &DAG) {
  EVT Ty = N->getValueType(0);
  // Integer splices need no help here.
  if (Ty.isInteger())
    return SDValue();

  EVT IntTy = Ty.changeVectorElementTypeToInteger();
  EVT ExtIntTy = getPackedSVEVectorVT(IntTy.getVectorElementCount());
  // Bail out if the packed container's elements would be narrower than the
  // original integer elements (the extend below would lose bits).
  if (ExtIntTy.getVectorElementType().getScalarSizeInBits() <
      IntTy.getVectorElementType().getScalarSizeInBits())
    return SDValue();

  SDLoc DL(N);
  // Bitcast each FP operand to integers, then widen into the packed type.
  SDValue LHS = DAG.getAnyExtOrTrunc(DAG.getBitcast(IntTy, N->getOperand(0)),
                                     DL, ExtIntTy);
  SDValue RHS = DAG.getAnyExtOrTrunc(DAG.getBitcast(IntTy, N->getOperand(1)),
                                     DL, ExtIntTy);
  SDValue Idx = N->getOperand(2);
  SDValue Splice = DAG.getNode(ISD::VECTOR_SPLICE, DL, ExtIntTy, LHS, RHS, Idx);
  // Narrow back down and restore the original FP type.
  SDValue Trunc = DAG.getAnyExtOrTrunc(Splice, DL, IntTy);
  return DAG.getBitcast(Ty, Trunc);
}
16776 | |
/// Target hook invoked by the generic DAG combiner for every node.  Dispatches
/// to the AArch64-specific combine for the node's opcode.  Returning an empty
/// SDValue means "no change"; returning SDValue(N, 0) signals that N itself
/// was updated in place (see the ISD::LOAD case).
SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
                                                 DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  switch (N->getOpcode()) {
  default:
    LLVM_DEBUG(dbgs() << "Custom combining: skipping\n");
    break;
  case ISD::ADD:
  case ISD::SUB:
    return performAddSubCombine(N, DCI, DAG);
  case ISD::XOR:
    return performXorCombine(N, DAG, DCI, Subtarget);
  case ISD::MUL:
    return performMulCombine(N, DAG, DCI, Subtarget);
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
    return performIntToFpCombine(N, DAG, Subtarget);
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
    return performFpToIntCombine(N, DAG, DCI, Subtarget);
  case ISD::FDIV:
    return performFDivCombine(N, DAG, DCI, Subtarget);
  case ISD::OR:
    return performORCombine(N, DCI, Subtarget);
  case ISD::AND:
    return performANDCombine(N, DCI);
  case ISD::SRL:
    return performSRLCombine(N, DCI);
  case ISD::INTRINSIC_WO_CHAIN:
    return performIntrinsicCombine(N, DCI, Subtarget);
  case ISD::ANY_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::SIGN_EXTEND:
    return performExtendCombine(N, DCI, DAG);
  case ISD::SIGN_EXTEND_INREG:
    return performSignExtendInRegCombine(N, DCI, DAG);
  case ISD::TRUNCATE:
    return performVectorTruncateCombine(N, DCI, DAG);
  case ISD::CONCAT_VECTORS:
    return performConcatVectorsCombine(N, DCI, DAG);
  case ISD::INSERT_SUBVECTOR:
    return performInsertSubvectorCombine(N, DCI, DAG);
  case ISD::SELECT:
    return performSelectCombine(N, DCI);
  case ISD::VSELECT:
    return performVSelectCombine(N, DCI.DAG);
  case ISD::SETCC:
    return performSETCCCombine(N, DAG);
  case ISD::LOAD:
    // Try to strip the top byte of the address (Top Byte Ignore).  On success
    // the load node was rewritten in place, so report N itself as the result.
    if (performTBISimplification(N->getOperand(1), DCI, DAG))
      return SDValue(N, 0);
    break;
  case ISD::STORE:
    return performSTORECombine(N, DCI, DAG, Subtarget);
  case ISD::VECTOR_SPLICE:
    return performSVESpliceCombine(N, DAG);
  case AArch64ISD::BRCOND:
    return performBRCONDCombine(N, DCI, DAG);
  case AArch64ISD::TBNZ:
  case AArch64ISD::TBZ:
    return performTBZCombine(N, DCI, DAG);
  case AArch64ISD::CSEL:
    return performCSELCombine(N, DCI, DAG);
  case AArch64ISD::DUP:
    return performPostLD1Combine(N, DCI, false);
  case AArch64ISD::NVCAST:
    return performNVCASTCombine(N);
  case AArch64ISD::SPLICE:
    return performSpliceCombine(N, DAG);
  case AArch64ISD::UZP1:
    return performUzpCombine(N, DAG);
  case AArch64ISD::SETCC_MERGE_ZERO:
    return performSetccMergeZeroCombine(N, DAG);
  // All SVE gather-load flavours (scaled/unscaled, sign/zero-extending
  // offsets, immediate offsets) funnel into one combine.
  case AArch64ISD::GLD1_MERGE_ZERO:
  case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
  case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
  case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
  case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
  case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
  case AArch64ISD::GLD1_IMM_MERGE_ZERO:
  case AArch64ISD::GLD1S_MERGE_ZERO:
  case AArch64ISD::GLD1S_SCALED_MERGE_ZERO:
  case AArch64ISD::GLD1S_UXTW_MERGE_ZERO:
  case AArch64ISD::GLD1S_SXTW_MERGE_ZERO:
  case AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO:
  case AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO:
  case AArch64ISD::GLD1S_IMM_MERGE_ZERO:
    return performGLD1Combine(N, DAG);
  case AArch64ISD::VASHR:
  case AArch64ISD::VLSHR:
    return performVectorShiftCombine(N, *this, DCI);
  case ISD::INSERT_VECTOR_ELT:
    return performInsertVectorEltCombine(N, DCI);
  case ISD::EXTRACT_VECTOR_ELT:
    return performExtractVectorEltCombine(N, DAG);
  case ISD::VECREDUCE_ADD:
    return performVecReduceAddCombine(N, DCI.DAG, Subtarget);
  case ISD::INTRINSIC_VOID:
  case ISD::INTRINSIC_W_CHAIN:
    // For chained intrinsics operand 0 is the chain and operand 1 is the
    // intrinsic ID; dispatch on the ID.
    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
    case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
      return combineSVEPrefetchVecBaseImmOff(N, DAG, 1 /*=ScalarSizeInBytes*/);
    case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
      return combineSVEPrefetchVecBaseImmOff(N, DAG, 2 /*=ScalarSizeInBytes*/);
    case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
      return combineSVEPrefetchVecBaseImmOff(N, DAG, 4 /*=ScalarSizeInBytes*/);
    case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
      return combineSVEPrefetchVecBaseImmOff(N, DAG, 8 /*=ScalarSizeInBytes*/);
    case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
    case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
    case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
    case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
    case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:
    case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
    case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
    case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
      return legalizeSVEGatherPrefetchOffsVec(N, DAG);
    case Intrinsic::aarch64_neon_ld2:
    case Intrinsic::aarch64_neon_ld3:
    case Intrinsic::aarch64_neon_ld4:
    case Intrinsic::aarch64_neon_ld1x2:
    case Intrinsic::aarch64_neon_ld1x3:
    case Intrinsic::aarch64_neon_ld1x4:
    case Intrinsic::aarch64_neon_ld2lane:
    case Intrinsic::aarch64_neon_ld3lane:
    case Intrinsic::aarch64_neon_ld4lane:
    case Intrinsic::aarch64_neon_ld2r:
    case Intrinsic::aarch64_neon_ld3r:
    case Intrinsic::aarch64_neon_ld4r:
    case Intrinsic::aarch64_neon_st2:
    case Intrinsic::aarch64_neon_st3:
    case Intrinsic::aarch64_neon_st4:
    case Intrinsic::aarch64_neon_st1x2:
    case Intrinsic::aarch64_neon_st1x3:
    case Intrinsic::aarch64_neon_st1x4:
    case Intrinsic::aarch64_neon_st2lane:
    case Intrinsic::aarch64_neon_st3lane:
    case Intrinsic::aarch64_neon_st4lane:
      return performNEONPostLDSTCombine(N, DCI, DAG);
    case Intrinsic::aarch64_sve_ldnt1:
      return performLDNT1Combine(N, DAG);
    case Intrinsic::aarch64_sve_ld1rq:
      return performLD1ReplicateCombine<AArch64ISD::LD1RQ_MERGE_ZERO>(N, DAG);
    case Intrinsic::aarch64_sve_ld1ro:
      return performLD1ReplicateCombine<AArch64ISD::LD1RO_MERGE_ZERO>(N, DAG);
    case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
      return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1_MERGE_ZERO);
    case Intrinsic::aarch64_sve_ldnt1_gather:
      return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1_MERGE_ZERO);
    case Intrinsic::aarch64_sve_ldnt1_gather_index:
      return performGatherLoadCombine(N, DAG,
                                      AArch64ISD::GLDNT1_INDEX_MERGE_ZERO);
    case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
      return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1_MERGE_ZERO);
    case Intrinsic::aarch64_sve_ld1:
      return performLD1Combine(N, DAG, AArch64ISD::LD1_MERGE_ZERO);
    case Intrinsic::aarch64_sve_ldnf1:
      return performLD1Combine(N, DAG, AArch64ISD::LDNF1_MERGE_ZERO);
    case Intrinsic::aarch64_sve_ldff1:
      return performLD1Combine(N, DAG, AArch64ISD::LDFF1_MERGE_ZERO);
    case Intrinsic::aarch64_sve_st1:
      return performST1Combine(N, DAG);
    case Intrinsic::aarch64_sve_stnt1:
      return performSTNT1Combine(N, DAG);
    case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
      return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_PRED);
    case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:
      return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_PRED);
    case Intrinsic::aarch64_sve_stnt1_scatter:
      return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_PRED);
    case Intrinsic::aarch64_sve_stnt1_scatter_index:
      return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_INDEX_PRED);
    case Intrinsic::aarch64_sve_ld1_gather:
      return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_MERGE_ZERO);
    case Intrinsic::aarch64_sve_ld1_gather_index:
      return performGatherLoadCombine(N, DAG,
                                      AArch64ISD::GLD1_SCALED_MERGE_ZERO);
    case Intrinsic::aarch64_sve_ld1_gather_sxtw:
      return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_SXTW_MERGE_ZERO,
                                      /*OnlyPackedOffsets=*/false);
    case Intrinsic::aarch64_sve_ld1_gather_uxtw:
      return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_UXTW_MERGE_ZERO,
                                      /*OnlyPackedOffsets=*/false);
    case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
      return performGatherLoadCombine(N, DAG,
                                      AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO,
                                      /*OnlyPackedOffsets=*/false);
    case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
      return performGatherLoadCombine(N, DAG,
                                      AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO,
                                      /*OnlyPackedOffsets=*/false);
    case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
      return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_IMM_MERGE_ZERO);
    case Intrinsic::aarch64_sve_ldff1_gather:
      return performGatherLoadCombine(N, DAG, AArch64ISD::GLDFF1_MERGE_ZERO);
    case Intrinsic::aarch64_sve_ldff1_gather_index:
      return performGatherLoadCombine(N, DAG,
                                      AArch64ISD::GLDFF1_SCALED_MERGE_ZERO);
    case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
      return performGatherLoadCombine(N, DAG,
                                      AArch64ISD::GLDFF1_SXTW_MERGE_ZERO,
                                      /*OnlyPackedOffsets=*/false);
    case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
      return performGatherLoadCombine(N, DAG,
                                      AArch64ISD::GLDFF1_UXTW_MERGE_ZERO,
                                      /*OnlyPackedOffsets=*/false);
    case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
      return performGatherLoadCombine(N, DAG,
                                      AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO,
                                      /*OnlyPackedOffsets=*/false);
    case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
      return performGatherLoadCombine(N, DAG,
                                      AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO,
                                      /*OnlyPackedOffsets=*/false);
    case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
      return performGatherLoadCombine(N, DAG,
                                      AArch64ISD::GLDFF1_IMM_MERGE_ZERO);
    case Intrinsic::aarch64_sve_st1_scatter:
      return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_PRED);
    case Intrinsic::aarch64_sve_st1_scatter_index:
      return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_SCALED_PRED);
    case Intrinsic::aarch64_sve_st1_scatter_sxtw:
      return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_SXTW_PRED,
                                        /*OnlyPackedOffsets=*/false);
    case Intrinsic::aarch64_sve_st1_scatter_uxtw:
      return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_UXTW_PRED,
                                        /*OnlyPackedOffsets=*/false);
    case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
      return performScatterStoreCombine(N, DAG,
                                        AArch64ISD::SST1_SXTW_SCALED_PRED,
                                        /*OnlyPackedOffsets=*/false);
    case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
      return performScatterStoreCombine(N, DAG,
                                        AArch64ISD::SST1_UXTW_SCALED_PRED,
                                        /*OnlyPackedOffsets=*/false);
    case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
      return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_IMM_PRED);
    case Intrinsic::aarch64_sve_tuple_get: {
      // Extract subvector IdxConst of the tuple as an EXTRACT_SUBVECTOR at
      // element offset IdxConst * NumLanes.
      SDLoc DL(N);
      SDValue Chain = N->getOperand(0);
      SDValue Src1 = N->getOperand(2);
      SDValue Idx = N->getOperand(3);

      uint64_t IdxConst = cast<ConstantSDNode>(Idx)->getZExtValue();
      EVT ResVT = N->getValueType(0);
      uint64_t NumLanes = ResVT.getVectorElementCount().getKnownMinValue();
      SDValue ExtIdx = DAG.getVectorIdxConstant(IdxConst * NumLanes, DL);
      SDValue Val =
          DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResVT, Src1, ExtIdx);
      return DAG.getMergeValues({Val, Chain}, DL);
    }
    case Intrinsic::aarch64_sve_tuple_set: {
      // Rebuild the tuple as a CONCAT_VECTORS of its original subvectors,
      // with the subvector at IdxConst replaced by Vec.
      SDLoc DL(N);
      SDValue Chain = N->getOperand(0);
      SDValue Tuple = N->getOperand(2);
      SDValue Idx = N->getOperand(3);
      SDValue Vec = N->getOperand(4);

      EVT TupleVT = Tuple.getValueType();
      uint64_t TupleLanes = TupleVT.getVectorElementCount().getKnownMinValue();

      uint64_t IdxConst = cast<ConstantSDNode>(Idx)->getZExtValue();
      uint64_t NumLanes =
          Vec.getValueType().getVectorElementCount().getKnownMinValue();

      // The tuple must be a whole number of Vec-sized pieces.
      if ((TupleLanes % NumLanes) != 0)
        report_fatal_error("invalid tuple vector!");

      uint64_t NumVecs = TupleLanes / NumLanes;

      SmallVector<SDValue, 4> Opnds;
      for (unsigned I = 0; I < NumVecs; ++I) {
        if (I == IdxConst)
          Opnds.push_back(Vec);
        else {
          SDValue ExtIdx = DAG.getVectorIdxConstant(I * NumLanes, DL);
          Opnds.push_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
                                      Vec.getValueType(), Tuple, ExtIdx));
        }
      }
      SDValue Concat =
          DAG.getNode(ISD::CONCAT_VECTORS, DL, Tuple.getValueType(), Opnds);
      return DAG.getMergeValues({Concat, Chain}, DL);
    }
    case Intrinsic::aarch64_sve_tuple_create2:
    case Intrinsic::aarch64_sve_tuple_create3:
    case Intrinsic::aarch64_sve_tuple_create4: {
      // Concatenate the 2/3/4 input vectors (operands 2..N-1) into one wide
      // vector representing the tuple.
      SDLoc DL(N);
      SDValue Chain = N->getOperand(0);

      SmallVector<SDValue, 4> Opnds;
      for (unsigned I = 2; I < N->getNumOperands(); ++I)
        Opnds.push_back(N->getOperand(I));

      EVT VT = Opnds[0].getValueType();
      EVT EltVT = VT.getVectorElementType();
      EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
                                    VT.getVectorElementCount() *
                                        (N->getNumOperands() - 2));
      SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, DestVT, Opnds);
      return DAG.getMergeValues({Concat, Chain}, DL);
    }
    case Intrinsic::aarch64_sve_ld2:
    case Intrinsic::aarch64_sve_ld3:
    case Intrinsic::aarch64_sve_ld4: {
      SDLoc DL(N);
      SDValue Chain = N->getOperand(0);
      SDValue Mask = N->getOperand(2);
      SDValue BasePtr = N->getOperand(3);
      SDValue LoadOps[] = {Chain, Mask, BasePtr};
      unsigned IntrinsicID =
          cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
      SDValue Result =
          LowerSVEStructLoad(IntrinsicID, LoadOps, N->getValueType(0), DAG, DL);
      return DAG.getMergeValues({Result, Chain}, DL);
    }
    case Intrinsic::aarch64_rndr:
    case Intrinsic::aarch64_rndrrs: {
      // Read RNDR/RNDRRS via MRS; the random value is value 0 and the glue
      // carries NZCV.  The CSINC materializes the "success" flag (NE means the
      // read produced valid entropy) which is truncated to the i1 result.
      unsigned IntrinsicID =
          cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
      auto Register =
          (IntrinsicID == Intrinsic::aarch64_rndr ? AArch64SysReg::RNDR
                                                  : AArch64SysReg::RNDRRS);
      SDLoc DL(N);
      SDValue A = DAG.getNode(
          AArch64ISD::MRS, DL, DAG.getVTList(MVT::i64, MVT::Glue, MVT::Other),
          N->getOperand(0), DAG.getConstant(Register, DL, MVT::i64));
      SDValue B = DAG.getNode(
          AArch64ISD::CSINC, DL, MVT::i32, DAG.getConstant(0, DL, MVT::i32),
          DAG.getConstant(0, DL, MVT::i32),
          DAG.getConstant(AArch64CC::NE, DL, MVT::i32), A.getValue(1));
      return DAG.getMergeValues(
          {A, DAG.getZExtOrTrunc(B, DL, MVT::i1), A.getValue(2)}, DL);
    }
    default:
      break;
    }
    break;
  case ISD::GlobalAddress:
    return performGlobalAddressCombine(N, DAG, Subtarget, getTargetMachine());
  }
  return SDValue();
}
17120 | |
17121 | |
17122 | |
17123 | |
17124 | |
/// Return true if the single use of result 0 of N feeds (possibly through a
/// CopyToReg or FP_EXTEND) only RET_FLAG nodes.  On success, \p Chain is
/// updated to the chain that a tail call should use.
bool AArch64TargetLowering::isUsedByReturnOnly(SDNode *N,
                                               SDValue &Chain) const {
  if (N->getNumValues() != 1)
    return false;
  if (!N->hasNUsesOfValue(1, 0))
    return false;

  SDValue TCChain = Chain;
  SDNode *Copy = *N->use_begin();
  if (Copy->getOpcode() == ISD::CopyToReg) {
    // If the copy is glued to something else, we conservatively refuse: the
    // glued producer would be lost if the copy were folded into a tail call.
    if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() ==
        MVT::Glue)
      return false;
    TCChain = Copy->getOperand(0);
  } else if (Copy->getOpcode() != ISD::FP_EXTEND)
    return false;

  // Every user of the copy must be a return; require at least one.
  bool HasRet = false;
  for (SDNode *Node : Copy->uses()) {
    if (Node->getOpcode() != AArch64ISD::RET_FLAG)
      return false;
    HasRet = true;
  }

  if (!HasRet)
    return false;

  Chain = TCChain;
  return true;
}
17157 | |
17158 | |
17159 | |
17160 | |
17161 | |
17162 | bool AArch64TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { |
17163 | return CI->isTailCall(); |
17164 | } |
17165 | |
17166 | bool AArch64TargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base, |
17167 | SDValue &Offset, |
17168 | ISD::MemIndexedMode &AM, |
17169 | bool &IsInc, |
17170 | SelectionDAG &DAG) const { |
17171 | if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB) |
17172 | return false; |
17173 | |
17174 | Base = Op->getOperand(0); |
17175 | |
17176 | |
17177 | if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) { |
17178 | int64_t RHSC = RHS->getSExtValue(); |
17179 | if (Op->getOpcode() == ISD::SUB) |
17180 | RHSC = -(uint64_t)RHSC; |
17181 | if (!isInt<9>(RHSC)) |
17182 | return false; |
17183 | IsInc = (Op->getOpcode() == ISD::ADD); |
17184 | Offset = Op->getOperand(1); |
17185 | return true; |
17186 | } |
17187 | return false; |
17188 | } |
17189 | |
17190 | bool AArch64TargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, |
17191 | SDValue &Offset, |
17192 | ISD::MemIndexedMode &AM, |
17193 | SelectionDAG &DAG) const { |
17194 | EVT VT; |
17195 | SDValue Ptr; |
17196 | if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { |
17197 | VT = LD->getMemoryVT(); |
17198 | Ptr = LD->getBasePtr(); |
17199 | } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { |
17200 | VT = ST->getMemoryVT(); |
17201 | Ptr = ST->getBasePtr(); |
17202 | } else |
17203 | return false; |
17204 | |
17205 | bool IsInc; |
17206 | if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, IsInc, DAG)) |
17207 | return false; |
17208 | AM = IsInc ? ISD::PRE_INC : ISD::PRE_DEC; |
17209 | return true; |
17210 | } |
17211 | |
17212 | bool AArch64TargetLowering::getPostIndexedAddressParts( |
17213 | SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, |
17214 | ISD::MemIndexedMode &AM, SelectionDAG &DAG) const { |
17215 | EVT VT; |
17216 | SDValue Ptr; |
17217 | if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { |
17218 | VT = LD->getMemoryVT(); |
17219 | Ptr = LD->getBasePtr(); |
17220 | } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { |
17221 | VT = ST->getMemoryVT(); |
17222 | Ptr = ST->getBasePtr(); |
17223 | } else |
17224 | return false; |
17225 | |
17226 | bool IsInc; |
17227 | if (!getIndexedAddressParts(Op, Base, Offset, AM, IsInc, DAG)) |
17228 | return false; |
17229 | |
17230 | |
17231 | if (Ptr != Base) |
17232 | return false; |
17233 | AM = IsInc ? ISD::POST_INC : ISD::POST_DEC; |
17234 | return true; |
17235 | } |
17236 | |
/// Custom result legalization for BITCAST when the result type is illegal.
/// Handles two cases: (1) scalable fp->int bitcasts where only the source
/// type is legal, and (2) f16/bf16 -> i16, which is routed through an f32
/// INSERT_SUBREG so the value moves via an FP register.
void AArch64TargetLowering::ReplaceBITCASTResults(
    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
  SDLoc DL(N);
  SDValue Op = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT SrcVT = Op.getValueType();

  if (VT.isScalableVector() && !isTypeLegal(VT) && isTypeLegal(SrcVT)) {
    assert(!VT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
           "Expected fp->int bitcast!");
    // Bitcast into the wider legal container type, then truncate the lanes
    // back down to the requested element type.
    SDValue CastResult = getSVESafeBitCast(getSVEContainerType(VT), Op, DAG);
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, CastResult));
    return;
  }

  // Everything else we handle here is the half-float -> i16 case.
  if (VT != MVT::i16 || (SrcVT != MVT::f16 && SrcVT != MVT::bf16))
    return;

  // Place the half value in the hsub lane of an undef f32 register, bitcast
  // that to i32, then truncate to i16.
  Op = SDValue(
      DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f32,
                         DAG.getUNDEF(MVT::i32), Op,
                         DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
      0);
  Op = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Op);
  Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Op));
}
17263 | |
/// Legalize a too-wide AArch64 reduction node by splitting the vector operand
/// in half, combining the halves elementwise with \p InterOp (e.g. ISD::ADD),
/// and reducing the result with the across-lanes op \p AcrossOp.
static void ReplaceReductionResults(SDNode *N,
                                    SmallVectorImpl<SDValue> &Results,
                                    SelectionDAG &DAG, unsigned InterOp,
                                    unsigned AcrossOp) {
  EVT LoVT, HiVT;
  SDValue Lo, Hi;
  SDLoc dl(N);
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
  std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
  SDValue InterVal = DAG.getNode(InterOp, dl, LoVT, Lo, Hi);
  SDValue SplitVal = DAG.getNode(AcrossOp, dl, LoVT, InterVal);
  Results.push_back(SplitVal);
}
17277 | |
17278 | static std::pair<SDValue, SDValue> splitInt128(SDValue N, SelectionDAG &DAG) { |
17279 | SDLoc DL(N); |
17280 | SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, N); |
17281 | SDValue Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, |
17282 | DAG.getNode(ISD::SRL, DL, MVT::i128, N, |
17283 | DAG.getConstant(64, DL, MVT::i64))); |
17284 | return std::make_pair(Lo, Hi); |
17285 | } |
17286 | |
/// Custom result legalization for EXTRACT_SUBVECTOR on scalable integer
/// vectors: extracting either half of a vector is lowered to UUNPKLO/UUNPKHI
/// (which widen the elements) followed by a truncate back to the target type.
void AArch64TargetLowering::ReplaceExtractSubVectorResults(
    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
  SDValue In = N->getOperand(0);
  EVT InVT = In.getValueType();

  // Only scalable integer sources are handled here.
  if (!InVT.isScalableVector() || !InVT.isInteger())
    return;

  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // The unpack idiom only applies when we extract exactly half of the input.
  ElementCount ResEC = VT.getVectorElementCount();

  if (InVT.getVectorElementCount() != (ResEC * 2))
    return;

  // The extract index must be a constant equal to 0 (low half) or to the
  // result's lane count (high half).
  auto *CIndex = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!CIndex)
    return;

  unsigned Index = CIndex->getZExtValue();
  if ((Index != 0) && (Index != ResEC.getKnownMinValue()))
    return;

  unsigned Opcode = (Index == 0) ? AArch64ISD::UUNPKLO : AArch64ISD::UUNPKHI;
  EVT ExtendedHalfVT = VT.widenIntegerVectorElementType(*DAG.getContext());

  SDValue Half = DAG.getNode(Opcode, DL, ExtendedHalfVT, N->getOperand(0));
  Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Half));
}
17320 | |
17321 | |
/// Pack an i128 value into an XSeqPairs register pair (REG_SEQUENCE of two
/// i64 GPRs) as required by the CASP family of instructions.  On big-endian
/// targets the halves are swapped so the pair's sube64/subo64 assignment
/// matches memory order.
static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
  SDLoc dl(V.getNode());
  SDValue VLo = DAG.getAnyExtOrTrunc(V, dl, MVT::i64);
  SDValue VHi = DAG.getAnyExtOrTrunc(
      DAG.getNode(ISD::SRL, dl, MVT::i128, V, DAG.getConstant(64, dl, MVT::i64)),
      dl, MVT::i64);
  if (DAG.getDataLayout().isBigEndian())
    std::swap (VLo, VHi);
  SDValue RegClass =
      DAG.getTargetConstant(AArch64::XSeqPairsClassRegClassID, dl, MVT::i32);
  SDValue SubReg0 = DAG.getTargetConstant(AArch64::sube64, dl, MVT::i32);
  SDValue SubReg1 = DAG.getTargetConstant(AArch64::subo64, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
  return SDValue(
      DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
}
17338 | |
/// Legalize a 128-bit ATOMIC_CMP_SWAP.  With LSE (or outlined atomics) this
/// selects a CASP instruction operating on a GPR pair; otherwise it emits a
/// CMP_SWAP_128* pseudo that is later expanded to an LL/SC loop.  In both
/// cases the i128 result is rebuilt with BUILD_PAIR.
static void ReplaceCMP_SWAP_128Results(SDNode *N,
                                       SmallVectorImpl<SDValue> &Results,
                                       SelectionDAG &DAG,
                                       const AArch64Subtarget *Subtarget) {
  assert(N->getValueType(0) == MVT::i128 &&
         "AtomicCmpSwap on types less than 128 should be legal");

  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  if (Subtarget->hasLSE() || Subtarget->outlineAtomics()) {
    // LSE path: use CASP.  Operands are (expected pair, new pair, address,
    // chain); the desired/new i128 values must live in XSeqPairs registers.
    SDValue Ops[] = {
        createGPRPairNode(DAG, N->getOperand(2)), // Compare value
        createGPRPairNode(DAG, N->getOperand(3)), // Store value
        N->getOperand(1),                         // Ptr
        N->getOperand(0),                         // Chain in
    };

    // Pick the CASP variant matching the memory ordering.
    unsigned Opcode;
    switch (MemOp->getMergedOrdering()) {
    case AtomicOrdering::Monotonic:
      Opcode = AArch64::CASPX;
      break;
    case AtomicOrdering::Acquire:
      Opcode = AArch64::CASPAX;
      break;
    case AtomicOrdering::Release:
      Opcode = AArch64::CASPLX;
      break;
    case AtomicOrdering::AcquireRelease:
    case AtomicOrdering::SequentiallyConsistent:
      Opcode = AArch64::CASPALX;
      break;
    default:
      llvm_unreachable("Unexpected ordering!");
    }

    MachineSDNode *CmpSwap = DAG.getMachineNode(
        Opcode, SDLoc(N), DAG.getVTList(MVT::Untyped, MVT::Other), Ops);
    DAG.setNodeMemRefs(CmpSwap, {MemOp});

    // Pull the two i64 halves back out of the register pair; on big-endian
    // the sub-register roles are reversed.
    unsigned SubReg1 = AArch64::sube64, SubReg2 = AArch64::subo64;
    if (DAG.getDataLayout().isBigEndian())
      std::swap(SubReg1, SubReg2);
    SDValue Lo = DAG.getTargetExtractSubreg(SubReg1, SDLoc(N), MVT::i64,
                                            SDValue(CmpSwap, 0));
    SDValue Hi = DAG.getTargetExtractSubreg(SubReg2, SDLoc(N), MVT::i64,
                                            SDValue(CmpSwap, 0));
    Results.push_back(
        DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128, Lo, Hi));
    Results.push_back(SDValue(CmpSwap, 1));
    return;
  }

  // No LSE: emit the ordering-specific CMP_SWAP_128 pseudo (expanded to an
  // LL/SC loop after register allocation).
  unsigned Opcode;
  switch (MemOp->getMergedOrdering()) {
  case AtomicOrdering::Monotonic:
    Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
    break;
  case AtomicOrdering::Acquire:
    Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
    break;
  case AtomicOrdering::Release:
    Opcode = AArch64::CMP_SWAP_128_RELEASE;
    break;
  case AtomicOrdering::AcquireRelease:
  case AtomicOrdering::SequentiallyConsistent:
    Opcode = AArch64::CMP_SWAP_128;
    break;
  default:
    llvm_unreachable("Unexpected ordering!");
  }

  auto Desired = splitInt128(N->getOperand(2), DAG);
  auto New = splitInt128(N->getOperand(3), DAG);
  SDValue Ops[] = {N->getOperand(1), Desired.first, Desired.second,
                   New.first, New.second, N->getOperand(0)};
  SDNode *CmpSwap = DAG.getMachineNode(
      Opcode, SDLoc(N), DAG.getVTList(MVT::i64, MVT::i64, MVT::i32, MVT::Other),
      Ops);
  DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});

  // Results of the pseudo: lo half, hi half, status (unused here), chain.
  Results.push_back(DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128,
                                SDValue(CmpSwap, 0), SDValue(CmpSwap, 1)));
  Results.push_back(SDValue(CmpSwap, 3));
}
17425 | |
/// Target hook: produce legal replacement values for nodes whose result type
/// is illegal.  Leaving Results empty tells the legalizer to fall back to the
/// default expansion for that node (where one exists).
void AArch64TargetLowering::ReplaceNodeResults(
    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Don't know how to custom expand this");
  case ISD::BITCAST:
    ReplaceBITCASTResults(N, Results, DAG);
    return;
  case ISD::VECREDUCE_ADD:
  case ISD::VECREDUCE_SMAX:
  case ISD::VECREDUCE_SMIN:
  case ISD::VECREDUCE_UMAX:
  case ISD::VECREDUCE_UMIN:
    Results.push_back(LowerVECREDUCE(SDValue(N, 0), DAG));
    return;

  case ISD::CTPOP:
    if (SDValue Result = LowerCTPOP(SDValue(N, 0), DAG))
      Results.push_back(Result);
    return;
  // AArch64 reductions on too-wide vectors: split and re-reduce.
  case AArch64ISD::SADDV:
    ReplaceReductionResults(N, Results, DAG, ISD::ADD, AArch64ISD::SADDV);
    return;
  case AArch64ISD::UADDV:
    ReplaceReductionResults(N, Results, DAG, ISD::ADD, AArch64ISD::UADDV);
    return;
  case AArch64ISD::SMINV:
    ReplaceReductionResults(N, Results, DAG, ISD::SMIN, AArch64ISD::SMINV);
    return;
  case AArch64ISD::UMINV:
    ReplaceReductionResults(N, Results, DAG, ISD::UMIN, AArch64ISD::UMINV);
    return;
  case AArch64ISD::SMAXV:
    ReplaceReductionResults(N, Results, DAG, ISD::SMAX, AArch64ISD::SMAXV);
    return;
  case AArch64ISD::UMAXV:
    ReplaceReductionResults(N, Results, DAG, ISD::UMAX, AArch64ISD::UMAXV);
    return;
  case ISD::FP_TO_UINT:
  case ISD::FP_TO_SINT:
  case ISD::STRICT_FP_TO_SINT:
  case ISD::STRICT_FP_TO_UINT:
    assert(N->getValueType(0) == MVT::i128 && "unexpected illegal conversion");
    // Deliberately add nothing: the default legalization handles i128.
    return;
  case ISD::ATOMIC_CMP_SWAP:
    ReplaceCMP_SWAP_128Results(N, Results, DAG, Subtarget);
    return;
  case ISD::LOAD: {
    assert(SDValue(N, 0).getValueType() == MVT::i128 &&
           "unexpected load's value type");
    LoadSDNode *LoadNode = cast<LoadSDNode>(N);
    if (!LoadNode->isVolatile() || LoadNode->getMemoryVT() != MVT::i128) {
      // Only volatile i128 loads get the LDP treatment; everything else
      // falls back to default expansion.
      return;
    }

    // Emit a single LDP producing both i64 halves plus the chain.
    SDValue Result = DAG.getMemIntrinsicNode(
        AArch64ISD::LDP, SDLoc(N),
        DAG.getVTList({MVT::i64, MVT::i64, MVT::Other}),
        {LoadNode->getChain(), LoadNode->getBasePtr()}, LoadNode->getMemoryVT(),
        LoadNode->getMemOperand());

    SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128,
                               Result.getValue(0), Result.getValue(1));
    Results.append({Pair, Result.getValue(2) /* Chain */});
    return;
  }
  case ISD::EXTRACT_SUBVECTOR:
    ReplaceExtractSubVectorResults(N, Results, DAG);
    return;
  case ISD::INSERT_SUBVECTOR:
    // Nothing to do: leave the node as-is and let the common legalization
    // code handle it.
    return;
  case ISD::INTRINSIC_WO_CHAIN: {
    EVT VT = N->getValueType(0);
    assert((VT == MVT::i8 || VT == MVT::i16) &&
           "custom lowering for unexpected type");

    // No chain here, so operand 0 is the intrinsic ID.
    ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(0));
    Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
    switch (IntID) {
    default:
      return;
    case Intrinsic::aarch64_sve_clasta_n: {
      // The i8/i16 scalar operand is widened to i32 for the node, and the
      // result truncated back down.
      SDLoc DL(N);
      auto Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(2));
      auto V = DAG.getNode(AArch64ISD::CLASTA_N, DL, MVT::i32,
                           N->getOperand(1), Op2, N->getOperand(3));
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
      return;
    }
    case Intrinsic::aarch64_sve_clastb_n: {
      SDLoc DL(N);
      auto Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(2));
      auto V = DAG.getNode(AArch64ISD::CLASTB_N, DL, MVT::i32,
                           N->getOperand(1), Op2, N->getOperand(3));
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
      return;
    }
    case Intrinsic::aarch64_sve_lasta: {
      SDLoc DL(N);
      auto V = DAG.getNode(AArch64ISD::LASTA, DL, MVT::i32,
                           N->getOperand(1), N->getOperand(2));
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
      return;
    }
    case Intrinsic::aarch64_sve_lastb: {
      SDLoc DL(N);
      auto V = DAG.getNode(AArch64ISD::LASTB, DL, MVT::i32,
                           N->getOperand(1), N->getOperand(2));
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
      return;
    }
    }
  }
  }
}
17546 | |
17547 | bool AArch64TargetLowering::useLoadStackGuardNode() const { |
17548 | if (Subtarget->isTargetAndroid() || Subtarget->isTargetFuchsia()) |
17549 | return TargetLowering::useLoadStackGuardNode(); |
17550 | return true; |
17551 | } |
17552 | |
/// Minimum number of divisions by the same divisor before the DAG combiner
/// converts them into one reciprocal estimate plus multiplies.
unsigned AArch64TargetLowering::combineRepeatedFPDivisors() const {
  // Threshold chosen for this target; fewer than 3 repeated divisions keep
  // the plain FDIV form.
  return 3;
}
17558 | |
17559 | TargetLoweringBase::LegalizeTypeAction |
17560 | AArch64TargetLowering::getPreferredVectorAction(MVT VT) const { |
17561 | |
17562 | |
17563 | if (VT == MVT::v1i8 || VT == MVT::v1i16 || VT == MVT::v1i32 || |
17564 | VT == MVT::v1f32) |
17565 | return TypeWidenVector; |
17566 | |
17567 | return TargetLoweringBase::getPreferredVectorAction(VT); |
17568 | } |
17569 | |
17570 | |
17571 | |
17572 | |
17573 | bool AArch64TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { |
17574 | unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits(); |
17575 | return Size == 128; |
17576 | } |
17577 | |
17578 | |
17579 | |
17580 | |
17581 | TargetLowering::AtomicExpansionKind |
17582 | AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { |
17583 | unsigned Size = LI->getType()->getPrimitiveSizeInBits(); |
17584 | return Size == 128 ? AtomicExpansionKind::LLSC : AtomicExpansionKind::None; |
17585 | } |
17586 | |
17587 | |
/// Decide how an atomicrmw should be expanded in IR for AArch64.
TargetLowering::AtomicExpansionKind
AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // FP operations have no native RMW form; lower via a cmpxchg loop.
  if (AI->isFloatingPointOperation())
    return AtomicExpansionKind::CmpXChg;

  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
  if (Size > 128) return AtomicExpansionKind::None;

  // Sub-128-bit, non-Nand operations may be handled without expansion when
  // LSE atomics (or the outlined-atomics library calls) are available.
  if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128) {
    if (Subtarget->hasLSE())
      return AtomicExpansionKind::None;
    if (Subtarget->outlineAtomics()) {
      // [U]Min/[U]Max are excluded: no outlined-atomics helpers exist for
      // them, so those still fall through to the expansion below.
      if (AI->getOperation() != AtomicRMWInst::Min &&
          AI->getOperation() != AtomicRMWInst::Max &&
          AI->getOperation() != AtomicRMWInst::UMin &&
          AI->getOperation() != AtomicRMWInst::UMax) {
        return AtomicExpansionKind::None;
      }
    }
  }

  // At -O0 the fast register allocator can spill the live values of an LL/SC
  // loop; a spill near the monitored address would keep clearing the
  // exclusive monitor so the loop could never succeed.  Use a cmpxchg-based
  // expansion instead, which is later lowered as a single pseudo.
  if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
    return AtomicExpansionKind::CmpXChg;

  return AtomicExpansionKind::LLSC;
}
17627 | |
TargetLowering::AtomicExpansionKind
AArch64TargetLowering::shouldExpandAtomicCmpXchgInIR(
    AtomicCmpXchgInst *AI) const {
  // If the subtarget has LSE (or uses the outlined atomics runtime), leave
  // cmpxchg intact for instruction selection.
  if (Subtarget->hasLSE() || Subtarget->outlineAtomics())
    return AtomicExpansionKind::None;

  // At -O0, fast-regalloc cannot cope with the live vregs necessary to
  // implement cmpxchg without spilling. If the address being exchanged is
  // also on the stack and close enough to the spill slot, this can lead to a
  // situation where the monitor always gets cleared and the atomic operation
  // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
  if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
    return AtomicExpansionKind::None;

  // 128-bit atomic cmpxchg is weird; the generic AtomicExpand pass doesn't
  // know how to expand it, so keep it out of the LL/SC path.
  unsigned Size = AI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
  if (Size > 64)
    return AtomicExpansionKind::None;

  return AtomicExpansionKind::LLSC;
}
17650 | |
// Emit a load-exclusive (ldxr/ldaxr, or the ldxp/ldaxp pair form for 128-bit
// values) returning the loaded value as a ValueTy.
Value *AArch64TargetLowering::emitLoadLinked(IRBuilderBase &Builder,
                                             Type *ValueTy, Value *Addr,
                                             AtomicOrdering Ord) const {
  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
  bool IsAcquire = isAcquireOrStronger(Ord);

  // Since i128 isn't legal and intrinsics don't get type-lowered, the
  // exclusive-pair intrinsic returns {i64, i64} and we have to recombine the
  // two halves into a single i128 here.
  if (ValueTy->getPrimitiveSizeInBits() == 128) {
    Intrinsic::ID Int =
        IsAcquire ? Intrinsic::aarch64_ldaxp : Intrinsic::aarch64_ldxp;
    Function *Ldxr = Intrinsic::getDeclaration(M, Int);

    Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
    Value *LoHi = Builder.CreateCall(Ldxr, Addr, "lohi");

    Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
    Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
    Lo = Builder.CreateZExt(Lo, ValueTy, "lo64");
    Hi = Builder.CreateZExt(Hi, ValueTy, "hi64");
    // val = (hi << 64) | lo
    return Builder.CreateOr(
        Lo, Builder.CreateShl(Hi, ConstantInt::get(ValueTy, 64)), "val64");
  }

  Type *Tys[] = { Addr->getType() };
  Intrinsic::ID Int =
      IsAcquire ? Intrinsic::aarch64_ldaxr : Intrinsic::aarch64_ldxr;
  Function *Ldxr = Intrinsic::getDeclaration(M, Int, Tys);

  // The intrinsic always returns i64; truncate to the value's bit width and
  // bitcast back to the requested type (e.g. for float/pointer payloads).
  const DataLayout &DL = M->getDataLayout();
  IntegerType *IntEltTy = Builder.getIntNTy(DL.getTypeSizeInBits(ValueTy));
  Value *Trunc = Builder.CreateTrunc(Builder.CreateCall(Ldxr, Addr), IntEltTy);

  return Builder.CreateBitCast(Trunc, ValueTy);
}
17687 | |
17688 | void AArch64TargetLowering::emitAtomicCmpXchgNoStoreLLBalance( |
17689 | IRBuilderBase &Builder) const { |
17690 | Module *M = Builder.GetInsertBlock()->getParent()->getParent(); |
17691 | Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::aarch64_clrex)); |
17692 | } |
17693 | |
// Emit a store-exclusive (stxr/stlxr, or the stxp/stlxp pair form for
// 128-bit values). Returns the i32 status result (0 on success).
Value *AArch64TargetLowering::emitStoreConditional(IRBuilderBase &Builder,
                                                   Value *Val, Value *Addr,
                                                   AtomicOrdering Ord) const {
  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
  bool IsRelease = isReleaseOrStronger(Ord);

  // Since the intrinsics must have legal type, the i128 intrinsic takes two
  // parameters: "i64 lo, i64 hi". We must marshal Val into this form before
  // the call.
  if (Val->getType()->getPrimitiveSizeInBits() == 128) {
    Intrinsic::ID Int =
        IsRelease ? Intrinsic::aarch64_stlxp : Intrinsic::aarch64_stxp;
    Function *Stxr = Intrinsic::getDeclaration(M, Int);
    Type *Int64Ty = Type::getInt64Ty(M->getContext());

    Value *Lo = Builder.CreateTrunc(Val, Int64Ty, "lo");
    Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 64), Int64Ty, "hi");
    Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
    return Builder.CreateCall(Stxr, {Lo, Hi, Addr});
  }

  Intrinsic::ID Int =
      IsRelease ? Intrinsic::aarch64_stlxr : Intrinsic::aarch64_stxr;
  Type *Tys[] = { Addr->getType() };
  Function *Stxr = Intrinsic::getDeclaration(M, Int, Tys);

  // Non-integer payloads (float, pointer) are bitcast to an integer of the
  // same width, then zero-extended to the i64 the intrinsic expects.
  const DataLayout &DL = M->getDataLayout();
  IntegerType *IntValTy = Builder.getIntNTy(DL.getTypeSizeInBits(Val->getType()));
  Val = Builder.CreateBitCast(Val, IntValTy);

  return Builder.CreateCall(Stxr,
                            {Builder.CreateZExtOrBitCast(
                                 Val, Stxr->getFunctionType()->getParamType(0)),
                             Addr});
}
17729 | |
bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters(
    Type *Ty, CallingConv::ID CallConv, bool isVarArg,
    const DataLayout &DL) const {
  if (!Ty->isArrayTy()) {
    // Scalable types wider than one SVE register need consecutive registers.
    const TypeSize &TySize = Ty->getPrimitiveSizeInBits();
    return TySize.isScalable() && TySize.getKnownMinSize() > 128;
  }

  // All non-aggregate members of the array type must have the same type.
  SmallVector<EVT> ValueVTs;
  ComputeValueVTs(*this, DL, Ty, ValueVTs);
  return is_splat(ValueVTs);
}
17743 | |
// Never normalize select chains; NOTE(review): presumably because AArch64's
// conditional-select instructions handle the unnormalized form well — the
// rationale is not visible here.
bool AArch64TargetLowering::shouldNormalizeToSelectSequence(LLVMContext &,
                                                            EVT) const {
  return false;
}
17748 | |
// Build an i8** pointing at a fixed byte offset from the thread pointer
// (obtained via the llvm.thread.pointer intrinsic). Used for the
// Android/Fuchsia TLS slots below; note Offset is unsigned but callers pass
// small negative values, which wrap to the intended negative i32 GEP index.
static Value *UseTlsOffset(IRBuilderBase &IRB, unsigned Offset) {
  Module *M = IRB.GetInsertBlock()->getParent()->getParent();
  Function *ThreadPointerFunc =
      Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
  return IRB.CreatePointerCast(
      IRB.CreateConstGEP1_32(IRB.getInt8Ty(), IRB.CreateCall(ThreadPointerFunc),
                             Offset),
      IRB.getInt8PtrTy()->getPointerTo(0));
}
17758 | |
Value *AArch64TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
  // Android provides a fixed TLS slot for the stack cookie. See the
  // definition of TLS_SLOT_STACK_GUARD in bionic's private/bionic_tls.h.
  if (Subtarget->isTargetAndroid())
    return UseTlsOffset(IRB, 0x28);

  // Fuchsia is similar: <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET
  // with this value.
  if (Subtarget->isTargetFuchsia())
    return UseTlsOffset(IRB, -0x10);

  return TargetLowering::getIRStackGuard(IRB);
}
17773 | |
void AArch64TargetLowering::insertSSPDeclarations(Module &M) const {
  // The MSVC CRT provides the stack-protection machinery itself.
  if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment()) {
    // The MSVC CRT has a global variable holding the security cookie.
    M.getOrInsertGlobal("__security_cookie",
                        Type::getInt8PtrTy(M.getContext()));

    // The MSVC CRT has a function to validate the security cookie; it takes
    // the cookie in a register, hence the InReg attribute.
    FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
        "__security_check_cookie", Type::getVoidTy(M.getContext()),
        Type::getInt8PtrTy(M.getContext()));
    if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
      F->setCallingConv(CallingConv::Win64);
      F->addParamAttr(0, Attribute::AttrKind::InReg);
    }
    return;
  }
  TargetLowering::insertSSPDeclarations(M);
}
17793 | |
17794 | Value *AArch64TargetLowering::getSDagStackGuard(const Module &M) const { |
17795 | |
17796 | if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment()) |
17797 | return M.getGlobalVariable("__security_cookie"); |
17798 | return TargetLowering::getSDagStackGuard(M); |
17799 | } |
17800 | |
17801 | Function *AArch64TargetLowering::getSSPStackGuardCheck(const Module &M) const { |
17802 | |
17803 | if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment()) |
17804 | return M.getFunction("__security_check_cookie"); |
17805 | return TargetLowering::getSSPStackGuardCheck(M); |
17806 | } |
17807 | |
Value *
AArch64TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
  // Android provides a fixed TLS slot for the SafeStack pointer. See the
  // definition of TLS_SLOT_SAFESTACK in bionic's private/bionic_tls.h.
  if (Subtarget->isTargetAndroid())
    return UseTlsOffset(IRB, 0x48);

  // Fuchsia is similar: <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with
  // this value.
  if (Subtarget->isTargetFuchsia())
    return UseTlsOffset(IRB, -0x8);

  return TargetLowering::getSafeStackPointerLocation(IRB);
}
17823 | |
17824 | bool AArch64TargetLowering::isMaskAndCmp0FoldingBeneficial( |
17825 | const Instruction &AndI) const { |
17826 | |
17827 | |
17828 | |
17829 | |
17830 | |
17831 | ConstantInt* Mask = dyn_cast<ConstantInt>(AndI.getOperand(1)); |
17832 | if (!Mask) |
17833 | return false; |
17834 | return Mask->getValue().isPowerOf2(); |
17835 | } |
17836 | |
bool AArch64TargetLowering::
    shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
        SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
        unsigned OldShiftOpcode, unsigned NewShiftOpcode,
        SelectionDAG &DAG) const {
  // Does the baseline recognize this transform at all?
  if (!TargetLowering::shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
          X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG))
    return false;

  // Profitable for scalar integers unconditionally; for vectors only when
  // the new shift is a left shift. NOTE(review): presumably because only
  // vector SHL-by-immediate stays cheap here — confirm rationale.
  return X.getValueType().isScalarInteger() || NewShiftOpcode == ISD::SHL;
}
17849 | |
17850 | bool AArch64TargetLowering::shouldExpandShift(SelectionDAG &DAG, |
17851 | SDNode *N) const { |
17852 | if (DAG.getMachineFunction().getFunction().hasMinSize() && |
17853 | !Subtarget->isTargetWindows() && !Subtarget->isTargetDarwin()) |
17854 | return false; |
17855 | return true; |
17856 | } |
17857 | |
void AArch64TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
  // Update the function info so later passes (and insertCopiesSplitCSR)
  // know callee-saved registers are handled via copies, not spills.
  AArch64FunctionInfo *AFI = Entry->getParent()->getInfo<AArch64FunctionInfo>();
  AFI->setIsSplitCSR(true);
}
17863 | |
// For split-CSR functions (e.g. CXX_FAST_TLS), save callee-saved registers
// into virtual registers at entry and restore them before each exit, instead
// of spilling them to the stack.
void AArch64TargetLowering::insertCopiesSplitCSR(
    MachineBasicBlock *Entry,
    const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
  const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
  if (!IStart)
    return;

  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
  MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
  MachineBasicBlock::iterator MBBI = Entry->begin();
  for (const MCPhysReg *I = IStart; *I; ++I) {
    const TargetRegisterClass *RC = nullptr;
    if (AArch64::GPR64RegClass.contains(*I))
      RC = &AArch64::GPR64RegClass;
    else if (AArch64::FPR64RegClass.contains(*I))
      RC = &AArch64::FPR64RegClass;
    else
      llvm_unreachable("Unexpected register class in CSRsViaCopy!");

    Register NewVR = MRI->createVirtualRegister(RC);
    // Create a copy from the CSR to a virtual register.
    // FIXME: this currently does not emit CFI pseudo-instructions; that is
    // fine for CXX_FAST_TLS since the C++-style TLS access functions should
    // be nounwind (asserted below), but generalizing this would need CFI
    // pseudos.
    assert(Entry->getParent()->getFunction().hasFnAttribute(
               Attribute::NoUnwind) &&
           "Function should be nounwind in insertCopiesSplitCSR!");
    Entry->addLiveIn(*I);
    BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
        .addReg(*I);

    // Insert the copy-back instructions right before each terminator.
    for (auto *Exit : Exits)
      BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
              TII->get(TargetOpcode::COPY), *I)
          .addReg(NewVR);
  }
}
17904 | |
bool AArch64TargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
  // Integer division on AArch64 is expensive. However, when aggressively
  // optimizing for code size, we prefer to use a div instruction, as it is
  // usually smaller than the alternative sequence.
  // The exception to this is vector division. Since AArch64 doesn't have
  // vector integer division, leaving the division as-is is a loss even in
  // terms of size, because it will have to be scalarized, while the
  // alternative sequence can be performed in vector form.
  bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
  return OptSize && !VT.isVector();
}
17916 | |
bool AArch64TargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
  // We want inc-of-add for scalars and sub-of-not for vectors.
  return VT.isScalarInteger();
}
17921 | |
17922 | bool AArch64TargetLowering::enableAggressiveFMAFusion(EVT VT) const { |
17923 | return Subtarget->hasAggressiveFMA() && VT.isFloatingPoint(); |
17924 | } |
17925 | |
unsigned
AArch64TargetLowering::getVaListSizeInBits(const DataLayout &DL) const {
  // Darwin and Windows use a simple char* va_list.
  if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
    return getPointerTy(DL).getSizeInBits();

  // AAPCS va_list struct: three pointers (__stack, __gr_top, __vr_top) plus
  // two 32-bit offsets (__gr_offs, __vr_offs).
  return 3 * getPointerTy(DL).getSizeInBits() + 2 * 32;
}
17933 | |
void AArch64TargetLowering::finalizeLowering(MachineFunction &MF) const {
  // Make sure the max call-frame size is computed before the base class
  // finalization runs.
  MF.getFrameInfo().computeMaxCallFrameSize(MF);
  TargetLoweringBase::finalizeLowering(MF);
}
17938 | |
17939 | |
// Unlike X86, we let frame lowering assign offsets to all catch objects.
bool AArch64TargetLowering::needsFixedCatchObjects() const {
  return false;
}
17943 | |
bool AArch64TargetLowering::shouldLocalize(
    const MachineInstr &MI, const TargetTransformInfo *TTI) const {
  switch (MI.getOpcode()) {
  case TargetOpcode::G_GLOBAL_VALUE: {
    // On Darwin, TLS global vars get selected into function calls, which we
    // don't want localized, as they can get moved into the middle of a call
    // sequence.
    const GlobalValue &GV = *MI.getOperand(1).getGlobal();
    if (GV.isThreadLocal() && Subtarget->isTargetMachO())
      return false;
    break;
  }
  // If we legalized G_GLOBAL_VALUE into ADRP + G_ADD_LOW, mark both as
  // localizable.
  case AArch64::ADRP:
  case AArch64::G_ADD_LOW:
    return true;
  default:
    break;
  }
  return TargetLoweringBase::shouldLocalize(MI, TTI);
}
17966 | |
17967 | bool AArch64TargetLowering::fallBackToDAGISel(const Instruction &Inst) const { |
17968 | if (isa<ScalableVectorType>(Inst.getType())) |
17969 | return true; |
17970 | |
17971 | for (unsigned i = 0; i < Inst.getNumOperands(); ++i) |
17972 | if (isa<ScalableVectorType>(Inst.getOperand(i)->getType())) |
17973 | return true; |
17974 | |
17975 | if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) { |
17976 | if (isa<ScalableVectorType>(AI->getAllocatedType())) |
17977 | return true; |
17978 | } |
17979 | |
17980 | return false; |
17981 | } |
17982 | |
17983 | |
// Return the full-width scalable "container" type whose element type matches
// the given legal fixed-length vector type.
static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT) {
  assert(VT.isFixedLengthVector() &&
         DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
         "Expected legal fixed length vector!");
  switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
  default:
    llvm_unreachable("unexpected element type for SVE container");
  case MVT::i8:
    return EVT(MVT::nxv16i8);
  case MVT::i16:
    return EVT(MVT::nxv8i16);
  case MVT::i32:
    return EVT(MVT::nxv4i32);
  case MVT::i64:
    return EVT(MVT::nxv2i64);
  case MVT::f16:
    return EVT(MVT::nxv8f16);
  case MVT::f32:
    return EVT(MVT::nxv4f32);
  case MVT::f64:
    return EVT(MVT::nxv2f64);
  }
}
18007 | |
18008 | |
// Return a PTRUE whose active lanes cover exactly the elements of the given
// fixed-length vector type.
static SDValue getPredicateForFixedLengthVector(SelectionDAG &DAG, SDLoc &DL,
                                                EVT VT) {
  assert(VT.isFixedLengthVector() &&
         DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
         "Expected legal fixed length vector!");

  unsigned PgPattern =
      getSVEPredPatternFromNumElements(VT.getVectorNumElements());
  assert(PgPattern && "Unexpected element count for SVE predicate");

  // For vectors that are exactly getMaxSVEVectorSizeInBits big, we can use
  // AArch64SVEPredPattern::all, which can enable the use of unpredicated
  // operations.
  const auto &Subtarget =
      static_cast<const AArch64Subtarget &>(DAG.getSubtarget());
  unsigned MinSVESize = Subtarget.getMinSVEVectorSizeInBits();
  unsigned MaxSVESize = Subtarget.getMaxSVEVectorSizeInBits();
  if (MaxSVESize && MinSVESize == MaxSVESize &&
      MaxSVESize == VT.getSizeInBits())
    PgPattern = AArch64SVEPredPattern::all;

  // Pick the predicate type with the lane count matching the element width.
  MVT MaskVT;
  switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
  default:
    llvm_unreachable("unexpected element type for SVE predicate");
  case MVT::i8:
    MaskVT = MVT::nxv16i1;
    break;
  case MVT::i16:
  case MVT::f16:
    MaskVT = MVT::nxv8i1;
    break;
  case MVT::i32:
  case MVT::f32:
    MaskVT = MVT::nxv4i1;
    break;
  case MVT::i64:
  case MVT::f64:
    MaskVT = MVT::nxv2i1;
    break;
  }

  return getPTrue(DAG, DL, MaskVT, PgPattern);
}
18053 | |
// Return an all-true predicate with the same lane count as the given legal
// scalable vector type.
static SDValue getPredicateForScalableVector(SelectionDAG &DAG, SDLoc &DL,
                                             EVT VT) {
  assert(VT.isScalableVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
         "Expected legal scalable vector!");
  auto PredTy = VT.changeVectorElementType(MVT::i1);
  return getPTrue(DAG, DL, PredTy, AArch64SVEPredPattern::all);
}
18061 | |
18062 | static SDValue getPredicateForVector(SelectionDAG &DAG, SDLoc &DL, EVT VT) { |
18063 | if (VT.isFixedLengthVector()) |
18064 | return getPredicateForFixedLengthVector(DAG, DL, VT); |
18065 | |
18066 | return getPredicateForScalableVector(DAG, DL, VT); |
18067 | } |
18068 | |
18069 | |
// Grow V to consume an entire SVE register by inserting it at index 0 of an
// undef scalable vector; the extra lanes are undefined.
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V) {
  assert(VT.isScalableVector() &&
         "Expected to convert into a scalable vector!");
  assert(V.getValueType().isFixedLengthVector() &&
         "Expected a fixed length vector operand!");
  SDLoc DL(V);
  SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
  return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
}
18079 | |
18080 | |
// Shrink V to just the low elements, extracting a VT-sized fixed-length
// subvector starting at index 0.
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V) {
  assert(VT.isFixedLengthVector() &&
         "Expected to convert into a fixed length vector!");
  assert(V.getValueType().isScalableVector() &&
         "Expected a scalable vector operand!");
  SDLoc DL(V);
  SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
}
18090 | |
18091 | |
// Lower a fixed-length vector load as an SVE masked load on the container
// type, predicated to the original element count.
SDValue AArch64TargetLowering::LowerFixedLengthVectorLoadToSVE(
    SDValue Op, SelectionDAG &DAG) const {
  auto Load = cast<LoadSDNode>(Op);

  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);

  auto NewLoad = DAG.getMaskedLoad(
      ContainerVT, DL, Load->getChain(), Load->getBasePtr(), Load->getOffset(),
      getPredicateForFixedLengthVector(DAG, DL, VT), DAG.getUNDEF(ContainerVT),
      Load->getMemoryVT(), Load->getMemOperand(), Load->getAddressingMode(),
      Load->getExtensionType());

  // Return both the loaded value (shrunk back to VT) and the chain.
  auto Result = convertFromScalableVector(DAG, VT, NewLoad);
  SDValue MergedValues[2] = {Result, Load->getChain()};
  return DAG.getMergeValues(MergedValues, DL);
}
18110 | |
// Convert a fixed-length integer mask into an SVE predicate by comparing the
// widened mask against zero under a predicate limited to the original lanes.
static SDValue convertFixedMaskToScalableVector(SDValue Mask,
                                                SelectionDAG &DAG) {
  SDLoc DL(Mask);
  EVT InVT = Mask.getValueType();
  EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);

  auto Op1 = convertToScalableVector(DAG, ContainerVT, Mask);
  auto Op2 = DAG.getConstant(0, DL, ContainerVT);
  auto Pg = getPredicateForFixedLengthVector(DAG, DL, InVT);

  // Lanes beyond the fixed-length portion come out false because Pg is
  // inactive there.
  EVT CmpVT = Pg.getValueType();
  return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, CmpVT,
                     {Pg, Op1, Op2, DAG.getCondCode(ISD::SETNE)});
}
18125 | |
18126 | |
// Lower a fixed-length masked load as an SVE masked load on the container
// type. Extending masked loads are not handled (returns SDValue()).
SDValue AArch64TargetLowering::LowerFixedLengthVectorMLoadToSVE(
    SDValue Op, SelectionDAG &DAG) const {
  auto Load = cast<MaskedLoadSDNode>(Op);

  if (Load->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD)
    return SDValue();

  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);

  SDValue Mask = convertFixedMaskToScalableVector(Load->getMask(), DAG);

  SDValue PassThru;
  bool IsPassThruZeroOrUndef = false;

  // SVE masked loads zero the inactive lanes, so a zero/undef passthru can
  // be expressed directly; anything else needs a select afterwards.
  if (Load->getPassThru()->isUndef()) {
    PassThru = DAG.getUNDEF(ContainerVT);
    IsPassThruZeroOrUndef = true;
  } else {
    if (ContainerVT.isInteger())
      PassThru = DAG.getConstant(0, DL, ContainerVT);
    else
      PassThru = DAG.getConstantFP(0, DL, ContainerVT);
    if (isZerosVector(Load->getPassThru().getNode()))
      IsPassThruZeroOrUndef = true;
  }

  auto NewLoad = DAG.getMaskedLoad(
      ContainerVT, DL, Load->getChain(), Load->getBasePtr(), Load->getOffset(),
      Mask, PassThru, Load->getMemoryVT(), Load->getMemOperand(),
      Load->getAddressingMode(), Load->getExtensionType());

  // Blend the original passthru back into the inactive lanes.
  if (!IsPassThruZeroOrUndef) {
    SDValue OldPassThru =
        convertToScalableVector(DAG, ContainerVT, Load->getPassThru());
    NewLoad = DAG.getSelect(DL, ContainerVT, Mask, NewLoad, OldPassThru);
  }

  auto Result = convertFromScalableVector(DAG, VT, NewLoad);
  SDValue MergedValues[2] = {Result, Load->getChain()};
  return DAG.getMergeValues(MergedValues, DL);
}
18170 | |
18171 | |
// Lower a fixed-length vector store as an SVE masked store on the container
// type, predicated to the original element count.
SDValue AArch64TargetLowering::LowerFixedLengthVectorStoreToSVE(
    SDValue Op, SelectionDAG &DAG) const {
  auto Store = cast<StoreSDNode>(Op);

  SDLoc DL(Op);
  EVT VT = Store->getValue().getValueType();
  EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);

  auto NewValue = convertToScalableVector(DAG, ContainerVT, Store->getValue());
  return DAG.getMaskedStore(
      Store->getChain(), DL, NewValue, Store->getBasePtr(), Store->getOffset(),
      getPredicateForFixedLengthVector(DAG, DL, VT), Store->getMemoryVT(),
      Store->getMemOperand(), Store->getAddressingMode(),
      Store->isTruncatingStore());
}
18187 | |
// Lower a fixed-length masked store as an SVE masked store on the container
// type. Truncating stores are not handled (returns SDValue()).
SDValue AArch64TargetLowering::LowerFixedLengthVectorMStoreToSVE(
    SDValue Op, SelectionDAG &DAG) const {
  auto Store = cast<MaskedStoreSDNode>(Op);

  if (Store->isTruncatingStore())
    return SDValue();

  SDLoc DL(Op);
  EVT VT = Store->getValue().getValueType();
  EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);

  auto NewValue = convertToScalableVector(DAG, ContainerVT, Store->getValue());
  SDValue Mask = convertFixedMaskToScalableVector(Store->getMask(), DAG);

  return DAG.getMaskedStore(
      Store->getChain(), DL, NewValue, Store->getBasePtr(), Store->getOffset(),
      Mask, Store->getMemoryVT(), Store->getMemOperand(),
      Store->getAddressingMode(), Store->isTruncatingStore());
}
18207 | |
// Lower fixed-length SDIV/UDIV via SVE. SVE only divides i32/i64 elements,
// so narrower element types are widened first.
SDValue AArch64TargetLowering::LowerFixedLengthVectorIntDivideToSVE(
    SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  EVT VT = Op.getValueType();
  EVT EltVT = VT.getVectorElementType();

  bool Signed = Op.getOpcode() == ISD::SDIV;
  unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED;

  // Scalable vector i32/i64 DIV is supported directly.
  if (EltVT == MVT::i32 || EltVT == MVT::i64)
    return LowerToPredicatedOp(Op, DAG, PredOpcode, /*OverrideNEON=*/true);

  // Scalable vector i8/i16 DIV is not supported. Promote to a wider element
  // type; the various types needed by the two strategies below.
  EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
  EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
  EVT FixedWidenedVT = HalfVT.widenIntegerVectorElementType(*DAG.getContext());
  EVT ScalableWidenedVT = getContainerForFixedLengthVector(DAG, FixedWidenedVT);

  // If a wider vector with the same element count is legal, simply extend,
  // divide, and truncate.
  EVT WidenedVT = VT.widenIntegerVectorElementType(*DAG.getContext());
  if (DAG.getTargetLoweringInfo().isTypeLegal(WidenedVT)) {
    unsigned ExtendOpcode = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    SDValue Op0 = DAG.getNode(ExtendOpcode, dl, WidenedVT, Op.getOperand(0));
    SDValue Op1 = DAG.getNode(ExtendOpcode, dl, WidenedVT, Op.getOperand(1));
    SDValue Div = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0, Op1);
    return DAG.getNode(ISD::TRUNCATE, dl, VT, Div);
  }

  // Otherwise split into lo/hi halves via unpack. Convert the operands to
  // scalable vectors first.
  SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
  SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(1));

  // Unpack (sign/zero-extending) each operand into widened halves.
  unsigned UnpkLo = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
  unsigned UnpkHi = Signed ? AArch64ISD::SUNPKHI : AArch64ISD::UUNPKHI;
  SDValue Op0Lo = DAG.getNode(UnpkLo, dl, ScalableWidenedVT, Op0);
  SDValue Op1Lo = DAG.getNode(UnpkLo, dl, ScalableWidenedVT, Op1);
  SDValue Op0Hi = DAG.getNode(UnpkHi, dl, ScalableWidenedVT, Op0);
  SDValue Op1Hi = DAG.getNode(UnpkHi, dl, ScalableWidenedVT, Op1);

  // Convert back to fixed-length vectors so the DIVs can be re-lowered
  // recursively (they may need further widening).
  Op0Lo = convertFromScalableVector(DAG, FixedWidenedVT, Op0Lo);
  Op1Lo = convertFromScalableVector(DAG, FixedWidenedVT, Op1Lo);
  Op0Hi = convertFromScalableVector(DAG, FixedWidenedVT, Op0Hi);
  Op1Hi = convertFromScalableVector(DAG, FixedWidenedVT, Op1Hi);
  SDValue ResultLo = DAG.getNode(Op.getOpcode(), dl, FixedWidenedVT,
                                 Op0Lo, Op1Lo);
  SDValue ResultHi = DAG.getNode(Op.getOpcode(), dl, FixedWidenedVT,
                                 Op0Hi, Op1Hi);

  // Zip the halves back together, truncating to the original element type.
  ResultLo = convertToScalableVector(DAG, ScalableWidenedVT, ResultLo);
  ResultHi = convertToScalableVector(DAG, ScalableWidenedVT, ResultHi);
  SDValue ScalableResult = DAG.getNode(AArch64ISD::UZP1, dl, ContainerVT,
                                       ResultLo, ResultHi);

  return convertFromScalableVector(DAG, VT, ScalableResult);
}
18267 | |
SDValue AArch64TargetLowering::LowerFixedLengthVectorIntExtendToSVE(
    SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");

  SDLoc DL(Op);
  SDValue Val = Op.getOperand(0);
  EVT ContainerVT = getContainerForFixedLengthVector(DAG, Val.getValueType());
  Val = convertToScalableVector(DAG, ContainerVT, Val);

  bool Signed = Op.getOpcode() == ISD::SIGN_EXTEND;
  unsigned ExtendOpc = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;

  // Repeatedly unpack Val until the result has the desired element type;
  // the cases deliberately fall through to widen one step at a time.
  switch (ContainerVT.getSimpleVT().SimpleTy) {
  default:
    llvm_unreachable("unimplemented container type");
  case MVT::nxv16i8:
    Val = DAG.getNode(ExtendOpc, DL, MVT::nxv8i16, Val);
    if (VT.getVectorElementType() == MVT::i16)
      break;
    LLVM_FALLTHROUGH;
  case MVT::nxv8i16:
    Val = DAG.getNode(ExtendOpc, DL, MVT::nxv4i32, Val);
    if (VT.getVectorElementType() == MVT::i32)
      break;
    LLVM_FALLTHROUGH;
  case MVT::nxv4i32:
    Val = DAG.getNode(ExtendOpc, DL, MVT::nxv2i64, Val);
    assert(VT.getVectorElementType() == MVT::i64 && "Unexpected element type!");
    break;
  }

  return convertFromScalableVector(DAG, VT, Val);
}
18303 | |
SDValue AArch64TargetLowering::LowerFixedLengthVectorTruncateToSVE(
    SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");

  SDLoc DL(Op);
  SDValue Val = Op.getOperand(0);
  EVT ContainerVT = getContainerForFixedLengthVector(DAG, Val.getValueType());
  Val = convertToScalableVector(DAG, ContainerVT, Val);

  // Repeatedly narrow Val (bitcast + UZP1 picking the even, i.e. low, halves)
  // until the result has the desired element type; the cases deliberately
  // fall through to narrow one step at a time.
  switch (ContainerVT.getSimpleVT().SimpleTy) {
  default:
    llvm_unreachable("unimplemented container type");
  case MVT::nxv2i64:
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::nxv4i32, Val);
    Val = DAG.getNode(AArch64ISD::UZP1, DL, MVT::nxv4i32, Val, Val);
    if (VT.getVectorElementType() == MVT::i32)
      break;
    LLVM_FALLTHROUGH;
  case MVT::nxv4i32:
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::nxv8i16, Val);
    Val = DAG.getNode(AArch64ISD::UZP1, DL, MVT::nxv8i16, Val, Val);
    if (VT.getVectorElementType() == MVT::i16)
      break;
    LLVM_FALLTHROUGH;
  case MVT::nxv8i16:
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::nxv16i8, Val);
    Val = DAG.getNode(AArch64ISD::UZP1, DL, MVT::nxv16i8, Val, Val);
    assert(VT.getVectorElementType() == MVT::i8 && "Unexpected element type!");
    break;
  }

  return convertFromScalableVector(DAG, VT, Val);
}
18339 | |
// Lower EXTRACT_VECTOR_ELT from a fixed-length source by first widening the
// source into its scalable container; the element index is unchanged.
SDValue AArch64TargetLowering::LowerFixedLengthExtractVectorElt(
    SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  EVT InVT = Op.getOperand(0).getValueType();
  assert(InVT.isFixedLengthVector() && "Expected fixed length vector type!");

  SDLoc DL(Op);
  EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
  SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(0));

  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Op.getOperand(1));
}
18352 | |
// Lower INSERT_VECTOR_ELT into a fixed-length vector by performing the
// insert on the scalable container, then shrinking back to VT.
SDValue AArch64TargetLowering::LowerFixedLengthInsertVectorElt(
    SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");

  SDLoc DL(Op);
  EVT InVT = Op.getOperand(0).getValueType();
  EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
  SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(0));

  auto ScalableRes = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ContainerVT, Op0,
                                 Op.getOperand(1), Op.getOperand(2));

  return convertFromScalableVector(DAG, VT, ScalableRes);
}
18368 | |
18369 | |
18370 | |
18371 | |
// Convert vector operation 'Op' to an equivalent predicated operation
// 'NewOp', prepending a governing predicate (and appending an undef passthru
// for merging opcodes). Fixed-length operands are widened to scalable
// container types first.
SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op,
                                                   SelectionDAG &DAG,
                                                   unsigned NewOp,
                                                   bool OverrideNEON) const {
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  auto Pg = getPredicateForVector(DAG, DL, VT);

  if (useSVEForFixedLengthVectorVT(VT, OverrideNEON)) {
    EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);

    // Create the list of operands, converting existing vectors to scalable
    // types and rewriting VT operands to use container element types.
    SmallVector<SDValue, 4> Operands = {Pg};
    for (const SDValue &V : Op->op_values()) {
      // Condition codes pass through unchanged.
      if (isa<CondCodeSDNode>(V)) {
        Operands.push_back(V);
        continue;
      }

      // VT operands (e.g. for extending loads) keep the element type but
      // adopt the container's lane structure.
      if (const VTSDNode *VTNode = dyn_cast<VTSDNode>(V)) {
        EVT VTArg = VTNode->getVT().getVectorElementType();
        EVT NewVTArg = ContainerVT.changeVectorElementType(VTArg);
        Operands.push_back(DAG.getValueType(NewVTArg));
        continue;
      }

      assert(useSVEForFixedLengthVectorVT(V.getValueType(), OverrideNEON) &&
             "Only fixed length vectors are supported!");
      Operands.push_back(convertToScalableVector(DAG, ContainerVT, V));
    }

    if (isMergePassthruOpcode(NewOp))
      Operands.push_back(DAG.getUNDEF(ContainerVT));

    auto ScalableRes = DAG.getNode(NewOp, DL, ContainerVT, Operands);
    return convertFromScalableVector(DAG, VT, ScalableRes);
  }

  assert(VT.isScalableVector() && "Only expect to lower scalable vector op!");

  // Already scalable: just prepend the predicate (and passthru if needed).
  SmallVector<SDValue, 4> Operands = {Pg};
  for (const SDValue &V : Op->op_values()) {
    assert((!V.getValueType().isVector() ||
            V.getValueType().isScalableVector()) &&
           "Only scalable vectors are supported!");
    Operands.push_back(V);
  }

  if (isMergePassthruOpcode(NewOp))
    Operands.push_back(DAG.getUNDEF(VT));

  return DAG.getNode(NewOp, DL, VT, Operands);
}
18425 | |
18426 | |
18427 | |
18428 | |
18429 | SDValue AArch64TargetLowering::LowerToScalableOp(SDValue Op, |
18430 | SelectionDAG &DAG) const { |
18431 | EVT VT = Op.getValueType(); |
18432 | assert(useSVEForFixedLengthVectorVT(VT) && |
18433 | "Only expected to lower fixed length vector operation!"); |
18434 | EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT); |
18435 | |
18436 | |
18437 | SmallVector<SDValue, 4> Ops; |
18438 | for (const SDValue &V : Op->op_values()) { |
18439 | assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!"); |
18440 | |
18441 | |
18442 | if (!V.getValueType().isVector()) { |
18443 | Ops.push_back(V); |
18444 | continue; |
18445 | } |
18446 | |
18447 | |
18448 | assert(useSVEForFixedLengthVectorVT(V.getValueType()) && |
18449 | "Only fixed length vectors are supported!"); |
18450 | Ops.push_back(convertToScalableVector(DAG, ContainerVT, V)); |
18451 | } |
18452 | |
18453 | auto ScalableRes = DAG.getNode(Op.getOpcode(), SDLoc(Op), ContainerVT, Ops); |
18454 | return convertFromScalableVector(DAG, VT, ScalableRes); |
18455 | } |
18456 | |
18457 | SDValue AArch64TargetLowering::LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, |
18458 | SelectionDAG &DAG) const { |
18459 | SDLoc DL(ScalarOp); |
18460 | SDValue AccOp = ScalarOp.getOperand(0); |
18461 | SDValue VecOp = ScalarOp.getOperand(1); |
18462 | EVT SrcVT = VecOp.getValueType(); |
18463 | EVT ResVT = SrcVT.getVectorElementType(); |
18464 | |
18465 | EVT ContainerVT = SrcVT; |
18466 | if (SrcVT.isFixedLengthVector()) { |
18467 | ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT); |
18468 | VecOp = convertToScalableVector(DAG, ContainerVT, VecOp); |
18469 | } |
18470 | |
18471 | SDValue Pg = getPredicateForVector(DAG, DL, SrcVT); |
18472 | SDValue Zero = DAG.getConstant(0, DL, MVT::i64); |
18473 | |
18474 | |
18475 | AccOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ContainerVT, |
18476 | DAG.getUNDEF(ContainerVT), AccOp, Zero); |
18477 | |
18478 | |
18479 | SDValue Rdx = DAG.getNode(AArch64ISD::FADDA_PRED, DL, ContainerVT, |
18480 | Pg, AccOp, VecOp); |
18481 | |
18482 | return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Rdx, Zero); |
18483 | } |
18484 | |
18485 | SDValue AArch64TargetLowering::LowerPredReductionToSVE(SDValue ReduceOp, |
18486 | SelectionDAG &DAG) const { |
18487 | SDLoc DL(ReduceOp); |
18488 | SDValue Op = ReduceOp.getOperand(0); |
18489 | EVT OpVT = Op.getValueType(); |
18490 | EVT VT = ReduceOp.getValueType(); |
18491 | |
18492 | if (!OpVT.isScalableVector() || OpVT.getVectorElementType() != MVT::i1) |
18493 | return SDValue(); |
18494 | |
18495 | SDValue Pg = getPredicateForVector(DAG, DL, OpVT); |
18496 | |
18497 | switch (ReduceOp.getOpcode()) { |
18498 | default: |
18499 | return SDValue(); |
18500 | case ISD::VECREDUCE_OR: |
18501 | return getPTest(DAG, VT, Pg, Op, AArch64CC::ANY_ACTIVE); |
18502 | case ISD::VECREDUCE_AND: { |
18503 | Op = DAG.getNode(ISD::XOR, DL, OpVT, Op, Pg); |
18504 | return getPTest(DAG, VT, Pg, Op, AArch64CC::NONE_ACTIVE); |
18505 | } |
18506 | case ISD::VECREDUCE_XOR: { |
18507 | SDValue ID = |
18508 | DAG.getTargetConstant(Intrinsic::aarch64_sve_cntp, DL, MVT::i64); |
18509 | SDValue Cntp = |
18510 | DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i64, ID, Pg, Op); |
18511 | return DAG.getAnyExtOrTrunc(Cntp, DL, VT); |
18512 | } |
18513 | } |
18514 | |
18515 | return SDValue(); |
18516 | } |
18517 | |
// Lower a vector reduction to the SVE predicated reduction node Opcode.
// The reduction's scalar result is produced in lane 0 of a vector register,
// which is extracted and, if needed, extended/truncated to the scalar type
// the original node expects.
SDValue AArch64TargetLowering::LowerReductionToSVE(unsigned Opcode,
                                                   SDValue ScalarOp,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(ScalarOp);
  SDValue VecOp = ScalarOp.getOperand(0);
  EVT SrcVT = VecOp.getValueType();

  // Widen fixed length inputs into a scalable container. OverrideNEON=true
  // so NEON-sized fixed vectors are also routed through SVE here.
  if (useSVEForFixedLengthVectorVT(SrcVT, true)) {
    EVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT);
    VecOp = convertToScalableVector(DAG, ContainerVT, VecOp);
  }

  // UADDV always produces an i64 result; all other reductions produce the
  // source's element type.
  EVT ResVT = (Opcode == AArch64ISD::UADDV_PRED) ? MVT::i64 :
              SrcVT.getVectorElementType();
  // The reduction node's vector result type: the packed SVE vector of ResVT
  // when the source was fixed length or the result width differs (UADDV).
  EVT RdxVT = SrcVT;
  if (SrcVT.isFixedLengthVector() || Opcode == AArch64ISD::UADDV_PRED)
    RdxVT = getPackedSVEVectorVT(ResVT);

  SDValue Pg = getPredicateForVector(DAG, DL, SrcVT);
  SDValue Rdx = DAG.getNode(Opcode, DL, RdxVT, Pg, VecOp);
  // The scalar result is in lane 0.
  SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT,
                            Rdx, DAG.getConstant(0, DL, MVT::i64));

  // Adjust to the scalar type callers of the original node expect.
  if (ResVT != ScalarOp.getValueType())
    Res = DAG.getAnyExtOrTrunc(Res, DL, ScalarOp.getValueType());

  return Res;
}
18548 | |
18549 | SDValue |
18550 | AArch64TargetLowering::LowerFixedLengthVectorSelectToSVE(SDValue Op, |
18551 | SelectionDAG &DAG) const { |
18552 | EVT VT = Op.getValueType(); |
18553 | SDLoc DL(Op); |
18554 | |
18555 | EVT InVT = Op.getOperand(1).getValueType(); |
18556 | EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT); |
18557 | SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(1)); |
18558 | SDValue Op2 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(2)); |
18559 | |
18560 | |
18561 | |
18562 | EVT MaskVT = Op.getOperand(0).getValueType(); |
18563 | EVT MaskContainerVT = getContainerForFixedLengthVector(DAG, MaskVT); |
18564 | auto Mask = convertToScalableVector(DAG, MaskContainerVT, Op.getOperand(0)); |
18565 | Mask = DAG.getNode(ISD::TRUNCATE, DL, |
18566 | MaskContainerVT.changeVectorElementType(MVT::i1), Mask); |
18567 | |
18568 | auto ScalableRes = DAG.getNode(ISD::VSELECT, DL, ContainerVT, |
18569 | Mask, Op1, Op2); |
18570 | |
18571 | return convertFromScalableVector(DAG, VT, ScalableRes); |
18572 | } |
18573 | |
// Lower a fixed length SETCC to AArch64ISD::SETCC_MERGE_ZERO: widen the
// inputs into a scalable container, compare under a predicate covering the
// fixed length lanes, and promote the i1 result back to an integer vector
// of the same bit width as the inputs.
SDValue AArch64TargetLowering::LowerFixedLengthVectorSetccToSVE(
    SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT InVT = Op.getOperand(0).getValueType();
  EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);

  assert(useSVEForFixedLengthVectorVT(InVT) &&
         "Only expected to lower fixed length vector operation!");
  assert(Op.getValueType() == InVT.changeTypeToInteger() &&
         "Expected integer result of the same bit length as the inputs!");

  auto Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
  auto Op2 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(1));
  auto Pg = getPredicateForFixedLengthVector(DAG, DL, InVT);

  // Compare under the predicate; Op.getOperand(2) is the condition code.
  EVT CmpVT = Pg.getValueType();
  auto Cmp = DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, CmpVT,
                         {Pg, Op1, Op2, Op.getOperand(2)});

  // Extend the i1 predicate result to full-width integer lanes, using the
  // boolean-contents convention of InVT for the extension kind.
  EVT PromoteVT = ContainerVT.changeTypeToInteger();
  auto Promote = DAG.getBoolExtOrTrunc(Cmp, DL, PromoteVT, InVT);
  return convertFromScalableVector(DAG, Op.getValueType(), Promote);
}
18597 | |
18598 | SDValue |
18599 | AArch64TargetLowering::LowerFixedLengthBitcastToSVE(SDValue Op, |
18600 | SelectionDAG &DAG) const { |
18601 | SDLoc DL(Op); |
18602 | auto SrcOp = Op.getOperand(0); |
18603 | EVT VT = Op.getValueType(); |
18604 | EVT ContainerDstVT = getContainerForFixedLengthVector(DAG, VT); |
18605 | EVT ContainerSrcVT = |
18606 | getContainerForFixedLengthVector(DAG, SrcOp.getValueType()); |
18607 | |
18608 | SrcOp = convertToScalableVector(DAG, ContainerSrcVT, SrcOp); |
18609 | Op = DAG.getNode(ISD::BITCAST, DL, ContainerDstVT, SrcOp); |
18610 | return convertFromScalableVector(DAG, VT, Op); |
18611 | } |
18612 | |
// Lower a fixed length CONCAT_VECTORS to SVE. Two operands are combined
// with AArch64ISD::SPLICE: both halves are widened into the result
// container and spliced under a predicate covering the lanes of the first
// operand. More than two operands are first reduced to the two-operand
// case by concatenating adjacent pairs into double-width vectors.
SDValue AArch64TargetLowering::LowerFixedLengthConcatVectorsToSVE(
    SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  unsigned NumOperands = Op->getNumOperands();

  assert(NumOperands > 1 && isPowerOf2_32(NumOperands) &&
         "Unexpected number of operands in CONCAT_VECTORS");

  auto SrcOp1 = Op.getOperand(0);
  auto SrcOp2 = Op.getOperand(1);
  EVT VT = Op.getValueType();
  EVT SrcVT = SrcOp1.getValueType();

  // >2 operands: pairwise-concat into double-width vectors, then emit a new
  // (half as wide) CONCAT_VECTORS over the pairs.
  if (NumOperands > 2) {
    SmallVector<SDValue, 4> Ops;
    EVT PairVT = SrcVT.getDoubleNumVectorElementsVT(*DAG.getContext());
    for (unsigned I = 0; I < NumOperands; I += 2)
      Ops.push_back(DAG.getNode(ISD::CONCAT_VECTORS, DL, PairVT,
                                Op->getOperand(I), Op->getOperand(I + 1)));

    return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Ops);
  }

  EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);

  // Predicate selecting exactly the lanes of one source operand.
  SDValue Pg = getPredicateForFixedLengthVector(DAG, DL, SrcVT);
  SrcOp1 = convertToScalableVector(DAG, ContainerVT, SrcOp1);
  SrcOp2 = convertToScalableVector(DAG, ContainerVT, SrcOp2);

  Op = DAG.getNode(AArch64ISD::SPLICE, DL, ContainerVT, Pg, SrcOp1, SrcOp2);

  return convertFromScalableVector(DAG, VT, Op);
}
18646 | |
18647 | SDValue |
18648 | AArch64TargetLowering::LowerFixedLengthFPExtendToSVE(SDValue Op, |
18649 | SelectionDAG &DAG) const { |
18650 | EVT VT = Op.getValueType(); |
18651 | assert(VT.isFixedLengthVector() && "Expected fixed length vector type!"); |
18652 | |
18653 | SDLoc DL(Op); |
18654 | SDValue Val = Op.getOperand(0); |
18655 | SDValue Pg = getPredicateForVector(DAG, DL, VT); |
18656 | EVT SrcVT = Val.getValueType(); |
18657 | EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT); |
18658 | EVT ExtendVT = ContainerVT.changeVectorElementType( |
18659 | SrcVT.getVectorElementType()); |
18660 | |
18661 | Val = DAG.getNode(ISD::BITCAST, DL, SrcVT.changeTypeToInteger(), Val); |
18662 | Val = DAG.getNode(ISD::ANY_EXTEND, DL, VT.changeTypeToInteger(), Val); |
18663 | |
18664 | Val = convertToScalableVector(DAG, ContainerVT.changeTypeToInteger(), Val); |
18665 | Val = getSVESafeBitCast(ExtendVT, Val, DAG); |
18666 | Val = DAG.getNode(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU, DL, ContainerVT, |
18667 | Pg, Val, DAG.getUNDEF(ContainerVT)); |
18668 | |
18669 | return convertFromScalableVector(DAG, VT, Val); |
18670 | } |
18671 | |
18672 | SDValue |
18673 | AArch64TargetLowering::LowerFixedLengthFPRoundToSVE(SDValue Op, |
18674 | SelectionDAG &DAG) const { |
18675 | EVT VT = Op.getValueType(); |
18676 | assert(VT.isFixedLengthVector() && "Expected fixed length vector type!"); |
18677 | |
18678 | SDLoc DL(Op); |
18679 | SDValue Val = Op.getOperand(0); |
18680 | EVT SrcVT = Val.getValueType(); |
18681 | EVT ContainerSrcVT = getContainerForFixedLengthVector(DAG, SrcVT); |
18682 | EVT RoundVT = ContainerSrcVT.changeVectorElementType( |
18683 | VT.getVectorElementType()); |
18684 | SDValue Pg = getPredicateForVector(DAG, DL, RoundVT); |
18685 | |
18686 | Val = convertToScalableVector(DAG, ContainerSrcVT, Val); |
18687 | Val = DAG.getNode(AArch64ISD::FP_ROUND_MERGE_PASSTHRU, DL, RoundVT, Pg, Val, |
18688 | Op.getOperand(1), DAG.getUNDEF(RoundVT)); |
18689 | Val = getSVESafeBitCast(ContainerSrcVT.changeTypeToInteger(), Val, DAG); |
18690 | Val = convertFromScalableVector(DAG, SrcVT.changeTypeToInteger(), Val); |
18691 | |
18692 | Val = DAG.getNode(ISD::TRUNCATE, DL, VT.changeTypeToInteger(), Val); |
18693 | return DAG.getNode(ISD::BITCAST, DL, VT, Val); |
18694 | } |
18695 | |
18696 | SDValue |
18697 | AArch64TargetLowering::LowerFixedLengthIntToFPToSVE(SDValue Op, |
18698 | SelectionDAG &DAG) const { |
18699 | EVT VT = Op.getValueType(); |
18700 | assert(VT.isFixedLengthVector() && "Expected fixed length vector type!"); |
18701 | |
18702 | bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP; |
18703 | unsigned Opcode = IsSigned ? AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU |
18704 | : AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU; |
18705 | |
18706 | SDLoc DL(Op); |
18707 | SDValue Val = Op.getOperand(0); |
18708 | EVT SrcVT = Val.getValueType(); |
18709 | EVT ContainerDstVT = getContainerForFixedLengthVector(DAG, VT); |
18710 | EVT ContainerSrcVT = getContainerForFixedLengthVector(DAG, SrcVT); |
18711 | |
18712 | if (ContainerSrcVT.getVectorElementType().getSizeInBits() <= |
18713 | ContainerDstVT.getVectorElementType().getSizeInBits()) { |
18714 | SDValue Pg = getPredicateForVector(DAG, DL, VT); |
18715 | |
18716 | Val = DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL, |
18717 | VT.changeTypeToInteger(), Val); |
18718 | |
18719 | Val = convertToScalableVector(DAG, ContainerSrcVT, Val); |
18720 | Val = getSVESafeBitCast(ContainerDstVT.changeTypeToInteger(), Val, DAG); |
18721 | |
18722 | |
18723 | Val = DAG.getNode(Opcode, DL, ContainerDstVT, Pg, Val, |
18724 | DAG.getUNDEF(ContainerDstVT)); |
18725 | return convertFromScalableVector(DAG, VT, Val); |
18726 | } else { |
18727 | EVT CvtVT = ContainerSrcVT.changeVectorElementType( |
18728 | ContainerDstVT.getVectorElementType()); |
18729 | SDValue Pg = getPredicateForVector(DAG, DL, CvtVT); |
18730 | |
18731 | Val = convertToScalableVector(DAG, ContainerSrcVT, Val); |
18732 | Val = DAG.getNode(Opcode, DL, CvtVT, Pg, Val, DAG.getUNDEF(CvtVT)); |
18733 | Val = getSVESafeBitCast(ContainerSrcVT, Val, DAG); |
18734 | Val = convertFromScalableVector(DAG, SrcVT, Val); |
18735 | |
18736 | Val = DAG.getNode(ISD::TRUNCATE, DL, VT.changeTypeToInteger(), Val); |
18737 | return DAG.getNode(ISD::BITCAST, DL, VT, Val); |
18738 | } |
18739 | } |
18740 | |
18741 | SDValue |
18742 | AArch64TargetLowering::LowerFixedLengthFPToIntToSVE(SDValue Op, |
18743 | SelectionDAG &DAG) const { |
18744 | EVT VT = Op.getValueType(); |
18745 | assert(VT.isFixedLengthVector() && "Expected fixed length vector type!"); |
18746 | |
18747 | bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT; |
18748 | unsigned Opcode = IsSigned ? AArch64ISD::FCVTZS_MERGE_PASSTHRU |
18749 | : AArch64ISD::FCVTZU_MERGE_PASSTHRU; |
18750 | |
18751 | SDLoc DL(Op); |
18752 | SDValue Val = Op.getOperand(0); |
18753 | EVT SrcVT = Val.getValueType(); |
18754 | EVT ContainerDstVT = getContainerForFixedLengthVector(DAG, VT); |
18755 | EVT ContainerSrcVT = getContainerForFixedLengthVector(DAG, SrcVT); |
18756 | |
18757 | if (ContainerSrcVT.getVectorElementType().getSizeInBits() <= |
18758 | ContainerDstVT.getVectorElementType().getSizeInBits()) { |
18759 | EVT CvtVT = ContainerDstVT.changeVectorElementType( |
18760 | ContainerSrcVT.getVectorElementType()); |
18761 | SDValue Pg = getPredicateForVector(DAG, DL, VT); |
18762 | |
18763 | Val = DAG.getNode(ISD::BITCAST, DL, SrcVT.changeTypeToInteger(), Val); |
18764 | Val = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Val); |
18765 | |
18766 | Val = convertToScalableVector(DAG, ContainerSrcVT, Val); |
18767 | Val = getSVESafeBitCast(CvtVT, Val, DAG); |
18768 | Val = DAG.getNode(Opcode, DL, ContainerDstVT, Pg, Val, |
18769 | DAG.getUNDEF(ContainerDstVT)); |
18770 | return convertFromScalableVector(DAG, VT, Val); |
18771 | } else { |
18772 | EVT CvtVT = ContainerSrcVT.changeTypeToInteger(); |
18773 | SDValue Pg = getPredicateForVector(DAG, DL, CvtVT); |
18774 | |
18775 | |
18776 | |
18777 | Val = convertToScalableVector(DAG, ContainerSrcVT, Val); |
18778 | Val = DAG.getNode(Opcode, DL, CvtVT, Pg, Val, DAG.getUNDEF(CvtVT)); |
18779 | Val = convertFromScalableVector(DAG, SrcVT.changeTypeToInteger(), Val); |
18780 | |
18781 | return DAG.getNode(ISD::TRUNCATE, DL, VT, Val); |
18782 | } |
18783 | } |
18784 | |
// Lower a fixed length VECTOR_SHUFFLE to SVE. Only one pattern is handled
// here: an EXT-style mask rotating by a single element
// (Imm == NumElts - 1), which is lowered to INSR of the last element of
// one operand into the front of the other. All other shuffles return an
// empty SDValue so generic lowering takes over.
SDValue AArch64TargetLowering::LowerFixedLengthVECTOR_SHUFFLEToSVE(
    SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");

  auto *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
  auto ShuffleMask = SVN->getMask();

  SDLoc DL(Op);
  SDValue Op1 = Op.getOperand(0);
  SDValue Op2 = Op.getOperand(1);

  EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
  Op1 = convertToScalableVector(DAG, ContainerVT, Op1);
  Op2 = convertToScalableVector(DAG, ContainerVT, Op2);

  bool ReverseEXT = false;
  unsigned Imm;
  if (isEXTMask(ShuffleMask, VT, ReverseEXT, Imm) &&
      Imm == VT.getVectorNumElements() - 1) {
    if (ReverseEXT)
      std::swap(Op1, Op2);

    // i8/i16 elements are extracted as i32 scalars.
    EVT ScalarTy = VT.getVectorElementType();
    if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
      ScalarTy = MVT::i32;
    // Pull out the last element of Op1 and insert it at the front of Op2.
    SDValue Scalar = DAG.getNode(
        ISD::EXTRACT_VECTOR_ELT, DL, ScalarTy, Op1,
        DAG.getConstant(VT.getVectorNumElements() - 1, DL, MVT::i64));
    Op = DAG.getNode(AArch64ISD::INSR, DL, ContainerVT, Op2, Scalar);
    return convertFromScalableVector(DAG, VT, Op);
  }

  return SDValue();
}
18820 | |
// Reinterpret the bits of one legal scalable vector type as another.
// Predicate (i1) vectors are handled with REINTERPRET_CAST directly. For
// data vectors, the plain ISD::BITCAST is only performed between the packed
// equivalents of the two types; unpacked types are bridged with
// REINTERPRET_CAST on either side.
SDValue AArch64TargetLowering::getSVESafeBitCast(EVT VT, SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT InVT = Op.getValueType();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  (void)TLI; // Only referenced by the asserts below.

  assert(VT.isScalableVector() && TLI.isTypeLegal(VT) &&
         InVT.isScalableVector() && TLI.isTypeLegal(InVT) &&
         "Only expect to cast between legal scalable vector types!");
  assert((VT.getVectorElementType() == MVT::i1) ==
         (InVT.getVectorElementType() == MVT::i1) &&
         "Cannot cast between data and predicate scalable vector types!");

  // Nothing to do for identical types.
  if (InVT == VT)
    return Op;

  // Predicate-to-predicate casts.
  if (VT.getVectorElementType() == MVT::i1)
    return DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Op);

  EVT PackedVT = getPackedSVEVectorVT(VT.getVectorElementType());
  EVT PackedInVT = getPackedSVEVectorVT(InVT.getVectorElementType());

  // Pack the input if it is currently unpacked.
  if (InVT != PackedInVT)
    Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, PackedInVT, Op);

  Op = DAG.getNode(ISD::BITCAST, DL, PackedVT, Op);

  // Unpack the result if an unpacked type was requested.
  if (VT != PackedVT)
    Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Op);

  return Op;
}
18856 | |
// Thin wrapper exposing the file-local ::isAllActivePredicate() helper
// through the TargetLowering interface.
bool AArch64TargetLowering::isAllActivePredicate(SDValue N) const {
  return ::isAllActivePredicate(N);
}
18860 | |
// Thin wrapper exposing the file-local ::getPromotedVTForPredicate() helper
// through the TargetLowering interface.
EVT AArch64TargetLowering::getPromotedVTForPredicate(EVT VT) const {
  return ::getPromotedVTForPredicate(VT);
}
18864 | |
// Target hook: try to simplify AArch64-specific nodes given that only some
// of the result bits are demanded by Op's users. Falls back to the generic
// TargetLowering implementation for anything not handled here.
bool AArch64TargetLowering::SimplifyDemandedBitsForTargetNode(
    SDValue Op, const APInt &OriginalDemandedBits,
    const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
    unsigned Depth) const {

  unsigned Opc = Op.getOpcode();
  switch (Opc) {
  case AArch64ISD::VSHL: {
    // Match (VSHL (VLSHR Val X) X): shifting right then left by the same
    // amount only clears the low X bits of each lane.
    SDValue ShiftL = Op;
    SDValue ShiftR = Op->getOperand(0);
    if (ShiftR->getOpcode() != AArch64ISD::VLSHR)
      return false;

    // Both shifts must have no other users so the pair can be replaced.
    if (!ShiftL.hasOneUse() || !ShiftR.hasOneUse())
      return false;

    unsigned ShiftLBits = ShiftL->getConstantOperandVal(1);
    unsigned ShiftRBits = ShiftR->getConstantOperandVal(1);

    // Differing shift amounts could be handled too, but are not implemented.
    if (ShiftRBits != ShiftLBits)
      return false;

    unsigned ScalarSize = Op.getScalarValueSizeInBits();
    assert(ScalarSize > ShiftLBits && "Invalid shift imm");

    // The shift pair zeroes exactly the low ShiftLBits of each lane.
    APInt ZeroBits = APInt::getLowBitsSet(ScalarSize, ShiftLBits);
    APInt UnusedBits = ~OriginalDemandedBits;

    // Bail out if any of the zeroed bits are actually demanded.
    if ((ZeroBits & UnusedBits) != ZeroBits)
      return false;

    // Every bit changed by (VSHL (VLSHR Val X) X) is unused, so the whole
    // expression can be replaced by Val itself.
    return TLO.CombineTo(Op, ShiftR->getOperand(0));
  }
  }

  return TargetLowering::SimplifyDemandedBitsForTargetNode(
      Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
}
18908 | |
// Hook: a constant-operand unsigned bitfield extract is only considered
// legal when both LLT type indices match and are exactly 32- or 64-bit
// scalars (presumably matching the UBFX register widths — the callers are
// outside this file).
// NOTE(review): "Extact" looks like a typo for "Extract", but the name must
// match the TargetLowering hook being overridden, so it cannot be fixed here.
bool AArch64TargetLowering::isConstantUnsignedBitfieldExtactLegal(
    unsigned Opc, LLT Ty1, LLT Ty2) const {
  return Ty1 == Ty2 && (Ty1 == LLT::scalar(32) || Ty1 == LLT::scalar(64));
}