Bug Summary

File: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Warning: line 11455, column 25
Called C++ object pointer is null
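For orientation, the checker behind this diagnostic (core.CallAndMessage) fires when a member function is called through a pointer that is null on some path the analyzer explored. The sketch below is illustrative only and is not the code at line 11455; the names Node, getOperand and walk are hypothetical and merely stand in for a dyn_cast-style helper that can return null.

// Minimal, self-contained sketch of the defect class reported here
// ("Called C++ object pointer is null"). All names are hypothetical.
#include <cstdio>

struct Node {
  int Opcode;
  Node *Operand;
  int getOpcode() const { return Opcode; }        // called through a pointer
  Node *getOperand() const { return Operand; }
};

int walk(Node *N) {
  // Helpers that can return null must be checked before the result is used;
  // if a path reaches the call with a null object pointer, the checker fires.
  Node *Op = N ? N->getOperand() : nullptr;
  if (!Op)
    std::puts("operand missing");                 // missing early return here
  return Op->getOpcode();  // analyzer: called C++ object pointer is null
}

int main() {
  Node Leaf{1, nullptr};
  Node Root{2, &Leaf};
  return walk(&Root);      // fine on this path; walk(&Leaf) would trip the bug
}

The usual fix is an early return (or an assertion visible to the analyzer) on the null path before the call.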

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name AArch64ISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Target/AArch64 -resource-dir /usr/lib/llvm-14/lib/clang/14.0.0 -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/include -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/include -D NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-14/lib/clang/14.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Target/AArch64 -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e=. -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-09-04-040900-46481-1 -x c++ /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
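The command above is the clang -cc1 invocation that scan-build recorded for this translation unit. Reports of this kind are typically produced by wrapping both the configure and build steps with scan-build; the exact flow used for this snapshot is not shown, so the following is only an assumed sketch (output directory and the LLVMAArch64CodeGen target name are illustrative):

# assumed reproduction sketch, not the exact commands used for this report
scan-build cmake -G Ninja ../llvm -DCMAKE_BUILD_TYPE=Release
scan-build -o /tmp/scan-build-reports ninja LLVMAArch64CodeGen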

/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

1//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the AArch64TargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64ISelLowering.h"
14#include "AArch64CallingConvention.h"
15#include "AArch64ExpandImm.h"
16#include "AArch64MachineFunctionInfo.h"
17#include "AArch64PerfectShuffle.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
20#include "MCTargetDesc/AArch64AddressingModes.h"
21#include "Utils/AArch64BaseInfo.h"
22#include "llvm/ADT/APFloat.h"
23#include "llvm/ADT/APInt.h"
24#include "llvm/ADT/ArrayRef.h"
25#include "llvm/ADT/STLExtras.h"
26#include "llvm/ADT/SmallSet.h"
27#include "llvm/ADT/SmallVector.h"
28#include "llvm/ADT/Statistic.h"
29#include "llvm/ADT/StringRef.h"
30#include "llvm/ADT/Triple.h"
31#include "llvm/ADT/Twine.h"
32#include "llvm/Analysis/ObjCARCUtil.h"
33#include "llvm/Analysis/VectorUtils.h"
34#include "llvm/CodeGen/Analysis.h"
35#include "llvm/CodeGen/CallingConvLower.h"
36#include "llvm/CodeGen/MachineBasicBlock.h"
37#include "llvm/CodeGen/MachineFrameInfo.h"
38#include "llvm/CodeGen/MachineFunction.h"
39#include "llvm/CodeGen/MachineInstr.h"
40#include "llvm/CodeGen/MachineInstrBuilder.h"
41#include "llvm/CodeGen/MachineMemOperand.h"
42#include "llvm/CodeGen/MachineRegisterInfo.h"
43#include "llvm/CodeGen/RuntimeLibcalls.h"
44#include "llvm/CodeGen/SelectionDAG.h"
45#include "llvm/CodeGen/SelectionDAGNodes.h"
46#include "llvm/CodeGen/TargetCallingConv.h"
47#include "llvm/CodeGen/TargetInstrInfo.h"
48#include "llvm/CodeGen/ValueTypes.h"
49#include "llvm/IR/Attributes.h"
50#include "llvm/IR/Constants.h"
51#include "llvm/IR/DataLayout.h"
52#include "llvm/IR/DebugLoc.h"
53#include "llvm/IR/DerivedTypes.h"
54#include "llvm/IR/Function.h"
55#include "llvm/IR/GetElementPtrTypeIterator.h"
56#include "llvm/IR/GlobalValue.h"
57#include "llvm/IR/IRBuilder.h"
58#include "llvm/IR/Instruction.h"
59#include "llvm/IR/Instructions.h"
60#include "llvm/IR/IntrinsicInst.h"
61#include "llvm/IR/Intrinsics.h"
62#include "llvm/IR/IntrinsicsAArch64.h"
63#include "llvm/IR/Module.h"
64#include "llvm/IR/OperandTraits.h"
65#include "llvm/IR/PatternMatch.h"
66#include "llvm/IR/Type.h"
67#include "llvm/IR/Use.h"
68#include "llvm/IR/Value.h"
69#include "llvm/MC/MCRegisterInfo.h"
70#include "llvm/Support/Casting.h"
71#include "llvm/Support/CodeGen.h"
72#include "llvm/Support/CommandLine.h"
73#include "llvm/Support/Compiler.h"
74#include "llvm/Support/Debug.h"
75#include "llvm/Support/ErrorHandling.h"
76#include "llvm/Support/KnownBits.h"
77#include "llvm/Support/MachineValueType.h"
78#include "llvm/Support/MathExtras.h"
79#include "llvm/Support/raw_ostream.h"
80#include "llvm/Target/TargetMachine.h"
81#include "llvm/Target/TargetOptions.h"
82#include <algorithm>
83#include <bitset>
84#include <cassert>
85#include <cctype>
86#include <cstdint>
87#include <cstdlib>
88#include <iterator>
89#include <limits>
90#include <tuple>
91#include <utility>
92#include <vector>
93
94using namespace llvm;
95using namespace llvm::PatternMatch;
96
97#define DEBUG_TYPE "aarch64-lower"
98
99STATISTIC(NumTailCalls, "Number of tail calls");
100STATISTIC(NumShiftInserts, "Number of vector shift inserts");
101STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");
102
103// FIXME: The necessary dtprel relocations don't seem to be supported
104// well in the GNU bfd and gold linkers at the moment. Therefore, by
105// default, for now, fall back to GeneralDynamic code generation.
106cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
107 "aarch64-elf-ldtls-generation", cl::Hidden,
108 cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
109 cl::init(false));
110
111static cl::opt<bool>
112EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden,
113 cl::desc("Enable AArch64 logical imm instruction "
114 "optimization"),
115 cl::init(true));
116
117// Temporary option added for the purpose of testing functionality added
118// to DAGCombiner.cpp in D92230. It is expected that this can be removed
119// in future when both implementations will be based off MGATHER rather
120// than the GLD1 nodes added for the SVE gather load intrinsics.
121static cl::opt<bool>
122EnableCombineMGatherIntrinsics("aarch64-enable-mgather-combine", cl::Hidden,
123 cl::desc("Combine extends of AArch64 masked "
124 "gather intrinsics"),
125 cl::init(true));
126
127/// Value type used for condition codes.
128static const MVT MVT_CC = MVT::i32;
129
130static inline EVT getPackedSVEVectorVT(EVT VT) {
131 switch (VT.getSimpleVT().SimpleTy) {
132 default:
133 llvm_unreachable("unexpected element type for vector");
134 case MVT::i8:
135 return MVT::nxv16i8;
136 case MVT::i16:
137 return MVT::nxv8i16;
138 case MVT::i32:
139 return MVT::nxv4i32;
140 case MVT::i64:
141 return MVT::nxv2i64;
142 case MVT::f16:
143 return MVT::nxv8f16;
144 case MVT::f32:
145 return MVT::nxv4f32;
146 case MVT::f64:
147 return MVT::nxv2f64;
148 case MVT::bf16:
149 return MVT::nxv8bf16;
150 }
151}
152
153// NOTE: Currently there's only a need to return integer vector types. If this
154// changes then just add an extra "type" parameter.
155static inline EVT getPackedSVEVectorVT(ElementCount EC) {
156 switch (EC.getKnownMinValue()) {
157 default:
158 llvm_unreachable("unexpected element count for vector");
159 case 16:
160 return MVT::nxv16i8;
161 case 8:
162 return MVT::nxv8i16;
163 case 4:
164 return MVT::nxv4i32;
165 case 2:
166 return MVT::nxv2i64;
167 }
168}
169
170static inline EVT getPromotedVTForPredicate(EVT VT) {
171 assert(VT.isScalableVector() && (VT.getVectorElementType() == MVT::i1) &&
172 "Expected scalable predicate vector type!");
173 switch (VT.getVectorMinNumElements()) {
174 default:
175 llvm_unreachable("unexpected element count for vector");
176 case 2:
177 return MVT::nxv2i64;
178 case 4:
179 return MVT::nxv4i32;
180 case 8:
181 return MVT::nxv8i16;
182 case 16:
183 return MVT::nxv16i8;
184 }
185}
186
187/// Returns true if VT's elements occupy the lowest bit positions of its
188/// associated register class without any intervening space.
189///
190/// For example, nxv2f16, nxv4f16 and nxv8f16 are legal types that belong to the
191/// same register class, but only nxv8f16 can be treated as a packed vector.
192static inline bool isPackedVectorType(EVT VT, SelectionDAG &DAG) {
193 assert(VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
194 "Expected legal vector type!");
195 return VT.isFixedLengthVector() ||
196 VT.getSizeInBits().getKnownMinSize() == AArch64::SVEBitsPerBlock;
197}
198
199// Returns true for ####_MERGE_PASSTHRU opcodes, whose operands have a leading
200// predicate and end with a passthru value matching the result type.
201static bool isMergePassthruOpcode(unsigned Opc) {
202 switch (Opc) {
203 default:
204 return false;
205 case AArch64ISD::BITREVERSE_MERGE_PASSTHRU:
206 case AArch64ISD::BSWAP_MERGE_PASSTHRU:
207 case AArch64ISD::CTLZ_MERGE_PASSTHRU:
208 case AArch64ISD::CTPOP_MERGE_PASSTHRU:
209 case AArch64ISD::DUP_MERGE_PASSTHRU:
210 case AArch64ISD::ABS_MERGE_PASSTHRU:
211 case AArch64ISD::NEG_MERGE_PASSTHRU:
212 case AArch64ISD::FNEG_MERGE_PASSTHRU:
213 case AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU:
214 case AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU:
215 case AArch64ISD::FCEIL_MERGE_PASSTHRU:
216 case AArch64ISD::FFLOOR_MERGE_PASSTHRU:
217 case AArch64ISD::FNEARBYINT_MERGE_PASSTHRU:
218 case AArch64ISD::FRINT_MERGE_PASSTHRU:
219 case AArch64ISD::FROUND_MERGE_PASSTHRU:
220 case AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU:
221 case AArch64ISD::FTRUNC_MERGE_PASSTHRU:
222 case AArch64ISD::FP_ROUND_MERGE_PASSTHRU:
223 case AArch64ISD::FP_EXTEND_MERGE_PASSTHRU:
224 case AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU:
225 case AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU:
226 case AArch64ISD::FCVTZU_MERGE_PASSTHRU:
227 case AArch64ISD::FCVTZS_MERGE_PASSTHRU:
228 case AArch64ISD::FSQRT_MERGE_PASSTHRU:
229 case AArch64ISD::FRECPX_MERGE_PASSTHRU:
230 case AArch64ISD::FABS_MERGE_PASSTHRU:
231 return true;
232 }
233}
234
235AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
236 const AArch64Subtarget &STI)
237 : TargetLowering(TM), Subtarget(&STI) {
238 // AArch64 doesn't have comparisons which set GPRs or setcc instructions, so
239 // we have to make something up. Arbitrarily, choose ZeroOrOne.
240 setBooleanContents(ZeroOrOneBooleanContent);
241 // When comparing vectors the result sets the different elements in the
242 // vector to all-one or all-zero.
243 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
244
245 // Set up the register classes.
246 addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass);
247 addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass);
248
249 if (Subtarget->hasLS64()) {
250 addRegisterClass(MVT::i64x8, &AArch64::GPR64x8ClassRegClass);
251 setOperationAction(ISD::LOAD, MVT::i64x8, Custom);
252 setOperationAction(ISD::STORE, MVT::i64x8, Custom);
253 }
254
255 if (Subtarget->hasFPARMv8()) {
256 addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
257 addRegisterClass(MVT::bf16, &AArch64::FPR16RegClass);
258 addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
259 addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
260 addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
261 }
262
263 if (Subtarget->hasNEON()) {
264 addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass);
265 addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass);
266 // Someone set us up the NEON.
267 addDRTypeForNEON(MVT::v2f32);
268 addDRTypeForNEON(MVT::v8i8);
269 addDRTypeForNEON(MVT::v4i16);
270 addDRTypeForNEON(MVT::v2i32);
271 addDRTypeForNEON(MVT::v1i64);
272 addDRTypeForNEON(MVT::v1f64);
273 addDRTypeForNEON(MVT::v4f16);
274 if (Subtarget->hasBF16())
275 addDRTypeForNEON(MVT::v4bf16);
276
277 addQRTypeForNEON(MVT::v4f32);
278 addQRTypeForNEON(MVT::v2f64);
279 addQRTypeForNEON(MVT::v16i8);
280 addQRTypeForNEON(MVT::v8i16);
281 addQRTypeForNEON(MVT::v4i32);
282 addQRTypeForNEON(MVT::v2i64);
283 addQRTypeForNEON(MVT::v8f16);
284 if (Subtarget->hasBF16())
285 addQRTypeForNEON(MVT::v8bf16);
286 }
287
288 if (Subtarget->hasSVE()) {
289 // Add legal sve predicate types
290 addRegisterClass(MVT::nxv2i1, &AArch64::PPRRegClass);
291 addRegisterClass(MVT::nxv4i1, &AArch64::PPRRegClass);
292 addRegisterClass(MVT::nxv8i1, &AArch64::PPRRegClass);
293 addRegisterClass(MVT::nxv16i1, &AArch64::PPRRegClass);
294
295 // Add legal sve data types
296 addRegisterClass(MVT::nxv16i8, &AArch64::ZPRRegClass);
297 addRegisterClass(MVT::nxv8i16, &AArch64::ZPRRegClass);
298 addRegisterClass(MVT::nxv4i32, &AArch64::ZPRRegClass);
299 addRegisterClass(MVT::nxv2i64, &AArch64::ZPRRegClass);
300
301 addRegisterClass(MVT::nxv2f16, &AArch64::ZPRRegClass);
302 addRegisterClass(MVT::nxv4f16, &AArch64::ZPRRegClass);
303 addRegisterClass(MVT::nxv8f16, &AArch64::ZPRRegClass);
304 addRegisterClass(MVT::nxv2f32, &AArch64::ZPRRegClass);
305 addRegisterClass(MVT::nxv4f32, &AArch64::ZPRRegClass);
306 addRegisterClass(MVT::nxv2f64, &AArch64::ZPRRegClass);
307
308 if (Subtarget->hasBF16()) {
309 addRegisterClass(MVT::nxv2bf16, &AArch64::ZPRRegClass);
310 addRegisterClass(MVT::nxv4bf16, &AArch64::ZPRRegClass);
311 addRegisterClass(MVT::nxv8bf16, &AArch64::ZPRRegClass);
312 }
313
314 if (Subtarget->useSVEForFixedLengthVectors()) {
315 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
316 if (useSVEForFixedLengthVectorVT(VT))
317 addRegisterClass(VT, &AArch64::ZPRRegClass);
318
319 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
320 if (useSVEForFixedLengthVectorVT(VT))
321 addRegisterClass(VT, &AArch64::ZPRRegClass);
322 }
323
324 for (auto VT : { MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64 }) {
325 setOperationAction(ISD::SADDSAT, VT, Legal);
326 setOperationAction(ISD::UADDSAT, VT, Legal);
327 setOperationAction(ISD::SSUBSAT, VT, Legal);
328 setOperationAction(ISD::USUBSAT, VT, Legal);
329 setOperationAction(ISD::UREM, VT, Expand);
330 setOperationAction(ISD::SREM, VT, Expand);
331 setOperationAction(ISD::SDIVREM, VT, Expand);
332 setOperationAction(ISD::UDIVREM, VT, Expand);
333 }
334
335 for (auto VT :
336 { MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8,
337 MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16 })
338 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Legal);
339
340 for (auto VT :
341 { MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32, MVT::nxv4f32,
342 MVT::nxv2f64 }) {
343 setCondCodeAction(ISD::SETO, VT, Expand);
344 setCondCodeAction(ISD::SETOLT, VT, Expand);
345 setCondCodeAction(ISD::SETLT, VT, Expand);
346 setCondCodeAction(ISD::SETOLE, VT, Expand);
347 setCondCodeAction(ISD::SETLE, VT, Expand);
348 setCondCodeAction(ISD::SETULT, VT, Expand);
349 setCondCodeAction(ISD::SETULE, VT, Expand);
350 setCondCodeAction(ISD::SETUGE, VT, Expand);
351 setCondCodeAction(ISD::SETUGT, VT, Expand);
352 setCondCodeAction(ISD::SETUEQ, VT, Expand);
353 setCondCodeAction(ISD::SETUNE, VT, Expand);
354
355 setOperationAction(ISD::FREM, VT, Expand);
356 setOperationAction(ISD::FPOW, VT, Expand);
357 setOperationAction(ISD::FPOWI, VT, Expand);
358 setOperationAction(ISD::FCOS, VT, Expand);
359 setOperationAction(ISD::FSIN, VT, Expand);
360 setOperationAction(ISD::FSINCOS, VT, Expand);
361 setOperationAction(ISD::FEXP, VT, Expand);
362 setOperationAction(ISD::FEXP2, VT, Expand);
363 setOperationAction(ISD::FLOG, VT, Expand);
364 setOperationAction(ISD::FLOG2, VT, Expand);
365 setOperationAction(ISD::FLOG10, VT, Expand);
366 }
367 }
368
369 // Compute derived properties from the register classes
370 computeRegisterProperties(Subtarget->getRegisterInfo());
371
372 // Provide all sorts of operation actions
373 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
374 setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
375 setOperationAction(ISD::SETCC, MVT::i32, Custom);
376 setOperationAction(ISD::SETCC, MVT::i64, Custom);
377 setOperationAction(ISD::SETCC, MVT::f16, Custom);
378 setOperationAction(ISD::SETCC, MVT::f32, Custom);
379 setOperationAction(ISD::SETCC, MVT::f64, Custom);
380 setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Custom);
381 setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Custom);
382 setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Custom);
383 setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom);
384 setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Custom);
385 setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Custom);
386 setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
387 setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
388 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
389 setOperationAction(ISD::BR_CC, MVT::i32, Custom);
390 setOperationAction(ISD::BR_CC, MVT::i64, Custom);
391 setOperationAction(ISD::BR_CC, MVT::f16, Custom);
392 setOperationAction(ISD::BR_CC, MVT::f32, Custom);
393 setOperationAction(ISD::BR_CC, MVT::f64, Custom);
394 setOperationAction(ISD::SELECT, MVT::i32, Custom);
395 setOperationAction(ISD::SELECT, MVT::i64, Custom);
396 setOperationAction(ISD::SELECT, MVT::f16, Custom);
397 setOperationAction(ISD::SELECT, MVT::f32, Custom);
398 setOperationAction(ISD::SELECT, MVT::f64, Custom);
399 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
400 setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
401 setOperationAction(ISD::SELECT_CC, MVT::f16, Custom);
402 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
403 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
404 setOperationAction(ISD::BR_JT, MVT::Other, Custom);
405 setOperationAction(ISD::JumpTable, MVT::i64, Custom);
406
407 setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
408 setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
409 setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
410
411 setOperationAction(ISD::FREM, MVT::f32, Expand);
412 setOperationAction(ISD::FREM, MVT::f64, Expand);
413 setOperationAction(ISD::FREM, MVT::f80, Expand);
414
415 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
416
417 // Custom lowering hooks are needed for XOR
418 // to fold it into CSINC/CSINV.
419 setOperationAction(ISD::XOR, MVT::i32, Custom);
420 setOperationAction(ISD::XOR, MVT::i64, Custom);
421
422 // Virtually no operation on f128 is legal, but LLVM can't expand them when
423 // there's a valid register class, so we need custom operations in most cases.
424 setOperationAction(ISD::FABS, MVT::f128, Expand);
425 setOperationAction(ISD::FADD, MVT::f128, LibCall);
426 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
427 setOperationAction(ISD::FCOS, MVT::f128, Expand);
428 setOperationAction(ISD::FDIV, MVT::f128, LibCall);
429 setOperationAction(ISD::FMA, MVT::f128, Expand);
430 setOperationAction(ISD::FMUL, MVT::f128, LibCall);
431 setOperationAction(ISD::FNEG, MVT::f128, Expand);
432 setOperationAction(ISD::FPOW, MVT::f128, Expand);
433 setOperationAction(ISD::FREM, MVT::f128, Expand);
434 setOperationAction(ISD::FRINT, MVT::f128, Expand);
435 setOperationAction(ISD::FSIN, MVT::f128, Expand);
436 setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
437 setOperationAction(ISD::FSQRT, MVT::f128, Expand);
438 setOperationAction(ISD::FSUB, MVT::f128, LibCall);
439 setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
440 setOperationAction(ISD::SETCC, MVT::f128, Custom);
441 setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Custom);
442 setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Custom);
443 setOperationAction(ISD::BR_CC, MVT::f128, Custom);
444 setOperationAction(ISD::SELECT, MVT::f128, Custom);
445 setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
446 setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
447
448 // Lowering for many of the conversions is actually specified by the non-f128
449 // type. The LowerXXX function will be trivial when f128 isn't involved.
450 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
451 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
452 setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
453 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
454 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
455 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i128, Custom);
456 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
457 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
458 setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
459 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
460 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
461 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i128, Custom);
462 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
463 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
464 setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
465 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
466 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
467 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i128, Custom);
468 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
469 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
470 setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
471 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
472 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
473 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, Custom);
474 setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
475 setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
476 setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
477 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
478 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
479 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom);
480
481 setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i32, Custom);
482 setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
483 setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i32, Custom);
484 setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);
485
486 // Variable arguments.
487 setOperationAction(ISD::VASTART, MVT::Other, Custom);
488 setOperationAction(ISD::VAARG, MVT::Other, Custom);
489 setOperationAction(ISD::VACOPY, MVT::Other, Custom);
490 setOperationAction(ISD::VAEND, MVT::Other, Expand);
491
492 // Variable-sized objects.
493 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
494 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
495
496 if (Subtarget->isTargetWindows())
497 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
498 else
499 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
500
501 // Constant pool entries
502 setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
503
504 // BlockAddress
505 setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
506
507 // Add/Sub overflow ops with MVT::Glues are lowered to NZCV dependences.
508 setOperationAction(ISD::ADDC, MVT::i32, Custom);
509 setOperationAction(ISD::ADDE, MVT::i32, Custom);
510 setOperationAction(ISD::SUBC, MVT::i32, Custom);
511 setOperationAction(ISD::SUBE, MVT::i32, Custom);
512 setOperationAction(ISD::ADDC, MVT::i64, Custom);
513 setOperationAction(ISD::ADDE, MVT::i64, Custom);
514 setOperationAction(ISD::SUBC, MVT::i64, Custom);
515 setOperationAction(ISD::SUBE, MVT::i64, Custom);
516
517 // AArch64 lacks both left-rotate and popcount instructions.
518 setOperationAction(ISD::ROTL, MVT::i32, Expand);
519 setOperationAction(ISD::ROTL, MVT::i64, Expand);
520 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
521 setOperationAction(ISD::ROTL, VT, Expand);
522 setOperationAction(ISD::ROTR, VT, Expand);
523 }
524
525 // AArch64 doesn't have i32 MULH{S|U}.
526 setOperationAction(ISD::MULHU, MVT::i32, Expand);
527 setOperationAction(ISD::MULHS, MVT::i32, Expand);
528
529 // AArch64 doesn't have {U|S}MUL_LOHI.
530 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
531 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
532
533 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
534 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
535 setOperationAction(ISD::CTPOP, MVT::i128, Custom);
536
537 setOperationAction(ISD::ABS, MVT::i32, Custom);
538 setOperationAction(ISD::ABS, MVT::i64, Custom);
539
540 setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
541 setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
542 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
543 setOperationAction(ISD::SDIVREM, VT, Expand);
544 setOperationAction(ISD::UDIVREM, VT, Expand);
545 }
546 setOperationAction(ISD::SREM, MVT::i32, Expand);
547 setOperationAction(ISD::SREM, MVT::i64, Expand);
548 setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
549 setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
550 setOperationAction(ISD::UREM, MVT::i32, Expand);
551 setOperationAction(ISD::UREM, MVT::i64, Expand);
552
553 // Custom lower Add/Sub/Mul with overflow.
554 setOperationAction(ISD::SADDO, MVT::i32, Custom);
555 setOperationAction(ISD::SADDO, MVT::i64, Custom);
556 setOperationAction(ISD::UADDO, MVT::i32, Custom);
557 setOperationAction(ISD::UADDO, MVT::i64, Custom);
558 setOperationAction(ISD::SSUBO, MVT::i32, Custom);
559 setOperationAction(ISD::SSUBO, MVT::i64, Custom);
560 setOperationAction(ISD::USUBO, MVT::i32, Custom);
561 setOperationAction(ISD::USUBO, MVT::i64, Custom);
562 setOperationAction(ISD::SMULO, MVT::i32, Custom);
563 setOperationAction(ISD::SMULO, MVT::i64, Custom);
564 setOperationAction(ISD::UMULO, MVT::i32, Custom);
565 setOperationAction(ISD::UMULO, MVT::i64, Custom);
566
567 setOperationAction(ISD::FSIN, MVT::f32, Expand);
568 setOperationAction(ISD::FSIN, MVT::f64, Expand);
569 setOperationAction(ISD::FCOS, MVT::f32, Expand);
570 setOperationAction(ISD::FCOS, MVT::f64, Expand);
571 setOperationAction(ISD::FPOW, MVT::f32, Expand);
572 setOperationAction(ISD::FPOW, MVT::f64, Expand);
573 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
574 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
575 if (Subtarget->hasFullFP16())
576 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Custom);
577 else
578 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
579
580 setOperationAction(ISD::FREM, MVT::f16, Promote);
581 setOperationAction(ISD::FREM, MVT::v4f16, Expand);
582 setOperationAction(ISD::FREM, MVT::v8f16, Expand);
583 setOperationAction(ISD::FPOW, MVT::f16, Promote);
584 setOperationAction(ISD::FPOW, MVT::v4f16, Expand);
585 setOperationAction(ISD::FPOW, MVT::v8f16, Expand);
586 setOperationAction(ISD::FPOWI, MVT::f16, Promote);
587 setOperationAction(ISD::FPOWI, MVT::v4f16, Expand);
588 setOperationAction(ISD::FPOWI, MVT::v8f16, Expand);
589 setOperationAction(ISD::FCOS, MVT::f16, Promote);
590 setOperationAction(ISD::FCOS, MVT::v4f16, Expand);
591 setOperationAction(ISD::FCOS, MVT::v8f16, Expand);
592 setOperationAction(ISD::FSIN, MVT::f16, Promote);
593 setOperationAction(ISD::FSIN, MVT::v4f16, Expand);
594 setOperationAction(ISD::FSIN, MVT::v8f16, Expand);
595 setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
596 setOperationAction(ISD::FSINCOS, MVT::v4f16, Expand);
597 setOperationAction(ISD::FSINCOS, MVT::v8f16, Expand);
598 setOperationAction(ISD::FEXP, MVT::f16, Promote);
599 setOperationAction(ISD::FEXP, MVT::v4f16, Expand);
600 setOperationAction(ISD::FEXP, MVT::v8f16, Expand);
601 setOperationAction(ISD::FEXP2, MVT::f16, Promote);
602 setOperationAction(ISD::FEXP2, MVT::v4f16, Expand);
603 setOperationAction(ISD::FEXP2, MVT::v8f16, Expand);
604 setOperationAction(ISD::FLOG, MVT::f16, Promote);
605 setOperationAction(ISD::FLOG, MVT::v4f16, Expand);
606 setOperationAction(ISD::FLOG, MVT::v8f16, Expand);
607 setOperationAction(ISD::FLOG2, MVT::f16, Promote);
608 setOperationAction(ISD::FLOG2, MVT::v4f16, Expand);
609 setOperationAction(ISD::FLOG2, MVT::v8f16, Expand);
610 setOperationAction(ISD::FLOG10, MVT::f16, Promote);
611 setOperationAction(ISD::FLOG10, MVT::v4f16, Expand);
612 setOperationAction(ISD::FLOG10, MVT::v8f16, Expand);
613
614 if (!Subtarget->hasFullFP16()) {
615 setOperationAction(ISD::SELECT, MVT::f16, Promote);
616 setOperationAction(ISD::SELECT_CC, MVT::f16, Promote);
617 setOperationAction(ISD::SETCC, MVT::f16, Promote);
618 setOperationAction(ISD::BR_CC, MVT::f16, Promote);
619 setOperationAction(ISD::FADD, MVT::f16, Promote);
620 setOperationAction(ISD::FSUB, MVT::f16, Promote);
621 setOperationAction(ISD::FMUL, MVT::f16, Promote);
622 setOperationAction(ISD::FDIV, MVT::f16, Promote);
623 setOperationAction(ISD::FMA, MVT::f16, Promote);
624 setOperationAction(ISD::FNEG, MVT::f16, Promote);
625 setOperationAction(ISD::FABS, MVT::f16, Promote);
626 setOperationAction(ISD::FCEIL, MVT::f16, Promote);
627 setOperationAction(ISD::FSQRT, MVT::f16, Promote);
628 setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
629 setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
630 setOperationAction(ISD::FRINT, MVT::f16, Promote);
631 setOperationAction(ISD::FROUND, MVT::f16, Promote);
632 setOperationAction(ISD::FROUNDEVEN, MVT::f16, Promote);
633 setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
634 setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
635 setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
636 setOperationAction(ISD::FMINIMUM, MVT::f16, Promote);
637 setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote);
638
639 // promote v4f16 to v4f32 when that is known to be safe.
640 setOperationAction(ISD::FADD, MVT::v4f16, Promote);
641 setOperationAction(ISD::FSUB, MVT::v4f16, Promote);
642 setOperationAction(ISD::FMUL, MVT::v4f16, Promote);
643 setOperationAction(ISD::FDIV, MVT::v4f16, Promote);
644 AddPromotedToType(ISD::FADD, MVT::v4f16, MVT::v4f32);
645 AddPromotedToType(ISD::FSUB, MVT::v4f16, MVT::v4f32);
646 AddPromotedToType(ISD::FMUL, MVT::v4f16, MVT::v4f32);
647 AddPromotedToType(ISD::FDIV, MVT::v4f16, MVT::v4f32);
648
649 setOperationAction(ISD::FABS, MVT::v4f16, Expand);
650 setOperationAction(ISD::FNEG, MVT::v4f16, Expand);
651 setOperationAction(ISD::FROUND, MVT::v4f16, Expand);
652 setOperationAction(ISD::FROUNDEVEN, MVT::v4f16, Expand);
653 setOperationAction(ISD::FMA, MVT::v4f16, Expand);
654 setOperationAction(ISD::SETCC, MVT::v4f16, Expand);
655 setOperationAction(ISD::BR_CC, MVT::v4f16, Expand);
656 setOperationAction(ISD::SELECT, MVT::v4f16, Expand);
657 setOperationAction(ISD::SELECT_CC, MVT::v4f16, Expand);
658 setOperationAction(ISD::FTRUNC, MVT::v4f16, Expand);
659 setOperationAction(ISD::FCOPYSIGN, MVT::v4f16, Expand);
660 setOperationAction(ISD::FFLOOR, MVT::v4f16, Expand);
661 setOperationAction(ISD::FCEIL, MVT::v4f16, Expand);
662 setOperationAction(ISD::FRINT, MVT::v4f16, Expand);
663 setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand);
664 setOperationAction(ISD::FSQRT, MVT::v4f16, Expand);
665
666 setOperationAction(ISD::FABS, MVT::v8f16, Expand);
667 setOperationAction(ISD::FADD, MVT::v8f16, Expand);
668 setOperationAction(ISD::FCEIL, MVT::v8f16, Expand);
669 setOperationAction(ISD::FCOPYSIGN, MVT::v8f16, Expand);
670 setOperationAction(ISD::FDIV, MVT::v8f16, Expand);
671 setOperationAction(ISD::FFLOOR, MVT::v8f16, Expand);
672 setOperationAction(ISD::FMA, MVT::v8f16, Expand);
673 setOperationAction(ISD::FMUL, MVT::v8f16, Expand);
674 setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand);
675 setOperationAction(ISD::FNEG, MVT::v8f16, Expand);
676 setOperationAction(ISD::FROUND, MVT::v8f16, Expand);
677 setOperationAction(ISD::FROUNDEVEN, MVT::v8f16, Expand);
678 setOperationAction(ISD::FRINT, MVT::v8f16, Expand);
679 setOperationAction(ISD::FSQRT, MVT::v8f16, Expand);
680 setOperationAction(ISD::FSUB, MVT::v8f16, Expand);
681 setOperationAction(ISD::FTRUNC, MVT::v8f16, Expand);
682 setOperationAction(ISD::SETCC, MVT::v8f16, Expand);
683 setOperationAction(ISD::BR_CC, MVT::v8f16, Expand);
684 setOperationAction(ISD::SELECT, MVT::v8f16, Expand);
685 setOperationAction(ISD::SELECT_CC, MVT::v8f16, Expand);
686 setOperationAction(ISD::FP_EXTEND, MVT::v8f16, Expand);
687 }
688
689 // AArch64 has implementations of a lot of rounding-like FP operations.
690 for (MVT Ty : {MVT::f32, MVT::f64}) {
691 setOperationAction(ISD::FFLOOR, Ty, Legal);
692 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
693 setOperationAction(ISD::FCEIL, Ty, Legal);
694 setOperationAction(ISD::FRINT, Ty, Legal);
695 setOperationAction(ISD::FTRUNC, Ty, Legal);
696 setOperationAction(ISD::FROUND, Ty, Legal);
697 setOperationAction(ISD::FROUNDEVEN, Ty, Legal);
698 setOperationAction(ISD::FMINNUM, Ty, Legal);
699 setOperationAction(ISD::FMAXNUM, Ty, Legal);
700 setOperationAction(ISD::FMINIMUM, Ty, Legal);
701 setOperationAction(ISD::FMAXIMUM, Ty, Legal);
702 setOperationAction(ISD::LROUND, Ty, Legal);
703 setOperationAction(ISD::LLROUND, Ty, Legal);
704 setOperationAction(ISD::LRINT, Ty, Legal);
705 setOperationAction(ISD::LLRINT, Ty, Legal);
706 }
707
708 if (Subtarget->hasFullFP16()) {
709 setOperationAction(ISD::FNEARBYINT, MVT::f16, Legal);
710 setOperationAction(ISD::FFLOOR, MVT::f16, Legal);
711 setOperationAction(ISD::FCEIL, MVT::f16, Legal);
712 setOperationAction(ISD::FRINT, MVT::f16, Legal);
713 setOperationAction(ISD::FTRUNC, MVT::f16, Legal);
714 setOperationAction(ISD::FROUND, MVT::f16, Legal);
715 setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal);
716 setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
717 setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
718 setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
719 setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal);
720 }
721
722 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
723
724 setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
725 setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
726
727 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
728 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
729 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
730 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
731 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
732
733 // Generate outline atomics library calls only if LSE was not specified for
734 // subtarget
735 if (Subtarget->outlineAtomics() && !Subtarget->hasLSE()) {
736 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, LibCall);
737 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, LibCall);
738 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, LibCall);
739 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, LibCall);
740 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, LibCall);
741 setOperationAction(ISD::ATOMIC_SWAP, MVT::i8, LibCall);
742 setOperationAction(ISD::ATOMIC_SWAP, MVT::i16, LibCall);
743 setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, LibCall);
744 setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, LibCall);
745 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i8, LibCall);
746 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i16, LibCall);
747 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, LibCall);
748 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, LibCall);
749 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i8, LibCall);
750 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i16, LibCall);
751 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, LibCall);
752 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, LibCall);
753 setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i8, LibCall);
754 setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i16, LibCall);
755 setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i32, LibCall);
756 setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i64, LibCall);
757 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i8, LibCall);
758 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i16, LibCall);
759 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, LibCall);
760 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, LibCall);
761#define LCALLNAMES(A, B, N) \
762 setLibcallName(A##N##_RELAX, #B #N "_relax"); \
763 setLibcallName(A##N##_ACQ, #B #N "_acq"); \
764 setLibcallName(A##N##_REL, #B #N "_rel"); \
765 setLibcallName(A##N##_ACQ_REL, #B #N "_acq_rel");
766#define LCALLNAME4(A, B) \
767 LCALLNAMES(A, B, 1) \
768 LCALLNAMES(A, B, 2) LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8)
769#define LCALLNAME5(A, B) \
770 LCALLNAMES(A, B, 1) \
771 LCALLNAMES(A, B, 2) \
772 LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8) LCALLNAMES(A, B, 16)
773 LCALLNAME5(RTLIB::OUTLINE_ATOMIC_CAS, __aarch64_cas)
774 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_SWP, __aarch64_swp)
775 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDADD, __aarch64_ldadd)
776 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDSET, __aarch64_ldset)
777 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDCLR, __aarch64_ldclr)
778 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDEOR, __aarch64_ldeor)
779#undef LCALLNAMES
780#undef LCALLNAME4
781#undef LCALLNAME5
782 }
783
784 // 128-bit loads and stores can be done without expanding
785 setOperationAction(ISD::LOAD, MVT::i128, Custom);
786 setOperationAction(ISD::STORE, MVT::i128, Custom);
787
788 // 256 bit non-temporal stores can be lowered to STNP. Do this as part of the
789 // custom lowering, as there are no un-paired non-temporal stores and
790 // legalization will break up 256 bit inputs.
791 setOperationAction(ISD::STORE, MVT::v32i8, Custom);
792 setOperationAction(ISD::STORE, MVT::v16i16, Custom);
793 setOperationAction(ISD::STORE, MVT::v16f16, Custom);
794 setOperationAction(ISD::STORE, MVT::v8i32, Custom);
795 setOperationAction(ISD::STORE, MVT::v8f32, Custom);
796 setOperationAction(ISD::STORE, MVT::v4f64, Custom);
797 setOperationAction(ISD::STORE, MVT::v4i64, Custom);
798
799 // Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0.
800 // This requires the Performance Monitors extension.
801 if (Subtarget->hasPerfMon())
802 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
803
804 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
805 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
806 // Issue __sincos_stret if available.
807 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
808 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
809 } else {
810 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
811 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
812 }
813
814 if (Subtarget->getTargetTriple().isOSMSVCRT()) {
815 // MSVCRT doesn't have powi; fall back to pow
816 setLibcallName(RTLIB::POWI_F32, nullptr);
817 setLibcallName(RTLIB::POWI_F64, nullptr);
818 }
819
820 // Make floating-point constants legal for the large code model, so they don't
821 // become loads from the constant pool.
822 if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
823 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
824 setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
825 }
826
827 // AArch64 does not have floating-point extending loads, i1 sign-extending
828 // load, floating-point truncating stores, or v2i32->v2i16 truncating store.
829 for (MVT VT : MVT::fp_valuetypes()) {
830 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
831 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
832 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand);
833 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
834 }
835 for (MVT VT : MVT::integer_valuetypes())
836 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Expand);
837
838 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
839 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
840 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
841 setTruncStoreAction(MVT::f128, MVT::f80, Expand);
842 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
843 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
844 setTruncStoreAction(MVT::f128, MVT::f16, Expand);
845
846 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
847 setOperationAction(ISD::BITCAST, MVT::f16, Custom);
848 setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
849
850 // Indexed loads and stores are supported.
851 for (unsigned im = (unsigned)ISD::PRE_INC;
852 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
853 setIndexedLoadAction(im, MVT::i8, Legal);
854 setIndexedLoadAction(im, MVT::i16, Legal);
855 setIndexedLoadAction(im, MVT::i32, Legal);
856 setIndexedLoadAction(im, MVT::i64, Legal);
857 setIndexedLoadAction(im, MVT::f64, Legal);
858 setIndexedLoadAction(im, MVT::f32, Legal);
859 setIndexedLoadAction(im, MVT::f16, Legal);
860 setIndexedLoadAction(im, MVT::bf16, Legal);
861 setIndexedStoreAction(im, MVT::i8, Legal);
862 setIndexedStoreAction(im, MVT::i16, Legal);
863 setIndexedStoreAction(im, MVT::i32, Legal);
864 setIndexedStoreAction(im, MVT::i64, Legal);
865 setIndexedStoreAction(im, MVT::f64, Legal);
866 setIndexedStoreAction(im, MVT::f32, Legal);
867 setIndexedStoreAction(im, MVT::f16, Legal);
868 setIndexedStoreAction(im, MVT::bf16, Legal);
869 }
870
871 // Trap.
872 setOperationAction(ISD::TRAP, MVT::Other, Legal);
873 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
874 setOperationAction(ISD::UBSANTRAP, MVT::Other, Legal);
875
876 // We combine OR nodes for bitfield operations.
877 setTargetDAGCombine(ISD::OR);
878 // Try to create BICs for vector ANDs.
879 setTargetDAGCombine(ISD::AND);
880
881 // Vector add and sub nodes may conceal a high-half opportunity.
882 // Also, try to fold ADD into CSINC/CSINV..
883 setTargetDAGCombine(ISD::ADD);
884 setTargetDAGCombine(ISD::ABS);
885 setTargetDAGCombine(ISD::SUB);
886 setTargetDAGCombine(ISD::SRL);
887 setTargetDAGCombine(ISD::XOR);
888 setTargetDAGCombine(ISD::SINT_TO_FP);
889 setTargetDAGCombine(ISD::UINT_TO_FP);
890
891 // TODO: Do the same for FP_TO_*INT_SAT.
892 setTargetDAGCombine(ISD::FP_TO_SINT);
893 setTargetDAGCombine(ISD::FP_TO_UINT);
894 setTargetDAGCombine(ISD::FDIV);
895
896 // Try and combine setcc with csel
897 setTargetDAGCombine(ISD::SETCC);
898
899 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
900
901 setTargetDAGCombine(ISD::ANY_EXTEND);
902 setTargetDAGCombine(ISD::ZERO_EXTEND);
903 setTargetDAGCombine(ISD::SIGN_EXTEND);
904 setTargetDAGCombine(ISD::VECTOR_SPLICE);
905 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
906 setTargetDAGCombine(ISD::TRUNCATE);
907 setTargetDAGCombine(ISD::CONCAT_VECTORS);
908 setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
909 setTargetDAGCombine(ISD::STORE);
910 if (Subtarget->supportsAddressTopByteIgnored())
911 setTargetDAGCombine(ISD::LOAD);
912
913 setTargetDAGCombine(ISD::MUL);
914
915 setTargetDAGCombine(ISD::SELECT);
916 setTargetDAGCombine(ISD::VSELECT);
917
918 setTargetDAGCombine(ISD::INTRINSIC_VOID);
919 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
920 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
921 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
922 setTargetDAGCombine(ISD::VECREDUCE_ADD);
923 setTargetDAGCombine(ISD::STEP_VECTOR);
924
925 setTargetDAGCombine(ISD::GlobalAddress);
926
927 // In case of strict alignment, avoid an excessive number of byte wide stores.
928 MaxStoresPerMemsetOptSize = 8;
929 MaxStoresPerMemset = Subtarget->requiresStrictAlign()
930 ? MaxStoresPerMemsetOptSize : 32;
931
932 MaxGluedStoresPerMemcpy = 4;
933 MaxStoresPerMemcpyOptSize = 4;
934 MaxStoresPerMemcpy = Subtarget->requiresStrictAlign()
935 ? MaxStoresPerMemcpyOptSize : 16;
936
937 MaxStoresPerMemmoveOptSize = MaxStoresPerMemmove = 4;
938
939 MaxLoadsPerMemcmpOptSize = 4;
940 MaxLoadsPerMemcmp = Subtarget->requiresStrictAlign()
941 ? MaxLoadsPerMemcmpOptSize : 8;
942
943 setStackPointerRegisterToSaveRestore(AArch64::SP);
944
945 setSchedulingPreference(Sched::Hybrid);
946
947 EnableExtLdPromotion = true;
948
949 // Set required alignment.
950 setMinFunctionAlignment(Align(4));
951 // Set preferred alignments.
952 setPrefLoopAlignment(Align(1ULL << STI.getPrefLoopLogAlignment()));
953 setPrefFunctionAlignment(Align(1ULL << STI.getPrefFunctionLogAlignment()));
954
955 // Only change the limit for entries in a jump table if specified by
956 // the sub target, but not at the command line.
957 unsigned MaxJT = STI.getMaximumJumpTableSize();
958 if (MaxJT && getMaximumJumpTableSize() == UINT_MAX)
959 setMaximumJumpTableSize(MaxJT);
960
961 setHasExtractBitsInsn(true);
962
963 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
964
965 if (Subtarget->hasNEON()) {
966 // FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
967 // silliness like this:
968 setOperationAction(ISD::FABS, MVT::v1f64, Expand);
969 setOperationAction(ISD::FADD, MVT::v1f64, Expand);
970 setOperationAction(ISD::FCEIL, MVT::v1f64, Expand);
971 setOperationAction(ISD::FCOPYSIGN, MVT::v1f64, Expand);
972 setOperationAction(ISD::FCOS, MVT::v1f64, Expand);
973 setOperationAction(ISD::FDIV, MVT::v1f64, Expand);
974 setOperationAction(ISD::FFLOOR, MVT::v1f64, Expand);
975 setOperationAction(ISD::FMA, MVT::v1f64, Expand);
976 setOperationAction(ISD::FMUL, MVT::v1f64, Expand);
977 setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Expand);
978 setOperationAction(ISD::FNEG, MVT::v1f64, Expand);
979 setOperationAction(ISD::FPOW, MVT::v1f64, Expand);
980 setOperationAction(ISD::FREM, MVT::v1f64, Expand);
981 setOperationAction(ISD::FROUND, MVT::v1f64, Expand);
982 setOperationAction(ISD::FROUNDEVEN, MVT::v1f64, Expand);
983 setOperationAction(ISD::FRINT, MVT::v1f64, Expand);
984 setOperationAction(ISD::FSIN, MVT::v1f64, Expand);
985 setOperationAction(ISD::FSINCOS, MVT::v1f64, Expand);
986 setOperationAction(ISD::FSQRT, MVT::v1f64, Expand);
987 setOperationAction(ISD::FSUB, MVT::v1f64, Expand);
988 setOperationAction(ISD::FTRUNC, MVT::v1f64, Expand);
989 setOperationAction(ISD::SETCC, MVT::v1f64, Expand);
990 setOperationAction(ISD::BR_CC, MVT::v1f64, Expand);
991 setOperationAction(ISD::SELECT, MVT::v1f64, Expand);
992 setOperationAction(ISD::SELECT_CC, MVT::v1f64, Expand);
993 setOperationAction(ISD::FP_EXTEND, MVT::v1f64, Expand);
994
995 setOperationAction(ISD::FP_TO_SINT, MVT::v1i64, Expand);
996 setOperationAction(ISD::FP_TO_UINT, MVT::v1i64, Expand);
997 setOperationAction(ISD::SINT_TO_FP, MVT::v1i64, Expand);
998 setOperationAction(ISD::UINT_TO_FP, MVT::v1i64, Expand);
999 setOperationAction(ISD::FP_ROUND, MVT::v1f64, Expand);
1000
1001 setOperationAction(ISD::FP_TO_SINT_SAT, MVT::v1i64, Expand);
1002 setOperationAction(ISD::FP_TO_UINT_SAT, MVT::v1i64, Expand);
1003
1004 setOperationAction(ISD::MUL, MVT::v1i64, Expand);
1005
1006 // AArch64 doesn't have a direct vector ->f32 conversion instructions for
1007 // elements smaller than i32, so promote the input to i32 first.
1008 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i8, MVT::v4i32);
1009 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i8, MVT::v4i32);
1010 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32);
1011 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32);
1012 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v16i8, MVT::v16i32);
1013 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v16i8, MVT::v16i32);
1014
1015 // Similarly, there is no direct i32 -> f64 vector conversion instruction.
1016 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
1017 setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
1018 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom);
1019 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom);
1020 // Or, direct i32 -> f16 vector conversion. Set it so custom, so the
1021 // conversion happens in two steps: v4i32 -> v4f32 -> v4f16
1022 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom);
1023 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
1024
1025 if (Subtarget->hasFullFP16()) {
1026 setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
1027 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
1028 setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom);
1029 setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
1030 } else {
1031 // when AArch64 doesn't have fullfp16 support, promote the input
1032 // to i32 first.
1033 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i16, MVT::v4i32);
1034 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i16, MVT::v4i32);
1035 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i16, MVT::v8i32);
1036 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i16, MVT::v8i32);
1037 }
1038
1039 setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
1040 setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
1041 setOperationAction(ISD::BITREVERSE, MVT::v8i8, Legal);
1042 setOperationAction(ISD::BITREVERSE, MVT::v16i8, Legal);
1043 setOperationAction(ISD::BITREVERSE, MVT::v2i32, Custom);
1044 setOperationAction(ISD::BITREVERSE, MVT::v4i32, Custom);
1045 setOperationAction(ISD::BITREVERSE, MVT::v1i64, Custom);
1046 setOperationAction(ISD::BITREVERSE, MVT::v2i64, Custom);
1047 for (auto VT : {MVT::v1i64, MVT::v2i64}) {
1048 setOperationAction(ISD::UMAX, VT, Custom);
1049 setOperationAction(ISD::SMAX, VT, Custom);
1050 setOperationAction(ISD::UMIN, VT, Custom);
1051 setOperationAction(ISD::SMIN, VT, Custom);
1052 }
1053
1054 // AArch64 doesn't have MUL.2d:
1055 setOperationAction(ISD::MUL, MVT::v2i64, Expand);
1056 // Custom handling for some quad-vector types to detect MULL.
1057 setOperationAction(ISD::MUL, MVT::v8i16, Custom);
1058 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
1059 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
1060
1061 // Saturates
1062 for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
1063 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1064 setOperationAction(ISD::SADDSAT, VT, Legal);
1065 setOperationAction(ISD::UADDSAT, VT, Legal);
1066 setOperationAction(ISD::SSUBSAT, VT, Legal);
1067 setOperationAction(ISD::USUBSAT, VT, Legal);
1068 }
1069
1070 for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16,
1071 MVT::v4i32}) {
1072 setOperationAction(ISD::ABDS, VT, Legal);
1073 setOperationAction(ISD::ABDU, VT, Legal);
1074 }
1075
1076 // Vector reductions
1077 for (MVT VT : { MVT::v4f16, MVT::v2f32,
1078 MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
1079 if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) {
1080 setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
1081 setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
1082
1083 setOperationAction(ISD::VECREDUCE_FADD, VT, Legal);
1084 }
1085 }
1086 for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
1087 MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
1088 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
1089 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
1090 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
1091 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
1092 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
1093 }
1094 setOperationAction(ISD::VECREDUCE_ADD, MVT::v2i64, Custom);
1095
1096 setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal);
1097 setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
1098 // Likewise, narrowing and extending vector loads/stores aren't handled
1099 // directly.
1100 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
1101 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
1102
1103 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32) {
1104 setOperationAction(ISD::MULHS, VT, Legal);
1105 setOperationAction(ISD::MULHU, VT, Legal);
1106 } else {
1107 setOperationAction(ISD::MULHS, VT, Expand);
1108 setOperationAction(ISD::MULHU, VT, Expand);
1109 }
1110 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
1111 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
1112
1113 setOperationAction(ISD::BSWAP, VT, Expand);
1114 setOperationAction(ISD::CTTZ, VT, Expand);
1115
1116 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
1117 setTruncStoreAction(VT, InnerVT, Expand);
1118 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
1119 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
1120 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1121 }
1122 }
1123
1124 // AArch64 has implementations of a lot of rounding-like FP operations.
1125 for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) {
1126 setOperationAction(ISD::FFLOOR, Ty, Legal);
1127 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
1128 setOperationAction(ISD::FCEIL, Ty, Legal);
1129 setOperationAction(ISD::FRINT, Ty, Legal);
1130 setOperationAction(ISD::FTRUNC, Ty, Legal);
1131 setOperationAction(ISD::FROUND, Ty, Legal);
1132 setOperationAction(ISD::FROUNDEVEN, Ty, Legal);
1133 }
1134
1135 if (Subtarget->hasFullFP16()) {
1136 for (MVT Ty : {MVT::v4f16, MVT::v8f16}) {
1137 setOperationAction(ISD::FFLOOR, Ty, Legal);
1138 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
1139 setOperationAction(ISD::FCEIL, Ty, Legal);
1140 setOperationAction(ISD::FRINT, Ty, Legal);
1141 setOperationAction(ISD::FTRUNC, Ty, Legal);
1142 setOperationAction(ISD::FROUND, Ty, Legal);
1143 setOperationAction(ISD::FROUNDEVEN, Ty, Legal);
1144 }
1145 }
1146
1147 if (Subtarget->hasSVE())
1148 setOperationAction(ISD::VSCALE, MVT::i32, Custom);
1149
1150 setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom);
1151
1152 setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
1153 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
1154 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
1155 setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
1156 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
1157 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
1158 }
1159
1160 if (Subtarget->hasSVE()) {
1161 for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) {
1162 setOperationAction(ISD::BITREVERSE, VT, Custom);
1163 setOperationAction(ISD::BSWAP, VT, Custom);
1164 setOperationAction(ISD::CTLZ, VT, Custom);
1165 setOperationAction(ISD::CTPOP, VT, Custom);
1166 setOperationAction(ISD::CTTZ, VT, Custom);
1167 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1168 setOperationAction(ISD::UINT_TO_FP, VT, Custom);
1169 setOperationAction(ISD::SINT_TO_FP, VT, Custom);
1170 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
1171 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
1172 setOperationAction(ISD::MGATHER, VT, Custom);
1173 setOperationAction(ISD::MSCATTER, VT, Custom);
1174 setOperationAction(ISD::MLOAD, VT, Custom);
1175 setOperationAction(ISD::MUL, VT, Custom);
1176 setOperationAction(ISD::MULHS, VT, Custom);
1177 setOperationAction(ISD::MULHU, VT, Custom);
1178 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
1179 setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
1180 setOperationAction(ISD::SELECT, VT, Custom);
1181 setOperationAction(ISD::SETCC, VT, Custom);
1182 setOperationAction(ISD::SDIV, VT, Custom);
1183 setOperationAction(ISD::UDIV, VT, Custom);
1184 setOperationAction(ISD::SMIN, VT, Custom);
1185 setOperationAction(ISD::UMIN, VT, Custom);
1186 setOperationAction(ISD::SMAX, VT, Custom);
1187 setOperationAction(ISD::UMAX, VT, Custom);
1188 setOperationAction(ISD::SHL, VT, Custom);
1189 setOperationAction(ISD::SRL, VT, Custom);
1190 setOperationAction(ISD::SRA, VT, Custom);
1191 setOperationAction(ISD::ABS, VT, Custom);
1192 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
1193 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
1194 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
1195 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
1196 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
1197 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
1198 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
1199 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
1200
1201 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
1202 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
1203 setOperationAction(ISD::SELECT_CC, VT, Expand);
1204 setOperationAction(ISD::ROTL, VT, Expand);
1205 setOperationAction(ISD::ROTR, VT, Expand);
1206 }
1207
1208 // Illegal unpacked integer vector types.
1209 for (auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32}) {
1210 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1211 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1212 }
1213
1214 // Legalize unpacked bitcasts to REINTERPRET_CAST.
1215 for (auto VT : {MVT::nxv2i16, MVT::nxv4i16, MVT::nxv2i32, MVT::nxv2bf16,
1216 MVT::nxv2f16, MVT::nxv4f16, MVT::nxv2f32})
1217 setOperationAction(ISD::BITCAST, VT, Custom);
1218
1219 for (auto VT : {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1}) {
1220 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1221 setOperationAction(ISD::SELECT, VT, Custom);
1222 setOperationAction(ISD::SETCC, VT, Custom);
1223 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
1224 setOperationAction(ISD::TRUNCATE, VT, Custom);
1225 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
1226 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
1227 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
1228
1229 setOperationAction(ISD::SELECT_CC, VT, Expand);
1230 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1231 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1232
1233 // There are no legal MVT::nxv16f## based types.
1234 if (VT != MVT::nxv16i1) {
1235 setOperationAction(ISD::SINT_TO_FP, VT, Custom);
1236 setOperationAction(ISD::UINT_TO_FP, VT, Custom);
1237 }
1238 }
1239
1240 // NEON doesn't support masked loads/stores/gathers/scatters, but SVE does
1241 for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v1f64,
1242 MVT::v2f64, MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
1243 MVT::v2i32, MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
1244 setOperationAction(ISD::MLOAD, VT, Custom);
1245 setOperationAction(ISD::MSTORE, VT, Custom);
1246 setOperationAction(ISD::MGATHER, VT, Custom);
1247 setOperationAction(ISD::MSCATTER, VT, Custom);
1248 }
1249
1250 for (MVT VT : MVT::fp_scalable_vector_valuetypes()) {
1251 for (MVT InnerVT : MVT::fp_scalable_vector_valuetypes()) {
1252 // Avoid marking truncating FP stores as legal to prevent the
1253 // DAGCombiner from creating unsupported truncating stores.
1254 setTruncStoreAction(VT, InnerVT, Expand);
1255 // SVE does not have floating-point extending loads.
1256 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
1257 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
1258 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1259 }
1260 }
1261
1262 // SVE supports truncating stores of 64- and 128-bit vectors.
1263 setTruncStoreAction(MVT::v2i64, MVT::v2i8, Custom);
1264 setTruncStoreAction(MVT::v2i64, MVT::v2i16, Custom);
1265 setTruncStoreAction(MVT::v2i64, MVT::v2i32, Custom);
1266 setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
1267 setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
1268
1269 for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
1270 MVT::nxv4f32, MVT::nxv2f64}) {
1271 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1272 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1273 setOperationAction(ISD::MGATHER, VT, Custom);
1274 setOperationAction(ISD::MSCATTER, VT, Custom);
1275 setOperationAction(ISD::MLOAD, VT, Custom);
1276 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
1277 setOperationAction(ISD::SELECT, VT, Custom);
1278 setOperationAction(ISD::FADD, VT, Custom);
1279 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1280 setOperationAction(ISD::FDIV, VT, Custom);
1281 setOperationAction(ISD::FMA, VT, Custom);
1282 setOperationAction(ISD::FMAXIMUM, VT, Custom);
1283 setOperationAction(ISD::FMAXNUM, VT, Custom);
1284 setOperationAction(ISD::FMINIMUM, VT, Custom);
1285 setOperationAction(ISD::FMINNUM, VT, Custom);
1286 setOperationAction(ISD::FMUL, VT, Custom);
1287 setOperationAction(ISD::FNEG, VT, Custom);
1288 setOperationAction(ISD::FSUB, VT, Custom);
1289 setOperationAction(ISD::FCEIL, VT, Custom);
1290 setOperationAction(ISD::FFLOOR, VT, Custom);
1291 setOperationAction(ISD::FNEARBYINT, VT, Custom);
1292 setOperationAction(ISD::FRINT, VT, Custom);
1293 setOperationAction(ISD::FROUND, VT, Custom);
1294 setOperationAction(ISD::FROUNDEVEN, VT, Custom);
1295 setOperationAction(ISD::FTRUNC, VT, Custom);
1296 setOperationAction(ISD::FSQRT, VT, Custom);
1297 setOperationAction(ISD::FABS, VT, Custom);
1298 setOperationAction(ISD::FP_EXTEND, VT, Custom);
1299 setOperationAction(ISD::FP_ROUND, VT, Custom);
1300 setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
1301 setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
1302 setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
1303 setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
1304 setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
1305
1306 setOperationAction(ISD::SELECT_CC, VT, Expand);
1307 }
1308
1309 for (auto VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) {
1310 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1311 setOperationAction(ISD::MGATHER, VT, Custom);
1312 setOperationAction(ISD::MSCATTER, VT, Custom);
1313 setOperationAction(ISD::MLOAD, VT, Custom);
1314 }
1315
1316 setOperationAction(ISD::SPLAT_VECTOR, MVT::nxv8bf16, Custom);
1317
1318 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
1319 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
1320
1321 // NOTE: Currently this has to happen after computeRegisterProperties rather
1322 // than the preferred option of combining it with the addRegisterClass call.
1323 if (Subtarget->useSVEForFixedLengthVectors()) {
1324 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
1325 if (useSVEForFixedLengthVectorVT(VT))
1326 addTypeForFixedLengthSVE(VT);
1327 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
1328 if (useSVEForFixedLengthVectorVT(VT))
1329 addTypeForFixedLengthSVE(VT);
1330
1331 // Operations producing 64-bit results can take inputs wider than a NEON register.
1332 for (auto VT : {MVT::v8i8, MVT::v4i16})
1333 setOperationAction(ISD::TRUNCATE, VT, Custom);
1334 setOperationAction(ISD::FP_ROUND, MVT::v4f16, Custom);
1335
1336 // Operations producing 128-bit results imply an input wider than a NEON register.
1337 for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
1338 setOperationAction(ISD::TRUNCATE, VT, Custom);
1339 for (auto VT : {MVT::v8f16, MVT::v4f32})
1340 setOperationAction(ISD::FP_ROUND, VT, Custom);
1341
1342 // These operations are not supported on NEON but SVE can do them.
1343 setOperationAction(ISD::BITREVERSE, MVT::v1i64, Custom);
1344 setOperationAction(ISD::CTLZ, MVT::v1i64, Custom);
1345 setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
1346 setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
1347 setOperationAction(ISD::MUL, MVT::v1i64, Custom);
1348 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
1349 setOperationAction(ISD::MULHS, MVT::v1i64, Custom);
1350 setOperationAction(ISD::MULHS, MVT::v2i64, Custom);
1351 setOperationAction(ISD::MULHU, MVT::v1i64, Custom);
1352 setOperationAction(ISD::MULHU, MVT::v2i64, Custom);
1353 setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
1354 setOperationAction(ISD::SDIV, MVT::v16i8, Custom);
1355 setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
1356 setOperationAction(ISD::SDIV, MVT::v8i16, Custom);
1357 setOperationAction(ISD::SDIV, MVT::v2i32, Custom);
1358 setOperationAction(ISD::SDIV, MVT::v4i32, Custom);
1359 setOperationAction(ISD::SDIV, MVT::v1i64, Custom);
1360 setOperationAction(ISD::SDIV, MVT::v2i64, Custom);
1361 setOperationAction(ISD::SMAX, MVT::v1i64, Custom);
1362 setOperationAction(ISD::SMAX, MVT::v2i64, Custom);
1363 setOperationAction(ISD::SMIN, MVT::v1i64, Custom);
1364 setOperationAction(ISD::SMIN, MVT::v2i64, Custom);
1365 setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
1366 setOperationAction(ISD::UDIV, MVT::v16i8, Custom);
1367 setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
1368 setOperationAction(ISD::UDIV, MVT::v8i16, Custom);
1369 setOperationAction(ISD::UDIV, MVT::v2i32, Custom);
1370 setOperationAction(ISD::UDIV, MVT::v4i32, Custom);
1371 setOperationAction(ISD::UDIV, MVT::v1i64, Custom);
1372 setOperationAction(ISD::UDIV, MVT::v2i64, Custom);
1373 setOperationAction(ISD::UMAX, MVT::v1i64, Custom);
1374 setOperationAction(ISD::UMAX, MVT::v2i64, Custom);
1375 setOperationAction(ISD::UMIN, MVT::v1i64, Custom);
1376 setOperationAction(ISD::UMIN, MVT::v2i64, Custom);
1377 setOperationAction(ISD::VECREDUCE_SMAX, MVT::v2i64, Custom);
1378 setOperationAction(ISD::VECREDUCE_SMIN, MVT::v2i64, Custom);
1379 setOperationAction(ISD::VECREDUCE_UMAX, MVT::v2i64, Custom);
1380 setOperationAction(ISD::VECREDUCE_UMIN, MVT::v2i64, Custom);
1381
1382 // Int operations with no NEON support.
1383 for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
1384 MVT::v2i32, MVT::v4i32, MVT::v2i64}) {
1385 setOperationAction(ISD::BITREVERSE, VT, Custom);
1386 setOperationAction(ISD::CTTZ, VT, Custom);
1387 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
1388 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
1389 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
1390 }
1391
1392 // FP operations with no NEON support.
1393 for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32,
1394 MVT::v1f64, MVT::v2f64})
1395 setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
1396
1397 // Use SVE for vectors with more than 2 elements.
1398 for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v4f32})
1399 setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
1400 }
1401
1402 setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv2i1, MVT::nxv2i64);
1403 setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv4i1, MVT::nxv4i32);
1404 setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv8i1, MVT::nxv8i16);
1405 setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv16i1, MVT::nxv16i8);
1406 }
1407
1408 PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
1409}
1410
1411void AArch64TargetLowering::addTypeForNEON(MVT VT) {
1412 assert(VT.isVector() && "VT should be a vector type");
1413
1414 if (VT.isFloatingPoint()) {
1415 MVT PromoteTo = EVT(VT).changeVectorElementTypeToInteger().getSimpleVT();
1416 setOperationPromotedToType(ISD::LOAD, VT, PromoteTo);
1417 setOperationPromotedToType(ISD::STORE, VT, PromoteTo);
1418 }
1419
1420 // Mark vector float intrinsics as expand.
1421 if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) {
1422 setOperationAction(ISD::FSIN, VT, Expand);
1423 setOperationAction(ISD::FCOS, VT, Expand);
1424 setOperationAction(ISD::FPOW, VT, Expand);
1425 setOperationAction(ISD::FLOG, VT, Expand);
1426 setOperationAction(ISD::FLOG2, VT, Expand);
1427 setOperationAction(ISD::FLOG10, VT, Expand);
1428 setOperationAction(ISD::FEXP, VT, Expand);
1429 setOperationAction(ISD::FEXP2, VT, Expand);
1430 }
1431
1432 // But we do support custom-lowering for FCOPYSIGN.
1433 if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
1434 ((VT == MVT::v4f16 || VT == MVT::v8f16) && Subtarget->hasFullFP16()))
1435 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1436
1437 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1438 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1439 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1440 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1441 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1442 setOperationAction(ISD::SRA, VT, Custom);
1443 setOperationAction(ISD::SRL, VT, Custom);
1444 setOperationAction(ISD::SHL, VT, Custom);
1445 setOperationAction(ISD::OR, VT, Custom);
1446 setOperationAction(ISD::SETCC, VT, Custom);
1447 setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
1448
1449 setOperationAction(ISD::SELECT, VT, Expand);
1450 setOperationAction(ISD::SELECT_CC, VT, Expand);
1451 setOperationAction(ISD::VSELECT, VT, Expand);
1452 for (MVT InnerVT : MVT::all_valuetypes())
1453 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
1454
1455 // CNT supports only B element sizes; for wider elements CTPOP is custom-lowered to CNT followed by UADDLP to widen.
1456 if (VT != MVT::v8i8 && VT != MVT::v16i8)
1457 setOperationAction(ISD::CTPOP, VT, Custom);
1458
1459 setOperationAction(ISD::UDIV, VT, Expand);
1460 setOperationAction(ISD::SDIV, VT, Expand);
1461 setOperationAction(ISD::UREM, VT, Expand);
1462 setOperationAction(ISD::SREM, VT, Expand);
1463 setOperationAction(ISD::FREM, VT, Expand);
1464
1465 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
1466 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
1467 setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
1468 setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
1469
1470 if (!VT.isFloatingPoint())
1471 setOperationAction(ISD::ABS, VT, Legal);
1472
1473 // [SU][MIN|MAX] are available for all NEON types apart from i64.
1474 if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64)
1475 for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
1476 setOperationAction(Opcode, VT, Legal);
1477
1478 // F[MIN|MAX][NUM|NAN] are available for all FP NEON types.
1479 if (VT.isFloatingPoint() &&
1480 VT.getVectorElementType() != MVT::bf16 &&
1481 (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()))
1482 for (unsigned Opcode :
1483 {ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FMINNUM, ISD::FMAXNUM})
1484 setOperationAction(Opcode, VT, Legal);
1485
1486 if (Subtarget->isLittleEndian()) {
1487 for (unsigned im = (unsigned)ISD::PRE_INC;
1488 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
1489 setIndexedLoadAction(im, VT, Legal);
1490 setIndexedStoreAction(im, VT, Legal);
1491 }
1492 }
1493}
1494
1495void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
1496 assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
1497
1498 // By default everything must be expanded.
1499 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1500 setOperationAction(Op, VT, Expand);
1501
1502 // We use EXTRACT_SUBVECTOR to "cast" a scalable vector to a fixed length one.
1503 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1504
1505 if (VT.isFloatingPoint()) {
1506 setCondCodeAction(ISD::SETO, VT, Expand);
1507 setCondCodeAction(ISD::SETOLT, VT, Expand);
1508 setCondCodeAction(ISD::SETLT, VT, Expand);
1509 setCondCodeAction(ISD::SETOLE, VT, Expand);
1510 setCondCodeAction(ISD::SETLE, VT, Expand);
1511 setCondCodeAction(ISD::SETULT, VT, Expand);
1512 setCondCodeAction(ISD::SETULE, VT, Expand);
1513 setCondCodeAction(ISD::SETUGE, VT, Expand);
1514 setCondCodeAction(ISD::SETUGT, VT, Expand);
1515 setCondCodeAction(ISD::SETUEQ, VT, Expand);
1516 setCondCodeAction(ISD::SETUNE, VT, Expand);
1517 }
1518
1519 // Mark integer truncating stores as having custom lowering
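  // Editor's illustrative note (not in the original source): for a 256-bit
  // fixed-length type such as MVT::v8i32, the loop below marks the
  // v8i32 -> v8i8 and v8i32 -> v8i16 truncating stores, plus the matching
  // SEXTLOAD/ZEXTLOAD extending loads, as Custom.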
1520 if (VT.isInteger()) {
1521 MVT InnerVT = VT.changeVectorElementType(MVT::i8);
1522 while (InnerVT != VT) {
1523 setTruncStoreAction(VT, InnerVT, Custom);
1524 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Custom);
1525 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Custom);
1526 InnerVT = InnerVT.changeVectorElementType(
1527 MVT::getIntegerVT(2 * InnerVT.getScalarSizeInBits()));
1528 }
1529 }
1530
1531 // Lower fixed length vector operations to scalable equivalents.
1532 setOperationAction(ISD::ABS, VT, Custom);
1533 setOperationAction(ISD::ADD, VT, Custom);
1534 setOperationAction(ISD::AND, VT, Custom);
1535 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1536 setOperationAction(ISD::BITCAST, VT, Custom);
1537 setOperationAction(ISD::BITREVERSE, VT, Custom);
1538 setOperationAction(ISD::BSWAP, VT, Custom);
1539 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1540 setOperationAction(ISD::CTLZ, VT, Custom);
1541 setOperationAction(ISD::CTPOP, VT, Custom);
1542 setOperationAction(ISD::CTTZ, VT, Custom);
1543 setOperationAction(ISD::FABS, VT, Custom);
1544 setOperationAction(ISD::FADD, VT, Custom);
1545 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1546 setOperationAction(ISD::FCEIL, VT, Custom);
1547 setOperationAction(ISD::FDIV, VT, Custom);
1548 setOperationAction(ISD::FFLOOR, VT, Custom);
1549 setOperationAction(ISD::FMA, VT, Custom);
1550 setOperationAction(ISD::FMAXIMUM, VT, Custom);
1551 setOperationAction(ISD::FMAXNUM, VT, Custom);
1552 setOperationAction(ISD::FMINIMUM, VT, Custom);
1553 setOperationAction(ISD::FMINNUM, VT, Custom);
1554 setOperationAction(ISD::FMUL, VT, Custom);
1555 setOperationAction(ISD::FNEARBYINT, VT, Custom);
1556 setOperationAction(ISD::FNEG, VT, Custom);
1557 setOperationAction(ISD::FP_EXTEND, VT, Custom);
1558 setOperationAction(ISD::FP_ROUND, VT, Custom);
1559 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
1560 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
1561 setOperationAction(ISD::FRINT, VT, Custom);
1562 setOperationAction(ISD::FROUND, VT, Custom);
1563 setOperationAction(ISD::FROUNDEVEN, VT, Custom);
1564 setOperationAction(ISD::FSQRT, VT, Custom);
1565 setOperationAction(ISD::FSUB, VT, Custom);
1566 setOperationAction(ISD::FTRUNC, VT, Custom);
1567 setOperationAction(ISD::LOAD, VT, Custom);
1568 setOperationAction(ISD::MGATHER, VT, Custom);
1569 setOperationAction(ISD::MLOAD, VT, Custom);
1570 setOperationAction(ISD::MSCATTER, VT, Custom);
1571 setOperationAction(ISD::MSTORE, VT, Custom);
1572 setOperationAction(ISD::MUL, VT, Custom);
1573 setOperationAction(ISD::MULHS, VT, Custom);
1574 setOperationAction(ISD::MULHU, VT, Custom);
1575 setOperationAction(ISD::OR, VT, Custom);
1576 setOperationAction(ISD::SDIV, VT, Custom);
1577 setOperationAction(ISD::SELECT, VT, Custom);
1578 setOperationAction(ISD::SETCC, VT, Custom);
1579 setOperationAction(ISD::SHL, VT, Custom);
1580 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1581 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
1582 setOperationAction(ISD::SINT_TO_FP, VT, Custom);
1583 setOperationAction(ISD::SMAX, VT, Custom);
1584 setOperationAction(ISD::SMIN, VT, Custom);
1585 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
1586 setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
1587 setOperationAction(ISD::SRA, VT, Custom);
1588 setOperationAction(ISD::SRL, VT, Custom);
1589 setOperationAction(ISD::STORE, VT, Custom);
1590 setOperationAction(ISD::SUB, VT, Custom);
1591 setOperationAction(ISD::TRUNCATE, VT, Custom);
1592 setOperationAction(ISD::UDIV, VT, Custom);
1593 setOperationAction(ISD::UINT_TO_FP, VT, Custom);
1594 setOperationAction(ISD::UMAX, VT, Custom);
1595 setOperationAction(ISD::UMIN, VT, Custom);
1596 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
1597 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
1598 setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
1599 setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
1600 setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
1601 setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
1602 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
1603 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1604 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
1605 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
1606 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
1607 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
1608 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
1609 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1610 setOperationAction(ISD::VSELECT, VT, Custom);
1611 setOperationAction(ISD::XOR, VT, Custom);
1612 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1613}
1614
1615void AArch64TargetLowering::addDRTypeForNEON(MVT VT) {
1616 addRegisterClass(VT, &AArch64::FPR64RegClass);
1617 addTypeForNEON(VT);
1618}
1619
1620void AArch64TargetLowering::addQRTypeForNEON(MVT VT) {
1621 addRegisterClass(VT, &AArch64::FPR128RegClass);
1622 addTypeForNEON(VT);
1623}
1624
1625EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &,
1626 LLVMContext &C, EVT VT) const {
1627 if (!VT.isVector())
1628 return MVT::i32;
1629 if (VT.isScalableVector())
1630 return EVT::getVectorVT(C, MVT::i1, VT.getVectorElementCount());
1631 return VT.changeVectorElementTypeToInteger();
1632}
1633
1634static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm,
1635 const APInt &Demanded,
1636 TargetLowering::TargetLoweringOpt &TLO,
1637 unsigned NewOpc) {
1638 uint64_t OldImm = Imm, NewImm, Enc;
1639 uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size)), OrigMask = Mask;
1640
1641 // Return if the immediate is already all zeros, all ones, a bimm32 or a
1642 // bimm64.
1643 if (Imm == 0 || Imm == Mask ||
1644 AArch64_AM::isLogicalImmediate(Imm & Mask, Size))
1645 return false;
1646
1647 unsigned EltSize = Size;
1648 uint64_t DemandedBits = Demanded.getZExtValue();
1649
1650 // Clear bits that are not demanded.
1651 Imm &= DemandedBits;
1652
1653 while (true) {
1654 // The goal here is to set the non-demanded bits in a way that minimizes
1655 // the number of switching between 0 and 1. In order to achieve this goal,
1656 // we set the non-demanded bits to the value of the preceding demanded bits.
1657 // For example, if we have an immediate 0bx10xx0x1 ('x' indicates a
1658 // non-demanded bit), we copy bit0 (1) to the least significant 'x',
1659 // bit2 (0) to 'xx', and bit6 (1) to the most significant 'x'.
1660 // The final result is 0b11000011.
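    // Worked example (editor's sketch, restricted to the 8 bits used by the
    // comment above, i.e. DemandedBits = 0b01100101, Imm = 0b01000001):
    //   NonDemandedBits = 0b10011010
    //   InvertedImm     = 0b00100100  (demanded bits of Imm that are zero)
    //   RotatedImm      = 0b00001000  (those zeros shifted under the 'x's)
    //   Sum             = 0b10100010, Carry = 0
    //   Ones            = 0b10000010  (non-demanded bits that become ones)
    // giving NewImm = (Imm | Ones) & Mask = 0b11000011, as stated above.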
1661 uint64_t NonDemandedBits = ~DemandedBits;
1662 uint64_t InvertedImm = ~Imm & DemandedBits;
1663 uint64_t RotatedImm =
1664 ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
1665 NonDemandedBits;
1666 uint64_t Sum = RotatedImm + NonDemandedBits;
1667 bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
1668 uint64_t Ones = (Sum + Carry) & NonDemandedBits;
1669 NewImm = (Imm | Ones) & Mask;
1670
1671 // If NewImm or its bitwise NOT is a shifted mask, it is a bitmask immediate
1672 // or all-ones or all-zeros, in which case we can stop searching. Otherwise,
1673 // we halve the element size and continue the search.
1674 if (isShiftedMask_64(NewImm) || isShiftedMask_64(~(NewImm | ~Mask)))
1675 break;
1676
1677 // We cannot shrink the element size any further if it is 2-bits.
1678 if (EltSize == 2)
1679 return false;
1680
1681 EltSize /= 2;
1682 Mask >>= EltSize;
1683 uint64_t Hi = Imm >> EltSize, DemandedBitsHi = DemandedBits >> EltSize;
1684
1685 // Return if there is a mismatch in any of the demanded bits of Imm and Hi.
1686 if (((Imm ^ Hi) & (DemandedBits & DemandedBitsHi) & Mask) != 0)
1687 return false;
1688
1689 // Merge the upper and lower halves of Imm and DemandedBits.
1690 Imm |= Hi;
1691 DemandedBits |= DemandedBitsHi;
1692 }
1693
1694 ++NumOptimizedImms;
1695
1696 // Replicate the element across the register width.
1697 while (EltSize < Size) {
1698 NewImm |= NewImm << EltSize;
1699 EltSize *= 2;
1700 }
1701
1702 (void)OldImm;
1703 assert(((OldImm ^ NewImm) & Demanded.getZExtValue()) == 0 &&
1704 "demanded bits should never be altered");
1705 assert(OldImm != NewImm && "the new imm shouldn't be equal to the old imm");
1706
1707 // Create the new constant immediate node.
1708 EVT VT = Op.getValueType();
1709 SDLoc DL(Op);
1710 SDValue New;
1711
1712 // If the new constant immediate is all-zeros or all-ones, let the target
1713 // independent DAG combine optimize this node.
1714 if (NewImm == 0 || NewImm == OrigMask) {
1715 New = TLO.DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
1716 TLO.DAG.getConstant(NewImm, DL, VT));
1717 // Otherwise, create a machine node so that target independent DAG combine
1718 // doesn't undo this optimization.
1719 } else {
1720 Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size);
1721 SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT);
1722 New = SDValue(
1723 TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst), 0);
1724 }
1725
1726 return TLO.CombineTo(Op, New);
1727}
1728
1729bool AArch64TargetLowering::targetShrinkDemandedConstant(
1730 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
1731 TargetLoweringOpt &TLO) const {
1732 // Delay this optimization to as late as possible.
1733 if (!TLO.LegalOps)
1734 return false;
1735
1736 if (!EnableOptimizeLogicalImm)
1737 return false;
1738
1739 EVT VT = Op.getValueType();
1740 if (VT.isVector())
1741 return false;
1742
1743 unsigned Size = VT.getSizeInBits();
1744 assert((Size == 32 || Size == 64) &&
1745 "i32 or i64 is expected after legalization.");
1746
1747 // Exit early if we demand all bits.
1748 if (DemandedBits.countPopulation() == Size)
1749 return false;
1750
1751 unsigned NewOpc;
1752 switch (Op.getOpcode()) {
1753 default:
1754 return false;
1755 case ISD::AND:
1756 NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri;
1757 break;
1758 case ISD::OR:
1759 NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri;
1760 break;
1761 case ISD::XOR:
1762 NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri;
1763 break;
1764 }
1765 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
1766 if (!C)
1767 return false;
1768 uint64_t Imm = C->getZExtValue();
1769 return optimizeLogicalImm(Op, Size, Imm, DemandedBits, TLO, NewOpc);
1770}
1771
1772/// computeKnownBitsForTargetNode - Determine which of the bits specified in
1773 /// Mask are known to be either zero or one and return them in Known.
1774void AArch64TargetLowering::computeKnownBitsForTargetNode(
1775 const SDValue Op, KnownBits &Known,
1776 const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {
1777 switch (Op.getOpcode()) {
1778 default:
1779 break;
1780 case AArch64ISD::CSEL: {
1781 KnownBits Known2;
1782 Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
1783 Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
1784 Known = KnownBits::commonBits(Known, Known2);
1785 break;
1786 }
1787 case AArch64ISD::LOADgot:
1788 case AArch64ISD::ADDlow: {
1789 if (!Subtarget->isTargetILP32())
1790 break;
1791 // In ILP32 mode all valid pointers are in the low 4GB of the address-space.
1792 Known.Zero = APInt::getHighBitsSet(64, 32);
1793 break;
1794 }
1795 case ISD::INTRINSIC_W_CHAIN: {
1796 ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
1797 Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
1798 switch (IntID) {
1799 default: return;
1800 case Intrinsic::aarch64_ldaxr:
1801 case Intrinsic::aarch64_ldxr: {
1802 unsigned BitWidth = Known.getBitWidth();
1803 EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
1804 unsigned MemBits = VT.getScalarSizeInBits();
1805 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
1806 return;
1807 }
1808 }
1809 break;
1810 }
1811 case ISD::INTRINSIC_WO_CHAIN:
1812 case ISD::INTRINSIC_VOID: {
1813 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1814 switch (IntNo) {
1815 default:
1816 break;
1817 case Intrinsic::aarch64_neon_umaxv:
1818 case Intrinsic::aarch64_neon_uminv: {
1819 // Figure out the datatype of the vector operand. The UMINV instruction
1820 // will zero extend the result, so we can mark as known zero all the
1821 // bits larger than the element datatype. 32-bit or larget doesn't need
1822 // this as those are legal types and will be handled by isel directly.
1823 MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
1824 unsigned BitWidth = Known.getBitWidth();
1825 if (VT == MVT::v8i8 || VT == MVT::v16i8) {
1826 assert(BitWidth >= 8 && "Unexpected width!");
1827 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8);
1828 Known.Zero |= Mask;
1829 } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
1830 assert(BitWidth >= 16 && "Unexpected width!");
1831 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
1832 Known.Zero |= Mask;
1833 }
1834 break;
1835 } break;
1836 }
1837 }
1838 }
1839}
1840
1841MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
1842 EVT) const {
1843 return MVT::i64;
1844}
1845
1846bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
1847 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
1848 bool *Fast) const {
1849 if (Subtarget->requiresStrictAlign())
1850 return false;
1851
1852 if (Fast) {
1853 // Some CPUs are fine with unaligned stores except for 128-bit ones.
1854 *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||
1855 // See comments in performSTORECombine() for more details about
1856 // these conditions.
1857
1858 // Code that uses clang vector extensions can mark that it
1859 // wants unaligned accesses to be treated as fast by
1860 // underspecifying alignment to be 1 or 2.
1861 Alignment <= 2 ||
1862
1863 // Disregard v2i64. Memcpy lowering produces those and splitting
1864 // them regresses performance on micro-benchmarks and olden/bh.
1865 VT == MVT::v2i64;
1866 }
1867 return true;
1868}
1869
1870// Same as above but handling LLTs instead.
1871bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
1872 LLT Ty, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
1873 bool *Fast) const {
1874 if (Subtarget->requiresStrictAlign())
1875 return false;
1876
1877 if (Fast) {
1878 // Some CPUs are fine with unaligned stores except for 128-bit ones.
1879 *Fast = !Subtarget->isMisaligned128StoreSlow() ||
1880 Ty.getSizeInBytes() != 16 ||
1881 // See comments in performSTORECombine() for more details about
1882 // these conditions.
1883
1884 // Code that uses clang vector extensions can mark that it
1885 // wants unaligned accesses to be treated as fast by
1886 // underspecifying alignment to be 1 or 2.
1887 Alignment <= 2 ||
1888
1889 // Disregard v2i64. Memcpy lowering produces those and splitting
1890 // them regresses performance on micro-benchmarks and olden/bh.
1891 Ty == LLT::fixed_vector(2, 64);
1892 }
1893 return true;
1894}
1895
1896FastISel *
1897AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
1898 const TargetLibraryInfo *libInfo) const {
1899 return AArch64::createFastISel(funcInfo, libInfo);
1900}
1901
1902const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
1903#define MAKE_CASE(V) \
1904 case V: \
1905 return #V;
1906 switch ((AArch64ISD::NodeType)Opcode) {
1907 case AArch64ISD::FIRST_NUMBER:
1908 break;
1909 MAKE_CASE(AArch64ISD::CALL)
1910 MAKE_CASE(AArch64ISD::ADRP)
1911 MAKE_CASE(AArch64ISD::ADR)
1912 MAKE_CASE(AArch64ISD::ADDlow)
1913 MAKE_CASE(AArch64ISD::LOADgot)
1914 MAKE_CASE(AArch64ISD::RET_FLAG)
1915 MAKE_CASE(AArch64ISD::BRCOND)
1916 MAKE_CASE(AArch64ISD::CSEL)
1917 MAKE_CASE(AArch64ISD::CSINV)
1918 MAKE_CASE(AArch64ISD::CSNEG)
1919 MAKE_CASE(AArch64ISD::CSINC)
1920 MAKE_CASE(AArch64ISD::THREAD_POINTER)
1921 MAKE_CASE(AArch64ISD::TLSDESC_CALLSEQ)
1922 MAKE_CASE(AArch64ISD::ADD_PRED)
1923 MAKE_CASE(AArch64ISD::MUL_PRED)
1924 MAKE_CASE(AArch64ISD::MULHS_PRED)
1925 MAKE_CASE(AArch64ISD::MULHU_PRED)
1926 MAKE_CASE(AArch64ISD::SDIV_PRED)
1927 MAKE_CASE(AArch64ISD::SHL_PRED)
1928 MAKE_CASE(AArch64ISD::SMAX_PRED)
1929 MAKE_CASE(AArch64ISD::SMIN_PRED)
1930 MAKE_CASE(AArch64ISD::SRA_PRED)
1931 MAKE_CASE(AArch64ISD::SRL_PRED)
1932 MAKE_CASE(AArch64ISD::SUB_PRED)
1933 MAKE_CASE(AArch64ISD::UDIV_PRED)
1934 MAKE_CASE(AArch64ISD::UMAX_PRED)
1935 MAKE_CASE(AArch64ISD::UMIN_PRED)
1936 MAKE_CASE(AArch64ISD::FNEG_MERGE_PASSTHRU)
1937 MAKE_CASE(AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU)
1938 MAKE_CASE(AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU)
1939 MAKE_CASE(AArch64ISD::FCEIL_MERGE_PASSTHRU)
1940 MAKE_CASE(AArch64ISD::FFLOOR_MERGE_PASSTHRU)
1941 MAKE_CASE(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU)
1942 MAKE_CASE(AArch64ISD::FRINT_MERGE_PASSTHRU)
1943 MAKE_CASE(AArch64ISD::FROUND_MERGE_PASSTHRU)
1944 MAKE_CASE(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU)
1945 MAKE_CASE(AArch64ISD::FTRUNC_MERGE_PASSTHRU)
1946 MAKE_CASE(AArch64ISD::FP_ROUND_MERGE_PASSTHRU)
1947 MAKE_CASE(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU)
1948 MAKE_CASE(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU)
1949 MAKE_CASE(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU)
1950 MAKE_CASE(AArch64ISD::FCVTZU_MERGE_PASSTHRU)
1951 MAKE_CASE(AArch64ISD::FCVTZS_MERGE_PASSTHRU)
1952 MAKE_CASE(AArch64ISD::FSQRT_MERGE_PASSTHRU)
1953 MAKE_CASE(AArch64ISD::FRECPX_MERGE_PASSTHRU)
1954 MAKE_CASE(AArch64ISD::FABS_MERGE_PASSTHRU)
1955 MAKE_CASE(AArch64ISD::ABS_MERGE_PASSTHRU)
1956 MAKE_CASE(AArch64ISD::NEG_MERGE_PASSTHRU)
1957 MAKE_CASE(AArch64ISD::SETCC_MERGE_ZERO)
1958 MAKE_CASE(AArch64ISD::ADC)
1959 MAKE_CASE(AArch64ISD::SBC)
1960 MAKE_CASE(AArch64ISD::ADDS)
1961 MAKE_CASE(AArch64ISD::SUBS)
1962 MAKE_CASE(AArch64ISD::ADCS)
1963 MAKE_CASE(AArch64ISD::SBCS)
1964 MAKE_CASE(AArch64ISD::ANDS)
1965 MAKE_CASE(AArch64ISD::CCMP)
1966 MAKE_CASE(AArch64ISD::CCMN)
1967 MAKE_CASE(AArch64ISD::FCCMP)
1968 MAKE_CASE(AArch64ISD::FCMP)
1969 MAKE_CASE(AArch64ISD::STRICT_FCMP)
1970 MAKE_CASE(AArch64ISD::STRICT_FCMPE)
1971 MAKE_CASE(AArch64ISD::DUP)
1972 MAKE_CASE(AArch64ISD::DUPLANE8)
1973 MAKE_CASE(AArch64ISD::DUPLANE16)
1974 MAKE_CASE(AArch64ISD::DUPLANE32)
1975 MAKE_CASE(AArch64ISD::DUPLANE64)
1976 MAKE_CASE(AArch64ISD::MOVI)
1977 MAKE_CASE(AArch64ISD::MOVIshift)
1978 MAKE_CASE(AArch64ISD::MOVIedit)
1979 MAKE_CASE(AArch64ISD::MOVImsl)
1980 MAKE_CASE(AArch64ISD::FMOV)
1981 MAKE_CASE(AArch64ISD::MVNIshift)
1982 MAKE_CASE(AArch64ISD::MVNImsl)
1983 MAKE_CASE(AArch64ISD::BICi)
1984 MAKE_CASE(AArch64ISD::ORRi)
1985 MAKE_CASE(AArch64ISD::BSP)
1986 MAKE_CASE(AArch64ISD::EXTR)
1987 MAKE_CASE(AArch64ISD::ZIP1)
1988 MAKE_CASE(AArch64ISD::ZIP2)
1989 MAKE_CASE(AArch64ISD::UZP1)
1990 MAKE_CASE(AArch64ISD::UZP2)
1991 MAKE_CASE(AArch64ISD::TRN1)
1992 MAKE_CASE(AArch64ISD::TRN2)
1993 MAKE_CASE(AArch64ISD::REV16)
1994 MAKE_CASE(AArch64ISD::REV32)
1995 MAKE_CASE(AArch64ISD::REV64)
1996 MAKE_CASE(AArch64ISD::EXT)
1997 MAKE_CASE(AArch64ISD::SPLICE)
1998 MAKE_CASE(AArch64ISD::VSHL)
1999 MAKE_CASE(AArch64ISD::VLSHR)
2000 MAKE_CASE(AArch64ISD::VASHR)
2001 MAKE_CASE(AArch64ISD::VSLI)
2002 MAKE_CASE(AArch64ISD::VSRI)
2003 MAKE_CASE(AArch64ISD::CMEQ)
2004 MAKE_CASE(AArch64ISD::CMGE)
2005 MAKE_CASE(AArch64ISD::CMGT)
2006 MAKE_CASE(AArch64ISD::CMHI)
2007 MAKE_CASE(AArch64ISD::CMHS)
2008 MAKE_CASE(AArch64ISD::FCMEQ)
2009 MAKE_CASE(AArch64ISD::FCMGE)
2010 MAKE_CASE(AArch64ISD::FCMGT)
2011 MAKE_CASE(AArch64ISD::CMEQz)
2012 MAKE_CASE(AArch64ISD::CMGEz)
2013 MAKE_CASE(AArch64ISD::CMGTz)
2014 MAKE_CASE(AArch64ISD::CMLEz)
2015 MAKE_CASE(AArch64ISD::CMLTz)
2016 MAKE_CASE(AArch64ISD::FCMEQz)
2017 MAKE_CASE(AArch64ISD::FCMGEz)
2018 MAKE_CASE(AArch64ISD::FCMGTz)
2019 MAKE_CASE(AArch64ISD::FCMLEz)
2020 MAKE_CASE(AArch64ISD::FCMLTz)
2021 MAKE_CASE(AArch64ISD::SADDV)
2022 MAKE_CASE(AArch64ISD::UADDV)
2023 MAKE_CASE(AArch64ISD::SRHADD)
2024 MAKE_CASE(AArch64ISD::URHADD)
2025 MAKE_CASE(AArch64ISD::SHADD)
2026 MAKE_CASE(AArch64ISD::UHADD)
2027 MAKE_CASE(AArch64ISD::SDOT)
2028 MAKE_CASE(AArch64ISD::UDOT)
2029 MAKE_CASE(AArch64ISD::SMINV)
2030 MAKE_CASE(AArch64ISD::UMINV)
2031 MAKE_CASE(AArch64ISD::SMAXV)
2032 MAKE_CASE(AArch64ISD::UMAXV)
2033 MAKE_CASE(AArch64ISD::SADDV_PRED)
2034 MAKE_CASE(AArch64ISD::UADDV_PRED)
2035 MAKE_CASE(AArch64ISD::SMAXV_PRED)
2036 MAKE_CASE(AArch64ISD::UMAXV_PRED)
2037 MAKE_CASE(AArch64ISD::SMINV_PRED)
2038 MAKE_CASE(AArch64ISD::UMINV_PRED)
2039 MAKE_CASE(AArch64ISD::ORV_PRED)
2040 MAKE_CASE(AArch64ISD::EORV_PRED)
2041 MAKE_CASE(AArch64ISD::ANDV_PRED)
2042 MAKE_CASE(AArch64ISD::CLASTA_N)
2043 MAKE_CASE(AArch64ISD::CLASTB_N)
2044 MAKE_CASE(AArch64ISD::LASTA)
2045 MAKE_CASE(AArch64ISD::LASTB)
2046 MAKE_CASE(AArch64ISD::REINTERPRET_CAST)
2047 MAKE_CASE(AArch64ISD::LS64_BUILD)
2048 MAKE_CASE(AArch64ISD::LS64_EXTRACT)
2049 MAKE_CASE(AArch64ISD::TBL)
2050 MAKE_CASE(AArch64ISD::FADD_PRED)
2051 MAKE_CASE(AArch64ISD::FADDA_PRED)
2052 MAKE_CASE(AArch64ISD::FADDV_PRED)
2053 MAKE_CASE(AArch64ISD::FDIV_PRED)
2054 MAKE_CASE(AArch64ISD::FMA_PRED)
2055 MAKE_CASE(AArch64ISD::FMAX_PRED)
2056 MAKE_CASE(AArch64ISD::FMAXV_PRED)
2057 MAKE_CASE(AArch64ISD::FMAXNM_PRED)
2058 MAKE_CASE(AArch64ISD::FMAXNMV_PRED)
2059 MAKE_CASE(AArch64ISD::FMIN_PRED)
2060 MAKE_CASE(AArch64ISD::FMINV_PRED)
2061 MAKE_CASE(AArch64ISD::FMINNM_PRED)
2062 MAKE_CASE(AArch64ISD::FMINNMV_PRED)
2063 MAKE_CASE(AArch64ISD::FMUL_PRED)
2064 MAKE_CASE(AArch64ISD::FSUB_PRED)
2065 MAKE_CASE(AArch64ISD::BIC)
2066 MAKE_CASE(AArch64ISD::BIT)
2067 MAKE_CASE(AArch64ISD::CBZ)
2068 MAKE_CASE(AArch64ISD::CBNZ)
2069 MAKE_CASE(AArch64ISD::TBZ)
2070 MAKE_CASE(AArch64ISD::TBNZ)
2071 MAKE_CASE(AArch64ISD::TC_RETURN)
2072 MAKE_CASE(AArch64ISD::PREFETCH)
2073 MAKE_CASE(AArch64ISD::SITOF)
2074 MAKE_CASE(AArch64ISD::UITOF)
2075 MAKE_CASE(AArch64ISD::NVCAST)
2076 MAKE_CASE(AArch64ISD::MRS)
2077 MAKE_CASE(AArch64ISD::SQSHL_I)
2078 MAKE_CASE(AArch64ISD::UQSHL_I)
2079 MAKE_CASE(AArch64ISD::SRSHR_I)
2080 MAKE_CASE(AArch64ISD::URSHR_I)
2081 MAKE_CASE(AArch64ISD::SQSHLU_I)
2082 MAKE_CASE(AArch64ISD::WrapperLarge)
2083 MAKE_CASE(AArch64ISD::LD2post)
2084 MAKE_CASE(AArch64ISD::LD3post)
2085 MAKE_CASE(AArch64ISD::LD4post)
2086 MAKE_CASE(AArch64ISD::ST2post)
2087 MAKE_CASE(AArch64ISD::ST3post)
2088 MAKE_CASE(AArch64ISD::ST4post)
2089 MAKE_CASE(AArch64ISD::LD1x2post)
2090 MAKE_CASE(AArch64ISD::LD1x3post)
2091 MAKE_CASE(AArch64ISD::LD1x4post)
2092 MAKE_CASE(AArch64ISD::ST1x2post)
2093 MAKE_CASE(AArch64ISD::ST1x3post)
2094 MAKE_CASE(AArch64ISD::ST1x4post)
2095 MAKE_CASE(AArch64ISD::LD1DUPpost)
2096 MAKE_CASE(AArch64ISD::LD2DUPpost)
2097 MAKE_CASE(AArch64ISD::LD3DUPpost)
2098 MAKE_CASE(AArch64ISD::LD4DUPpost)
2099 MAKE_CASE(AArch64ISD::LD1LANEpost)
2100 MAKE_CASE(AArch64ISD::LD2LANEpost)
2101 MAKE_CASE(AArch64ISD::LD3LANEpost)
2102 MAKE_CASE(AArch64ISD::LD4LANEpost)
2103 MAKE_CASE(AArch64ISD::ST2LANEpost)
2104 MAKE_CASE(AArch64ISD::ST3LANEpost)
2105 MAKE_CASE(AArch64ISD::ST4LANEpost)
2106 MAKE_CASE(AArch64ISD::SMULL)
2107 MAKE_CASE(AArch64ISD::UMULL)
2108 MAKE_CASE(AArch64ISD::FRECPE)
2109 MAKE_CASE(AArch64ISD::FRECPS)
2110 MAKE_CASE(AArch64ISD::FRSQRTE)
2111 MAKE_CASE(AArch64ISD::FRSQRTS)
2112 MAKE_CASE(AArch64ISD::STG)
2113 MAKE_CASE(AArch64ISD::STZG)
2114 MAKE_CASE(AArch64ISD::ST2G)
2115 MAKE_CASE(AArch64ISD::STZ2G)
2116 MAKE_CASE(AArch64ISD::SUNPKHI)
2117 MAKE_CASE(AArch64ISD::SUNPKLO)
2118 MAKE_CASE(AArch64ISD::UUNPKHI)
2119 MAKE_CASE(AArch64ISD::UUNPKLO)
2120 MAKE_CASE(AArch64ISD::INSR)
2121 MAKE_CASE(AArch64ISD::PTEST)
2122 MAKE_CASE(AArch64ISD::PTRUE)
2123 MAKE_CASE(AArch64ISD::LD1_MERGE_ZERO)
2124 MAKE_CASE(AArch64ISD::LD1S_MERGE_ZERO)
2125 MAKE_CASE(AArch64ISD::LDNF1_MERGE_ZERO)
2126 MAKE_CASE(AArch64ISD::LDNF1S_MERGE_ZERO)
2127 MAKE_CASE(AArch64ISD::LDFF1_MERGE_ZERO)
2128 MAKE_CASE(AArch64ISD::LDFF1S_MERGE_ZERO)
2129 MAKE_CASE(AArch64ISD::LD1RQ_MERGE_ZERO)
2130 MAKE_CASE(AArch64ISD::LD1RO_MERGE_ZERO)
2131 MAKE_CASE(AArch64ISD::SVE_LD2_MERGE_ZERO)
2132 MAKE_CASE(AArch64ISD::SVE_LD3_MERGE_ZERO)
2133 MAKE_CASE(AArch64ISD::SVE_LD4_MERGE_ZERO)
2134 MAKE_CASE(AArch64ISD::GLD1_MERGE_ZERO)
2135 MAKE_CASE(AArch64ISD::GLD1_SCALED_MERGE_ZERO)
2136 MAKE_CASE(AArch64ISD::GLD1_SXTW_MERGE_ZERO)
2137 MAKE_CASE(AArch64ISD::GLD1_UXTW_MERGE_ZERO)
2138 MAKE_CASE(AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO)
2139 MAKE_CASE(AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO)
2140 MAKE_CASE(AArch64ISD::GLD1_IMM_MERGE_ZERO)
2141 MAKE_CASE(AArch64ISD::GLD1S_MERGE_ZERO)
2142 MAKE_CASE(AArch64ISD::GLD1S_SCALED_MERGE_ZERO)
2143 MAKE_CASE(AArch64ISD::GLD1S_SXTW_MERGE_ZERO)
2144 MAKE_CASE(AArch64ISD::GLD1S_UXTW_MERGE_ZERO)
2145 MAKE_CASE(AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO)
2146 MAKE_CASE(AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO)
2147 MAKE_CASE(AArch64ISD::GLD1S_IMM_MERGE_ZERO)
2148 MAKE_CASE(AArch64ISD::GLDFF1_MERGE_ZERO)
2149 MAKE_CASE(AArch64ISD::GLDFF1_SCALED_MERGE_ZERO)
2150 MAKE_CASE(AArch64ISD::GLDFF1_SXTW_MERGE_ZERO)
2151 MAKE_CASE(AArch64ISD::GLDFF1_UXTW_MERGE_ZERO)
2152 MAKE_CASE(AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO)
2153 MAKE_CASE(AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO)
2154 MAKE_CASE(AArch64ISD::GLDFF1_IMM_MERGE_ZERO)
2155 MAKE_CASE(AArch64ISD::GLDFF1S_MERGE_ZERO)
2156 MAKE_CASE(AArch64ISD::GLDFF1S_SCALED_MERGE_ZERO)
2157 MAKE_CASE(AArch64ISD::GLDFF1S_SXTW_MERGE_ZERO)
2158 MAKE_CASE(AArch64ISD::GLDFF1S_UXTW_MERGE_ZERO)
2159 MAKE_CASE(AArch64ISD::GLDFF1S_SXTW_SCALED_MERGE_ZERO)
2160 MAKE_CASE(AArch64ISD::GLDFF1S_UXTW_SCALED_MERGE_ZERO)
2161 MAKE_CASE(AArch64ISD::GLDFF1S_IMM_MERGE_ZERO)
2162 MAKE_CASE(AArch64ISD::GLDNT1_MERGE_ZERO)
2163 MAKE_CASE(AArch64ISD::GLDNT1_INDEX_MERGE_ZERO)
2164 MAKE_CASE(AArch64ISD::GLDNT1S_MERGE_ZERO)
2165 MAKE_CASE(AArch64ISD::ST1_PRED)
2166 MAKE_CASE(AArch64ISD::SST1_PRED)
2167 MAKE_CASE(AArch64ISD::SST1_SCALED_PRED)
2168 MAKE_CASE(AArch64ISD::SST1_SXTW_PRED)
2169 MAKE_CASE(AArch64ISD::SST1_UXTW_PRED)
2170 MAKE_CASE(AArch64ISD::SST1_SXTW_SCALED_PRED)
2171 MAKE_CASE(AArch64ISD::SST1_UXTW_SCALED_PRED)
2172 MAKE_CASE(AArch64ISD::SST1_IMM_PRED)
2173 MAKE_CASE(AArch64ISD::SSTNT1_PRED)
2174 MAKE_CASE(AArch64ISD::SSTNT1_INDEX_PRED)
2175 MAKE_CASE(AArch64ISD::LDP)
2176 MAKE_CASE(AArch64ISD::STP)
2177 MAKE_CASE(AArch64ISD::STNP)
2178 MAKE_CASE(AArch64ISD::BITREVERSE_MERGE_PASSTHRU)
2179 MAKE_CASE(AArch64ISD::BSWAP_MERGE_PASSTHRU)
2180 MAKE_CASE(AArch64ISD::CTLZ_MERGE_PASSTHRU)
2181 MAKE_CASE(AArch64ISD::CTPOP_MERGE_PASSTHRU)
2182 MAKE_CASE(AArch64ISD::DUP_MERGE_PASSTHRU)
2183 MAKE_CASE(AArch64ISD::INDEX_VECTOR)
2184 MAKE_CASE(AArch64ISD::UADDLP)
2185 MAKE_CASE(AArch64ISD::CALL_RVMARKER)
2186 }
2187#undef MAKE_CASE
2188 return nullptr;
2189}
2190
2191MachineBasicBlock *
2192AArch64TargetLowering::EmitF128CSEL(MachineInstr &MI,
2193 MachineBasicBlock *MBB) const {
2194 // We materialise the F128CSEL pseudo-instruction as some control flow and a
2195 // phi node:
2196
2197 // OrigBB:
2198 // [... previous instrs leading to comparison ...]
2199 // b.ne TrueBB
2200 // b EndBB
2201 // TrueBB:
2202 // ; Fallthrough
2203 // EndBB:
2204 // Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB]
2205
2206 MachineFunction *MF = MBB->getParent();
2207 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2208 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
2209 DebugLoc DL = MI.getDebugLoc();
2210 MachineFunction::iterator It = ++MBB->getIterator();
2211
2212 Register DestReg = MI.getOperand(0).getReg();
2213 Register IfTrueReg = MI.getOperand(1).getReg();
2214 Register IfFalseReg = MI.getOperand(2).getReg();
2215 unsigned CondCode = MI.getOperand(3).getImm();
2216 bool NZCVKilled = MI.getOperand(4).isKill();
2217
2218 MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
2219 MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
2220 MF->insert(It, TrueBB);
2221 MF->insert(It, EndBB);
2222
2223 // Transfer rest of current basic-block to EndBB
2224 EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)),
2225 MBB->end());
2226 EndBB->transferSuccessorsAndUpdatePHIs(MBB);
2227
2228 BuildMI(MBB, DL, TII->get(AArch64::Bcc)).addImm(CondCode).addMBB(TrueBB);
2229 BuildMI(MBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
2230 MBB->addSuccessor(TrueBB);
2231 MBB->addSuccessor(EndBB);
2232
2233 // TrueBB falls through to the end.
2234 TrueBB->addSuccessor(EndBB);
2235
2236 if (!NZCVKilled) {
2237 TrueBB->addLiveIn(AArch64::NZCV);
2238 EndBB->addLiveIn(AArch64::NZCV);
2239 }
2240
2241 BuildMI(*EndBB, EndBB->begin(), DL, TII->get(AArch64::PHI), DestReg)
2242 .addReg(IfTrueReg)
2243 .addMBB(TrueBB)
2244 .addReg(IfFalseReg)
2245 .addMBB(MBB);
2246
2247 MI.eraseFromParent();
2248 return EndBB;
2249}
2250
2251MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchRet(
2252 MachineInstr &MI, MachineBasicBlock *BB) const {
2253 assert(!isAsynchronousEHPersonality(classifyEHPersonality(
2254 BB->getParent()->getFunction().getPersonalityFn())) &&
2255 "SEH does not use catchret!");
2256 return BB;
2257}
2258
2259MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
2260 MachineInstr &MI, MachineBasicBlock *BB) const {
2261 switch (MI.getOpcode()) {
2262 default:
2263#ifndef NDEBUG
2264 MI.dump();
2265#endif
2266 llvm_unreachable("Unexpected instruction for custom inserter!");
2267
2268 case AArch64::F128CSEL:
2269 return EmitF128CSEL(MI, BB);
2270
2271 case TargetOpcode::STACKMAP:
2272 case TargetOpcode::PATCHPOINT:
2273 case TargetOpcode::STATEPOINT:
2274 return emitPatchPoint(MI, BB);
2275
2276 case AArch64::CATCHRET:
2277 return EmitLoweredCatchRet(MI, BB);
2278 }
2279}
2280
2281//===----------------------------------------------------------------------===//
2282// AArch64 Lowering private implementation.
2283//===----------------------------------------------------------------------===//
2284
2285//===----------------------------------------------------------------------===//
2286// Lowering Code
2287//===----------------------------------------------------------------------===//
2288
2289// Forward declarations of SVE fixed length lowering helpers
2290static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT);
2291static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V);
2292static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V);
2293static SDValue convertFixedMaskToScalableVector(SDValue Mask,
2294 SelectionDAG &DAG);
2295
2296/// isZerosVector - Check whether SDNode N is a zero-filled vector.
2297static bool isZerosVector(const SDNode *N) {
2298 // Look through a bit convert.
2299 while (N->getOpcode() == ISD::BITCAST)
2300 N = N->getOperand(0).getNode();
2301
2302 if (ISD::isConstantSplatVectorAllZeros(N))
2303 return true;
2304
2305 if (N->getOpcode() != AArch64ISD::DUP)
2306 return false;
2307
2308 auto Opnd0 = N->getOperand(0);
2309 auto *CINT = dyn_cast<ConstantSDNode>(Opnd0);
2310 auto *CFP = dyn_cast<ConstantFPSDNode>(Opnd0);
2311 return (CINT && CINT->isNullValue()) || (CFP && CFP->isZero());
2312}
2313
2314/// changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64
2315/// CC
2316static AArch64CC::CondCode changeIntCCToAArch64CC(ISD::CondCode CC) {
2317 switch (CC) {
2318 default:
2319 llvm_unreachable("Unknown condition code!");
2320 case ISD::SETNE:
2321 return AArch64CC::NE;
2322 case ISD::SETEQ:
2323 return AArch64CC::EQ;
2324 case ISD::SETGT:
2325 return AArch64CC::GT;
2326 case ISD::SETGE:
2327 return AArch64CC::GE;
2328 case ISD::SETLT:
2329 return AArch64CC::LT;
2330 case ISD::SETLE:
2331 return AArch64CC::LE;
2332 case ISD::SETUGT:
2333 return AArch64CC::HI;
2334 case ISD::SETUGE:
2335 return AArch64CC::HS;
2336 case ISD::SETULT:
2337 return AArch64CC::LO;
2338 case ISD::SETULE:
2339 return AArch64CC::LS;
2340 }
2341}
2342
2343/// changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC.
2344static void changeFPCCToAArch64CC(ISD::CondCode CC,
2345 AArch64CC::CondCode &CondCode,
2346 AArch64CC::CondCode &CondCode2) {
2347 CondCode2 = AArch64CC::AL;
2348 switch (CC) {
2349 default:
2350 llvm_unreachable("Unknown FP condition!");
2351 case ISD::SETEQ:
2352 case ISD::SETOEQ:
2353 CondCode = AArch64CC::EQ;
2354 break;
2355 case ISD::SETGT:
2356 case ISD::SETOGT:
2357 CondCode = AArch64CC::GT;
2358 break;
2359 case ISD::SETGE:
2360 case ISD::SETOGE:
2361 CondCode = AArch64CC::GE;
2362 break;
2363 case ISD::SETOLT:
2364 CondCode = AArch64CC::MI;
2365 break;
2366 case ISD::SETOLE:
2367 CondCode = AArch64CC::LS;
2368 break;
2369 case ISD::SETONE:
2370 CondCode = AArch64CC::MI;
2371 CondCode2 = AArch64CC::GT;
2372 break;
2373 case ISD::SETO:
2374 CondCode = AArch64CC::VC;
2375 break;
2376 case ISD::SETUO:
2377 CondCode = AArch64CC::VS;
2378 break;
2379 case ISD::SETUEQ:
2380 CondCode = AArch64CC::EQ;
2381 CondCode2 = AArch64CC::VS;
2382 break;
2383 case ISD::SETUGT:
2384 CondCode = AArch64CC::HI;
2385 break;
2386 case ISD::SETUGE:
2387 CondCode = AArch64CC::PL;
2388 break;
2389 case ISD::SETLT:
2390 case ISD::SETULT:
2391 CondCode = AArch64CC::LT;
2392 break;
2393 case ISD::SETLE:
2394 case ISD::SETULE:
2395 CondCode = AArch64CC::LE;
2396 break;
2397 case ISD::SETNE:
2398 case ISD::SETUNE:
2399 CondCode = AArch64CC::NE;
2400 break;
2401 }
2402}
2403
2404/// Convert a DAG fp condition code to an AArch64 CC.
2405/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
2406/// should be AND'ed instead of OR'ed.
2407static void changeFPCCToANDAArch64CC(ISD::CondCode CC,
2408 AArch64CC::CondCode &CondCode,
2409 AArch64CC::CondCode &CondCode2) {
2410 CondCode2 = AArch64CC::AL;
2411 switch (CC) {
2412 default:
2413 changeFPCCToAArch64CC(CC, CondCode, CondCode2);
2414 assert(CondCode2 == AArch64CC::AL);
2415 break;
2416 case ISD::SETONE:
2417 // (a one b)
2418 // == ((a olt b) || (a ogt b))
2419 // == ((a ord b) && (a une b))
2420 CondCode = AArch64CC::VC;
2421 CondCode2 = AArch64CC::NE;
2422 break;
2423 case ISD::SETUEQ:
2424 // (a ueq b)
2425 // == ((a uno b) || (a oeq b))
2426 // == ((a ule b) && (a uge b))
2427 CondCode = AArch64CC::PL;
2428 CondCode2 = AArch64CC::LE;
2429 break;
2430 }
2431}
2432
2433/// changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64
2434/// CC usable with the vector instructions. Fewer operations are available
2435/// without a real NZCV register, so we have to use less efficient combinations
2436/// to get the same effect.
2437static void changeVectorFPCCToAArch64CC(ISD::CondCode CC,
2438 AArch64CC::CondCode &CondCode,
2439 AArch64CC::CondCode &CondCode2,
2440 bool &Invert) {
2441 Invert = false;
2442 switch (CC) {
2443 default:
2444 // Mostly the scalar mappings work fine.
2445 changeFPCCToAArch64CC(CC, CondCode, CondCode2);
2446 break;
2447 case ISD::SETUO:
2448 Invert = true;
2449 LLVM_FALLTHROUGH;
2450 case ISD::SETO:
2451 CondCode = AArch64CC::MI;
2452 CondCode2 = AArch64CC::GE;
2453 break;
2454 case ISD::SETUEQ:
2455 case ISD::SETULT:
2456 case ISD::SETULE:
2457 case ISD::SETUGT:
2458 case ISD::SETUGE:
2459 // All of the compare-mask comparisons are ordered, but we can switch
2460 // between the two by a double inversion. E.g. ULE == !OGT.
2461 Invert = true;
2462 changeFPCCToAArch64CC(getSetCCInverse(CC, /* FP inverse */ MVT::f32),
2463 CondCode, CondCode2);
2464 break;
2465 }
2466}
2467
2468static bool isLegalArithImmed(uint64_t C) {
2469 // Matches AArch64DAGToDAGISel::SelectArithImmed().
2470 bool IsLegal = (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
2471 LLVM_DEBUG(dbgs() << "Is imm " << C
2472 << " legal: " << (IsLegal ? "yes\n" : "no\n"));
2473 return IsLegal;
2474}
2475
2476// Can a (CMP op1, (sub 0, op2) be turned into a CMN instruction on
2477// the grounds that "op1 - (-op2) == op1 + op2" ? Not always, the C and V flags
2478// can be set differently by this operation. It comes down to whether
2479// "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then
2480// everything is fine. If not then the optimization is wrong. Thus general
2481// comparisons are only valid if op2 != 0.
2482//
2483// So, finally, the only LLVM-native comparisons that don't mention C and V
2484// are SETEQ and SETNE. They're the only ones we can safely use CMN for in
2485// the absence of information about op2.
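// Illustrative example (editor's addition): for an EQ/NE compare such as
// (setcc eq x, (sub 0, y)), recognising the pattern here lets emitComparison()
// use ADDS instead of SUBS, i.e. "neg w8, w1; cmp w0, w8" becomes
// "cmn w0, w1", which is safe because EQ/NE only inspect the Z flag.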
2486static bool isCMN(SDValue Op, ISD::CondCode CC) {
2487 return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0)) &&
2488 (CC == ISD::SETEQ || CC == ISD::SETNE);
2489}
2490
2491static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &dl,
2492 SelectionDAG &DAG, SDValue Chain,
2493 bool IsSignaling) {
2494 EVT VT = LHS.getValueType();
2495 assert(VT != MVT::f128);
2496 assert(VT != MVT::f16 && "Lowering of strict fp16 not yet implemented");
2497 unsigned Opcode =
2498 IsSignaling ? AArch64ISD::STRICT_FCMPE : AArch64ISD::STRICT_FCMP;
2499 return DAG.getNode(Opcode, dl, {VT, MVT::Other}, {Chain, LHS, RHS});
2500}
2501
2502static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2503 const SDLoc &dl, SelectionDAG &DAG) {
2504 EVT VT = LHS.getValueType();
2505 const bool FullFP16 =
2506 static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
2507
2508 if (VT.isFloatingPoint()) {
2509 assert(VT != MVT::f128);
2510 if (VT == MVT::f16 && !FullFP16) {
2511 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
2512 RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
2513 VT = MVT::f32;
2514 }
2515 return DAG.getNode(AArch64ISD::FCMP, dl, VT, LHS, RHS);
2516 }
2517
2518 // The CMP instruction is just an alias for SUBS, and representing it as
2519 // SUBS means that it's possible to get CSE with subtract operations.
2520 // A later phase can perform the optimization of setting the destination
2521 // register to WZR/XZR if it ends up being unused.
2522 unsigned Opcode = AArch64ISD::SUBS;
2523
2524 if (isCMN(RHS, CC)) {
2525 // Can we combine a (CMP op1, (sub 0, op2)) into a CMN instruction?
2526 Opcode = AArch64ISD::ADDS;
2527 RHS = RHS.getOperand(1);
2528 } else if (isCMN(LHS, CC)) {
2529 // As we are looking for EQ/NE compares, the operands can be commuted; can
2530 // we combine a (CMP (sub 0, op1), op2) into a CMN instruction?
2531 Opcode = AArch64ISD::ADDS;
2532 LHS = LHS.getOperand(1);
2533 } else if (isNullConstant(RHS) && !isUnsignedIntSetCC(CC)) {
2534 if (LHS.getOpcode() == ISD::AND) {
2535 // Similarly, (CMP (and X, Y), 0) can be implemented with a TST
2536 // (a.k.a. ANDS) except that the flags are only guaranteed to work for one
2537 // of the signed comparisons.
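        // Illustrative (editor's note): this folds e.g.
        //   and w8, w0, w1 ; cmp w8, #0
        // into a single "tst w0, w1" (ANDS with the zero register as its
        // destination); only equality and signed comparisons against zero
        // reach this path.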
2538 const SDValue ANDSNode = DAG.getNode(AArch64ISD::ANDS, dl,
2539 DAG.getVTList(VT, MVT_CC),
2540 LHS.getOperand(0),
2541 LHS.getOperand(1));
2542 // Replace all users of (and X, Y) with newly generated (ands X, Y)
2543 DAG.ReplaceAllUsesWith(LHS, ANDSNode);
2544 return ANDSNode.getValue(1);
2545 } else if (LHS.getOpcode() == AArch64ISD::ANDS) {
2546 // Use result of ANDS
2547 return LHS.getValue(1);
2548 }
2549 }
2550
2551 return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS)
2552 .getValue(1);
2553}
2554
2555/// \defgroup AArch64CCMP CMP;CCMP matching
2556///
2557/// These functions deal with the formation of CMP;CCMP;... sequences.
2558/// The CCMP/CCMN/FCCMP/FCCMPE instructions allow the conditional execution of
2559/// a comparison. They set the NZCV flags to a predefined value if their
2560 /// predicate is false. This allows us to express arbitrary conjunctions, for
2561/// example "cmp 0 (and (setCA (cmp A)) (setCB (cmp B)))"
2562/// expressed as:
2563/// cmp A
2564/// ccmp B, inv(CB), CA
2565/// check for CB flags
2566///
2567/// This naturally lets us implement chains of AND operations with SETCC
2568/// operands. And we can even implement some other situations by transforming
2569/// them:
2570/// - We can implement (NEG SETCC) i.e. negating a single comparison by
2571/// negating the flags used in a CCMP/FCCMP operations.
2572/// - We can negate the result of a whole chain of CMP/CCMP/FCCMP operations
2573/// by negating the flags we test for afterwards. i.e.
2574/// NEG (CMP CCMP CCCMP ...) can be implemented.
2575/// - Note that we can only ever negate all previously processed results.
2576/// What we can not implement by flipping the flags to test is a negation
2577/// of two sub-trees (because the negation affects all sub-trees emitted so
2578/// far, so the 2nd sub-tree we emit would also affect the first).
2579/// With those tools we can implement some OR operations:
2580/// - (OR (SETCC A) (SETCC B)) can be implemented via:
2581/// NEG (AND (NEG (SETCC A)) (NEG (SETCC B)))
2582/// - After transforming OR to NEG/AND combinations we may be able to use NEG
2583/// elimination rules from earlier to implement the whole thing as a
2584/// CCMP/FCCMP chain.
2585///
2586/// As complete example:
2587/// or (or (setCA (cmp A)) (setCB (cmp B)))
2588/// (and (setCC (cmp C)) (setCD (cmp D)))"
2589/// can be reassociated to:
2590 /// or (and (setCC (cmp C)) (setCD (cmp D)))
2591 /// (or (setCA (cmp A)) (setCB (cmp B)))
2592/// can be transformed to:
2593/// not (and (not (and (setCC (cmp C)) (setCD (cmp D))))
2594/// (and (not (setCA (cmp A)) (not (setCB (cmp B))))))"
2595/// which can be implemented as:
2596/// cmp C
2597/// ccmp D, inv(CD), CC
2598/// ccmp A, CA, inv(CD)
2599/// ccmp B, CB, inv(CA)
2600/// check for CB flags
2601///
2602/// A counterexample is "or (and A B) (and C D)" which translates to
2603 /// not (and (not (and (not A) (not B))) (not (and (not C) (not D)))); we
2604 /// can only implement one of the inner (not) operations, but not both!
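/// Minimal illustration (editor's addition, not part of the original
/// comment): "a == 0 && b > 7" can be emitted as
///   cmp  w0, #0
///   ccmp w1, #7, #4, eq ; compare b when a == 0, otherwise force Z=1
///   b.gt taken
/// so the second compare only influences the branch when the first
/// condition held.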
2605/// @{
2606
2607/// Create a conditional comparison; Use CCMP, CCMN or FCCMP as appropriate.
2608static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
2609 ISD::CondCode CC, SDValue CCOp,
2610 AArch64CC::CondCode Predicate,
2611 AArch64CC::CondCode OutCC,
2612 const SDLoc &DL, SelectionDAG &DAG) {
2613 unsigned Opcode = 0;
2614 const bool FullFP16 =
2615 static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
2616
2617 if (LHS.getValueType().isFloatingPoint()) {
2618 assert(LHS.getValueType() != MVT::f128)(static_cast<void> (0));
2619 if (LHS.getValueType() == MVT::f16 && !FullFP16) {
2620 LHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, LHS);
2621 RHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, RHS);
2622 }
2623 Opcode = AArch64ISD::FCCMP;
2624 } else if (RHS.getOpcode() == ISD::SUB) {
2625 SDValue SubOp0 = RHS.getOperand(0);
2626 if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
2627 // See emitComparison() on why we can only do this for SETEQ and SETNE.
2628 Opcode = AArch64ISD::CCMN;
2629 RHS = RHS.getOperand(1);
2630 }
2631 }
2632 if (Opcode == 0)
2633 Opcode = AArch64ISD::CCMP;
2634
2635 SDValue Condition = DAG.getConstant(Predicate, DL, MVT_CC);
2636 AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
2637 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
2638 SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
2639 return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp);
2640}
2641
2642/// Returns true if @p Val is a tree of AND/OR/SETCC operations that can be
2643/// expressed as a conjunction. See \ref AArch64CCMP.
2644/// \param CanNegate Set to true if we can negate the whole sub-tree just by
2645/// changing the conditions on the SETCC tests.
2646/// (this means we can call emitConjunctionRec() with
2647/// Negate==true on this sub-tree)
2648/// \param MustBeFirst Set to true if this subtree needs to be negated and we
2649/// cannot do the negation naturally. We are required to
2650/// emit the subtree first in this case.
2651 /// \param WillNegate Is true if we are called when the result of this
2652/// subexpression must be negated. This happens when the
2653/// outer expression is an OR. We can use this fact to know
2654/// that we have a double negation (or (or ...) ...) that
2655/// can be implemented for free.
2656static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
2657 bool &MustBeFirst, bool WillNegate,
2658 unsigned Depth = 0) {
2659 if (!Val.hasOneUse())
2660 return false;
2661 unsigned Opcode = Val->getOpcode();
2662 if (Opcode == ISD::SETCC) {
2663 if (Val->getOperand(0).getValueType() == MVT::f128)
2664 return false;
2665 CanNegate = true;
2666 MustBeFirst = false;
2667 return true;
2668 }
2669 // Protect against exponential runtime and stack overflow.
2670 if (Depth > 6)
2671 return false;
2672 if (Opcode == ISD::AND || Opcode == ISD::OR) {
2673 bool IsOR = Opcode == ISD::OR;
2674 SDValue O0 = Val->getOperand(0);
2675 SDValue O1 = Val->getOperand(1);
2676 bool CanNegateL;
2677 bool MustBeFirstL;
2678 if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth+1))
2679 return false;
2680 bool CanNegateR;
2681 bool MustBeFirstR;
2682 if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth+1))
2683 return false;
2684
2685 if (MustBeFirstL && MustBeFirstR)
2686 return false;
2687
2688 if (IsOR) {
2689 // For an OR expression we need to be able to naturally negate at least
2690 // one side or we cannot do the transformation at all.
2691 if (!CanNegateL && !CanNegateR)
2692 return false;
2693 // If the result of the OR will be negated and we can naturally negate
2694 // the leaves, then this sub-tree as a whole negates naturally.
2695 CanNegate = WillNegate && CanNegateL && CanNegateR;
2696 // If we cannot naturally negate the whole sub-tree, then this must be
2697 // emitted first.
2698 MustBeFirst = !CanNegate;
2699 } else {
2700 assert(Opcode == ISD::AND && "Must be OR or AND")(static_cast<void> (0));
2701 // We cannot naturally negate an AND operation.
2702 CanNegate = false;
2703 MustBeFirst = MustBeFirstL || MustBeFirstR;
2704 }
2705 return true;
2706 }
2707 return false;
2708}
2709
2710 /// Emit a conjunction or disjunction tree with the CMP/FCMP followed by a chain
2711 /// of CCMP/FCCMP ops. See @ref AArch64CCMP.
2712 /// Tries to transform the given i1 producing node @p Val to a series of compare
2713 /// and conditional compare operations. @returns an NZCV flags producing node
2714 /// and sets @p OutCC to the flags that should be tested, or returns SDValue() if
2715 /// the transformation was not possible.
2716 /// \p Negate is true if we want this sub-tree to be negated just by changing
2717 /// SETCC conditions.
2718static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val,
2719 AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp,
2720 AArch64CC::CondCode Predicate) {
2721 // We're at a tree leaf, produce a conditional comparison operation.
2722 unsigned Opcode = Val->getOpcode();
2723 if (Opcode == ISD::SETCC) {
2724 SDValue LHS = Val->getOperand(0);
2725 SDValue RHS = Val->getOperand(1);
2726 ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get();
2727 bool isInteger = LHS.getValueType().isInteger();
2728 if (Negate)
2729 CC = getSetCCInverse(CC, LHS.getValueType());
2730 SDLoc DL(Val);
2731 // Determine OutCC and handle FP special case.
2732 if (isInteger) {
2733 OutCC = changeIntCCToAArch64CC(CC);
2734 } else {
2735 assert(LHS.getValueType().isFloatingPoint())(static_cast<void> (0));
2736 AArch64CC::CondCode ExtraCC;
2737 changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
2738 // Some floating point conditions can't be tested with a single condition
2739 // code. Construct an additional comparison in this case.
2740 if (ExtraCC != AArch64CC::AL) {
2741 SDValue ExtraCmp;
2742 if (!CCOp.getNode())
2743 ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG);
2744 else
2745 ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate,
2746 ExtraCC, DL, DAG);
2747 CCOp = ExtraCmp;
2748 Predicate = ExtraCC;
2749 }
2750 }
2751
2752 // Produce a normal comparison if we are first in the chain
2753 if (!CCOp)
2754 return emitComparison(LHS, RHS, CC, DL, DAG);
2755 // Otherwise produce a ccmp.
2756 return emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, OutCC, DL,
2757 DAG);
2758 }
2759 assert(Val->hasOneUse() && "Valid conjunction/disjunction tree")(static_cast<void> (0));
2760
2761 bool IsOR = Opcode == ISD::OR;
2762
2763 SDValue LHS = Val->getOperand(0);
2764 bool CanNegateL;
2765 bool MustBeFirstL;
2766 bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR);
2767 assert(ValidL && "Valid conjunction/disjunction tree")(static_cast<void> (0));
2768 (void)ValidL;
2769
2770 SDValue RHS = Val->getOperand(1);
2771 bool CanNegateR;
2772 bool MustBeFirstR;
2773 bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR);
2774 assert(ValidR && "Valid conjunction/disjunction tree")(static_cast<void> (0));
2775 (void)ValidR;
2776
2777 // Swap sub-tree that must come first to the right side.
2778 if (MustBeFirstL) {
2779 assert(!MustBeFirstR && "Valid conjunction/disjunction tree")(static_cast<void> (0));
2780 std::swap(LHS, RHS);
2781 std::swap(CanNegateL, CanNegateR);
2782 std::swap(MustBeFirstL, MustBeFirstR);
2783 }
2784
2785 bool NegateR;
2786 bool NegateAfterR;
2787 bool NegateL;
2788 bool NegateAfterAll;
2789 if (Opcode == ISD::OR) {
2790 // Swap the sub-tree that we can negate naturally to the left.
2791 if (!CanNegateL) {
2792 assert(CanNegateR && "at least one side must be negatable")(static_cast<void> (0));
2793 assert(!MustBeFirstR && "invalid conjunction/disjunction tree")(static_cast<void> (0));
2794 assert(!Negate)(static_cast<void> (0));
2795 std::swap(LHS, RHS);
2796 NegateR = false;
2797 NegateAfterR = true;
2798 } else {
2799 // Negate the left sub-tree if possible, otherwise negate the result.
2800 NegateR = CanNegateR;
2801 NegateAfterR = !CanNegateR;
2802 }
2803 NegateL = true;
2804 NegateAfterAll = !Negate;
2805 } else {
2806 assert(Opcode == ISD::AND && "Valid conjunction/disjunction tree")(static_cast<void> (0));
2807 assert(!Negate && "Valid conjunction/disjunction tree")(static_cast<void> (0));
2808
2809 NegateL = false;
2810 NegateR = false;
2811 NegateAfterR = false;
2812 NegateAfterAll = false;
2813 }
2814
2815 // Emit sub-trees.
2816 AArch64CC::CondCode RHSCC;
2817 SDValue CmpR = emitConjunctionRec(DAG, RHS, RHSCC, NegateR, CCOp, Predicate);
2818 if (NegateAfterR)
2819 RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
2820 SDValue CmpL = emitConjunctionRec(DAG, LHS, OutCC, NegateL, CmpR, RHSCC);
2821 if (NegateAfterAll)
2822 OutCC = AArch64CC::getInvertedCondCode(OutCC);
2823 return CmpL;
2824}
2825
2826 /// Emit an expression as a conjunction (a series of CCMP/FCCMP ops).
2827/// In some cases this is even possible with OR operations in the expression.
2828/// See \ref AArch64CCMP.
2829/// \see emitConjunctionRec().
2830static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val,
2831 AArch64CC::CondCode &OutCC) {
2832 bool DummyCanNegate;
2833 bool DummyMustBeFirst;
2834 if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false))
2835 return SDValue();
2836
2837 return emitConjunctionRec(DAG, Val, OutCC, false, SDValue(), AArch64CC::AL);
2838}
2839
2840/// @}
2841
2842/// Returns how profitable it is to fold a comparison's operand's shift and/or
2843/// extension operations.
2844static unsigned getCmpOperandFoldingProfit(SDValue Op) {
2845 auto isSupportedExtend = [&](SDValue V) {
2846 if (V.getOpcode() == ISD::SIGN_EXTEND_INREG)
2847 return true;
2848
2849 if (V.getOpcode() == ISD::AND)
2850 if (ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
2851 uint64_t Mask = MaskCst->getZExtValue();
2852 return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
2853 }
2854
2855 return false;
2856 };
2857
2858 if (!Op.hasOneUse())
2859 return 0;
2860
2861 if (isSupportedExtend(Op))
2862 return 1;
2863
2864 unsigned Opc = Op.getOpcode();
2865 if (Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA)
2866 if (ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
2867 uint64_t Shift = ShiftCst->getZExtValue();
2868 if (isSupportedExtend(Op.getOperand(0)))
2869 return (Shift <= 4) ? 2 : 1;
2870 EVT VT = Op.getValueType();
2871 if ((VT == MVT::i32 && Shift <= 31) || (VT == MVT::i64 && Shift <= 63))
2872 return 1;
2873 }
2874
2875 return 0;
2876}
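// A minimal standalone sketch (plain C++, illustrative helper names) of why
// the AND masks accepted by isSupportedExtend above count as extends: masking
// with 0xFF, 0xFFFF or 0xFFFFFFFF is exactly a zero-extension of the low
// 8, 16 or 32 bits, which AArch64 compares can fold as UXTB/UXTH/UXTW
// operands.
static unsigned long long zextByte(unsigned long long v) {
  return (unsigned long long)(unsigned char)v;  // truncate, then zero-extend
}
static unsigned long long maskByte(unsigned long long v) { return v & 0xFF; }
// For all v: zextByte(v) == maskByte(v); the same holds for 0xFFFF (UXTH)
// and 0xFFFFFFFF (UXTW).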
2877
2878static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2879 SDValue &AArch64cc, SelectionDAG &DAG,
2880 const SDLoc &dl) {
2881 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
2882 EVT VT = RHS.getValueType();
2883 uint64_t C = RHSC->getZExtValue();
2884 if (!isLegalArithImmed(C)) {
2885 // Constant does not fit, try adjusting it by one?
2886 switch (CC) {
2887 default:
2888 break;
2889 case ISD::SETLT:
2890 case ISD::SETGE:
2891 if ((VT == MVT::i32 && C != 0x80000000 &&
2892 isLegalArithImmed((uint32_t)(C - 1))) ||
2893 (VT == MVT::i64 && C != 0x80000000ULL &&
2894 isLegalArithImmed(C - 1ULL))) {
2895 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
2896 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
2897 RHS = DAG.getConstant(C, dl, VT);
2898 }
2899 break;
2900 case ISD::SETULT:
2901 case ISD::SETUGE:
2902 if ((VT == MVT::i32 && C != 0 &&
2903 isLegalArithImmed((uint32_t)(C - 1))) ||
2904 (VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) {
2905 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
2906 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
2907 RHS = DAG.getConstant(C, dl, VT);
2908 }
2909 break;
2910 case ISD::SETLE:
2911 case ISD::SETGT:
2912 if ((VT == MVT::i32 && C != INT32_MAX(2147483647) &&
2913 isLegalArithImmed((uint32_t)(C + 1))) ||
2914 (VT == MVT::i64 && C != INT64_MAX(9223372036854775807L) &&
2915 isLegalArithImmed(C + 1ULL))) {
2916 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
2917 C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
2918 RHS = DAG.getConstant(C, dl, VT);
2919 }
2920 break;
2921 case ISD::SETULE:
2922 case ISD::SETUGT:
2923 if ((VT == MVT::i32 && C != UINT32_MAX(4294967295U) &&
2924 isLegalArithImmed((uint32_t)(C + 1))) ||
2925 (VT == MVT::i64 && C != UINT64_MAX(18446744073709551615UL) &&
2926 isLegalArithImmed(C + 1ULL))) {
2927 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
2928 C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
2929 RHS = DAG.getConstant(C, dl, VT);
2930 }
2931 break;
2932 }
2933 }
2934 }
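  // For illustration of the adjustment above: 4097 (0x1001) is not a legal
  // arithmetic immediate, but "x s< 4097" is equivalent to "x s<= 4096", and
  // 4096 is encodable (#1, LSL #12), so the SETLT case rewrites the condition
  // to SETLE and decrements the constant by one.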
2935
2936 // Comparisons are canonicalized so that the RHS operand is simpler than the
2937 // LHS one, the extreme case being when RHS is an immediate. However, AArch64
2938 // can fold some shift+extend operations on the RHS operand, so swap the
2939 // operands if that can be done.
2940 //
2941 // For example:
2942 // lsl w13, w11, #1
2943 // cmp w13, w12
2944 // can be turned into:
2945 // cmp w12, w11, lsl #1
2946 if (!isa<ConstantSDNode>(RHS) ||
2947 !isLegalArithImmed(cast<ConstantSDNode>(RHS)->getZExtValue())) {
2948 SDValue TheLHS = isCMN(LHS, CC) ? LHS.getOperand(1) : LHS;
2949
2950 if (getCmpOperandFoldingProfit(TheLHS) > getCmpOperandFoldingProfit(RHS)) {
2951 std::swap(LHS, RHS);
2952 CC = ISD::getSetCCSwappedOperands(CC);
2953 }
2954 }
2955
2956 SDValue Cmp;
2957 AArch64CC::CondCode AArch64CC;
2958 if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
2959 const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS);
2960
2961 // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
2962 // For the i8 operand, the largest immediate is 255, so this can be easily
2963 // encoded in the compare instruction. For the i16 operand, however, the
2964 // largest immediate (65535) cannot be encoded in the compare.
2965 // Therefore, use a sign extending load and cmn to avoid materializing the
2966 // -1 constant. For example,
2967 // movz w1, #65535
2968 // ldrh w0, [x0, #0]
2969 // cmp w0, w1
2970 // >
2971 // ldrsh w0, [x0, #0]
2972 // cmn w0, #1
2973 // Fundamentally, we're relying on the property that (zext LHS) == (zext RHS)
2974 // if and only if (sext LHS) == (sext RHS). The checks are in place to
2975 // ensure both the LHS and RHS are truly zero extended and to make sure the
2976 // transformation is profitable.
2977 if ((RHSC->getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) &&
2978 cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
2979 cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
2980 LHS.getNode()->hasNUsesOfValue(1, 0)) {
2981 int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
2982 if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
2983 SDValue SExt =
2984 DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
2985 DAG.getValueType(MVT::i16));
2986 Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl,
2987 RHS.getValueType()),
2988 CC, dl, DAG);
2989 AArch64CC = changeIntCCToAArch64CC(CC);
2990 }
2991 }
2992
2993 if (!Cmp && (RHSC->isNullValue() || RHSC->isOne())) {
2994 if ((Cmp = emitConjunction(DAG, LHS, AArch64CC))) {
2995 if ((CC == ISD::SETNE) ^ RHSC->isNullValue())
2996 AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
2997 }
2998 }
2999 }
3000
3001 if (!Cmp) {
3002 Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
3003 AArch64CC = changeIntCCToAArch64CC(CC);
3004 }
3005 AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC);
3006 return Cmp;
3007}
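// A minimal standalone sketch (plain C++, illustrative helper names) of the
// property the i16 cmp->cmn rewrite above relies on: for 16-bit values,
// equality of the zero-extended operands coincides with equality of the
// sign-extended operands, so a sign-extending load plus CMN with a small
// negative immediate can replace materializing the large constant.
static int sext16(unsigned short v) {
  // Manually sign-extend a 16-bit value to avoid implementation-defined casts.
  return (v & 0x8000) ? (int)v - 0x10000 : (int)v;
}
static bool equalAsZext(unsigned short a, unsigned short b) {
  return (unsigned)a == (unsigned)b;
}
static bool equalAsSext(unsigned short a, unsigned short b) {
  return sext16(a) == sext16(b);
}
// For all 16-bit a and b: equalAsZext(a, b) == equalAsSext(a, b).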
3008
3009static std::pair<SDValue, SDValue>
3010getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
3011 assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) &&(static_cast<void> (0))
3012 "Unsupported value type")(static_cast<void> (0));
3013 SDValue Value, Overflow;
3014 SDLoc DL(Op);
3015 SDValue LHS = Op.getOperand(0);
3016 SDValue RHS = Op.getOperand(1);
3017 unsigned Opc = 0;
3018 switch (Op.getOpcode()) {
3019 default:
3020 llvm_unreachable("Unknown overflow instruction!")__builtin_unreachable();
3021 case ISD::SADDO:
3022 Opc = AArch64ISD::ADDS;
3023 CC = AArch64CC::VS;
3024 break;
3025 case ISD::UADDO:
3026 Opc = AArch64ISD::ADDS;
3027 CC = AArch64CC::HS;
3028 break;
3029 case ISD::SSUBO:
3030 Opc = AArch64ISD::SUBS;
3031 CC = AArch64CC::VS;
3032 break;
3033 case ISD::USUBO:
3034 Opc = AArch64ISD::SUBS;
3035 CC = AArch64CC::LO;
3036 break;
3037 // Multiply needs a little bit of extra work.
3038 case ISD::SMULO:
3039 case ISD::UMULO: {
3040 CC = AArch64CC::NE;
3041 bool IsSigned = Op.getOpcode() == ISD::SMULO;
3042 if (Op.getValueType() == MVT::i32) {
3043 // Extend to 64-bits, then perform a 64-bit multiply.
3044 unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
3045 LHS = DAG.getNode(ExtendOpc, DL, MVT::i64, LHS);
3046 RHS = DAG.getNode(ExtendOpc, DL, MVT::i64, RHS);
3047 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
3048 Value = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
3049
3050 // Check that the result fits into a 32-bit integer.
3051 SDVTList VTs = DAG.getVTList(MVT::i64, MVT_CC);
3052 if (IsSigned) {
3053 // cmp xreg, wreg, sxtw
3054 SDValue SExtMul = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Value);
3055 Overflow =
3056 DAG.getNode(AArch64ISD::SUBS, DL, VTs, Mul, SExtMul).getValue(1);
3057 } else {
3058 // tst xreg, #0xffffffff00000000
3059 SDValue UpperBits = DAG.getConstant(0xFFFFFFFF00000000, DL, MVT::i64);
3060 Overflow =
3061 DAG.getNode(AArch64ISD::ANDS, DL, VTs, Mul, UpperBits).getValue(1);
3062 }
3063 break;
3064 }
3065 assert(Op.getValueType() == MVT::i64 && "Expected an i64 value type")(static_cast<void> (0));
3066 // For the 64 bit multiply
3067 Value = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
3068 if (IsSigned) {
3069 SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS);
3070 SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value,
3071 DAG.getConstant(63, DL, MVT::i64));
3072 // It is important that LowerBits is last; otherwise the arithmetic
3073 // shift will not be folded into the compare (SUBS).
3074 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
3075 Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
3076 .getValue(1);
3077 } else {
3078 SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS);
3079 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
3080 Overflow =
3081 DAG.getNode(AArch64ISD::SUBS, DL, VTs,
3082 DAG.getConstant(0, DL, MVT::i64),
3083 UpperBits).getValue(1);
3084 }
3085 break;
3086 }
3087 } // switch (...)
3088
3089 if (Opc) {
3090 SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
3091
3092 // Emit the AArch64 operation with overflow check.
3093 Value = DAG.getNode(Opc, DL, VTs, LHS, RHS);
3094 Overflow = Value.getValue(1);
3095 }
3096 return std::make_pair(Value, Overflow);
3097}
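// A minimal standalone sketch (plain C++, illustrative helper names) of the
// i32 multiply overflow checks formed above: the product is computed in 64
// bits, unsigned overflow is "any upper 32 bits set" (the ANDS against
// 0xffffffff00000000), and signed overflow is the product falling outside the
// i32 range, which the lowering tests by comparing the 64-bit product with
// its own sign-extended low half (the SUBS against the sxtw operand).
static bool umul32Overflows(unsigned a, unsigned b) {
  unsigned long long wide = (unsigned long long)a * b;
  return (wide & 0xFFFFFFFF00000000ULL) != 0;
}
static bool smul32Overflows(int a, int b) {
  long long wide = (long long)a * b;
  return wide < -2147483647LL - 1 || wide > 2147483647LL;
}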
3098
3099SDValue AArch64TargetLowering::LowerXOR(SDValue Op, SelectionDAG &DAG) const {
3100 if (useSVEForFixedLengthVectorVT(Op.getValueType()))
3101 return LowerToScalableOp(Op, DAG);
3102
3103 SDValue Sel = Op.getOperand(0);
3104 SDValue Other = Op.getOperand(1);
3105 SDLoc dl(Sel);
3106
3107 // If the operand is an overflow checking operation, invert the condition
3108 // code and kill the Not operation. I.e., transform:
3109 // (xor overflow_op_bool, 1)
3110 // -->
3111 // (csel 1, 0, invert(cc), overflow_op_bool)
3112 // ... which later gets transformed to just a cset instruction with an
3113 // inverted condition code, rather than a cset + eor sequence.
3114 if (isOneConstant(Other) && ISD::isOverflowIntrOpRes(Sel)) {
3115 // Only lower legal XALUO ops.
3116 if (!DAG.getTargetLoweringInfo().isTypeLegal(Sel->getValueType(0)))
3117 return SDValue();
3118
3119 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
3120 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
3121 AArch64CC::CondCode CC;
3122 SDValue Value, Overflow;
3123 std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Sel.getValue(0), DAG);
3124 SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
3125 return DAG.getNode(AArch64ISD::CSEL, dl, Op.getValueType(), TVal, FVal,
3126 CCVal, Overflow);
3127 }
3128 // If neither operand is a SELECT_CC, give up.
3129 if (Sel.getOpcode() != ISD::SELECT_CC)
3130 std::swap(Sel, Other);
3131 if (Sel.getOpcode() != ISD::SELECT_CC)
3132 return Op;
3133
3134 // The folding we want to perform is:
3135 // (xor x, (select_cc a, b, cc, 0, -1) )
3136 // -->
3137 // (csel x, (xor x, -1), cc ...)
3138 //
3139 // The latter will get matched to a CSINV instruction.
3140
3141 ISD::CondCode CC = cast<CondCodeSDNode>(Sel.getOperand(4))->get();
3142 SDValue LHS = Sel.getOperand(0);
3143 SDValue RHS = Sel.getOperand(1);
3144 SDValue TVal = Sel.getOperand(2);
3145 SDValue FVal = Sel.getOperand(3);
3146
3147 // FIXME: This could be generalized to non-integer comparisons.
3148 if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
3149 return Op;
3150
3151 ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
3152 ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
3153
3154 // The values aren't constants, this isn't the pattern we're looking for.
3155 if (!CFVal || !CTVal)
3156 return Op;
3157
3158 // We can commute the SELECT_CC by inverting the condition. This
3159 // might be needed to make this fit into a CSINV pattern.
3160 if (CTVal->isAllOnesValue() && CFVal->isNullValue()) {
3161 std::swap(TVal, FVal);
3162 std::swap(CTVal, CFVal);
3163 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
3164 }
3165
3166 // If the constants line up, perform the transform!
3167 if (CTVal->isNullValue() && CFVal->isAllOnesValue()) {
3168 SDValue CCVal;
3169 SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
3170
3171 FVal = Other;
3172 TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other,
3173 DAG.getConstant(-1ULL, dl, Other.getValueType()));
3174
3175 return DAG.getNode(AArch64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal,
3176 CCVal, Cmp);
3177 }
3178
3179 return Op;
3180}
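// A minimal standalone sketch (plain C++, illustrative helper names) of the
// CSINV-shaped fold above: XOR with a mask that a comparison selects as
// either 0 or -1 is the same as selecting between the value and its bitwise
// NOT, which is what the emitted CSEL of x and (xor x, -1) expresses.
static unsigned xorWithMask(unsigned x, bool cond) {
  unsigned mask = cond ? 0u : ~0u;   // mirrors (select_cc a, b, cc, 0, -1)
  return x ^ mask;
}
static unsigned selectNot(unsigned x, bool cond) {
  return cond ? x : ~x;              // mirrors (csel x, (xor x, -1), cc, ...)
}
// For all x and cond: xorWithMask(x, cond) == selectNot(x, cond).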
3181
3182static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
3183 EVT VT = Op.getValueType();
3184
3185 // Let legalize expand this if it isn't a legal type yet.
3186 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
3187 return SDValue();
3188
3189 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
3190
3191 unsigned Opc;
3192 bool ExtraOp = false;
3193 switch (Op.getOpcode()) {
3194 default:
3195 llvm_unreachable("Invalid code")__builtin_unreachable();
3196 case ISD::ADDC:
3197 Opc = AArch64ISD::ADDS;
3198 break;
3199 case ISD::SUBC:
3200 Opc = AArch64ISD::SUBS;
3201 break;
3202 case ISD::ADDE:
3203 Opc = AArch64ISD::ADCS;
3204 ExtraOp = true;
3205 break;
3206 case ISD::SUBE:
3207 Opc = AArch64ISD::SBCS;
3208 ExtraOp = true;
3209 break;
3210 }
3211
3212 if (!ExtraOp)
3213 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1));
3214 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1),
3215 Op.getOperand(2));
3216}
3217
3218static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
3219 // Let legalize expand this if it isn't a legal type yet.
3220 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
3221 return SDValue();
3222
3223 SDLoc dl(Op);
3224 AArch64CC::CondCode CC;
3225 // The actual operation that sets the overflow or carry flag.
3226 SDValue Value, Overflow;
3227 std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Op, DAG);
3228
3229 // We use 0 and 1 as false and true values.
3230 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
3231 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
3232
3233 // We use an inverted condition, because the conditional select is inverted
3234 // too. This will allow it to be selected to a single instruction:
3235 // CSINC Wd, WZR, WZR, invert(cond).
3236 SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
3237 Overflow = DAG.getNode(AArch64ISD::CSEL, dl, MVT::i32, FVal, TVal,
3238 CCVal, Overflow);
3239
3240 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
3241 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
3242}
3243
3244// Prefetch operands are:
3245// 1: Address to prefetch
3246// 2: bool isWrite
3247// 3: int locality (0 = no locality ... 3 = extreme locality)
3248// 4: bool isDataCache
3249static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
3250 SDLoc DL(Op);
3251 unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
3252 unsigned Locality = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
3253 unsigned IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
3254
3255 bool IsStream = !Locality;
3256 // When the locality number is set
3257 if (Locality) {
3258 // The front-end should have filtered out the out-of-range values
3259 assert(Locality <= 3 && "Prefetch locality out-of-range")(static_cast<void> (0));
3260 // The locality degree is the opposite of the cache speed.
3261 // Put the number the other way around.
3262 // The encoding starts at 0 for level 1
3263 Locality = 3 - Locality;
3264 }
3265
3266 // Build the mask value encoding the expected behavior.
3267 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
3268 (!IsData << 3) | // IsDataCache bit
3269 (Locality << 1) | // Cache level bits
3270 (unsigned)IsStream; // Stream bit
3271 return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),
3272 DAG.getConstant(PrfOp, DL, MVT::i32), Op.getOperand(1));
3273}
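// A minimal standalone sketch (plain C++, illustrative helper name) of the
// PRFM operand built above: bit 4 distinguishes store from load prefetch,
// bit 3 selects the instruction cache, bits 2:1 hold the target cache level
// (0 = L1), and bit 0 marks a streaming (non-temporal) prefetch.
static unsigned encodePrfOp(bool isWrite, unsigned locality, bool isData) {
  bool isStream = locality == 0;
  unsigned level = locality ? 3 - locality : 0;  // locality 3 maps to L1 (0)
  return ((unsigned)isWrite << 4) | ((unsigned)!isData << 3) | (level << 1) |
         (unsigned)isStream;
}
// encodePrfOp(/*isWrite=*/false, /*locality=*/3, /*isData=*/true) yields 0,
// i.e. a PLDL1KEEP prefetch.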
3274
3275SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
3276 SelectionDAG &DAG) const {
3277 EVT VT = Op.getValueType();
3278 if (VT.isScalableVector())
3279 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_EXTEND_MERGE_PASSTHRU);
3280
3281 if (useSVEForFixedLengthVectorVT(VT))
3282 return LowerFixedLengthFPExtendToSVE(Op, DAG);
3283
3284 assert(Op.getValueType() == MVT::f128 && "Unexpected lowering")(static_cast<void> (0));
3285 return SDValue();
3286}
3287
3288SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
3289 SelectionDAG &DAG) const {
3290 if (Op.getValueType().isScalableVector())
3291 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_ROUND_MERGE_PASSTHRU);
3292
3293 bool IsStrict = Op->isStrictFPOpcode();
3294 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3295 EVT SrcVT = SrcVal.getValueType();
3296
3297 if (useSVEForFixedLengthVectorVT(SrcVT))
3298 return LowerFixedLengthFPRoundToSVE(Op, DAG);
3299
3300 if (SrcVT != MVT::f128) {
3301 // Expand cases where the input is a vector bigger than NEON.
3302 if (useSVEForFixedLengthVectorVT(SrcVT))
3303 return SDValue();
3304
3305 // It's legal except when f128 is involved
3306 return Op;
3307 }
3308
3309 return SDValue();
3310}
3311
3312SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
3313 SelectionDAG &DAG) const {
3314 // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
3315 // Any additional optimization in this function should be recorded
3316 // in the cost tables.
3317 EVT InVT = Op.getOperand(0).getValueType();
3318 EVT VT = Op.getValueType();
3319
3320 if (VT.isScalableVector()) {
3321 unsigned Opcode = Op.getOpcode() == ISD::FP_TO_UINT
3322 ? AArch64ISD::FCVTZU_MERGE_PASSTHRU
3323 : AArch64ISD::FCVTZS_MERGE_PASSTHRU;
3324 return LowerToPredicatedOp(Op, DAG, Opcode);
3325 }
3326
3327 if (useSVEForFixedLengthVectorVT(VT) || useSVEForFixedLengthVectorVT(InVT))
3328 return LowerFixedLengthFPToIntToSVE(Op, DAG);
3329
3330 unsigned NumElts = InVT.getVectorNumElements();
3331
3332 // f16 conversions are promoted to f32 when full fp16 is not supported.
3333 if (InVT.getVectorElementType() == MVT::f16 &&
3334 !Subtarget->hasFullFP16()) {
3335 MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts);
3336 SDLoc dl(Op);
3337 return DAG.getNode(
3338 Op.getOpcode(), dl, Op.getValueType(),
3339 DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0)));
3340 }
3341
3342 uint64_t VTSize = VT.getFixedSizeInBits();
3343 uint64_t InVTSize = InVT.getFixedSizeInBits();
3344 if (VTSize < InVTSize) {
3345 SDLoc dl(Op);
3346 SDValue Cv =
3347 DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(),
3348 Op.getOperand(0));
3349 return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
3350 }
3351
3352 if (VTSize > InVTSize) {
3353 SDLoc dl(Op);
3354 MVT ExtVT =
3355 MVT::getVectorVT(MVT::getFloatingPointVT(VT.getScalarSizeInBits()),
3356 VT.getVectorNumElements());
3357 SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, ExtVT, Op.getOperand(0));
3358 return DAG.getNode(Op.getOpcode(), dl, VT, Ext);
3359 }
3360
3361 // Type changing conversions are illegal.
3362 return Op;
3363}
3364
3365SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
3366 SelectionDAG &DAG) const {
3367 bool IsStrict = Op->isStrictFPOpcode();
3368 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3369
3370 if (SrcVal.getValueType().isVector())
3371 return LowerVectorFP_TO_INT(Op, DAG);
3372
3373 // f16 conversions are promoted to f32 when full fp16 is not supported.
3374 if (SrcVal.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
3375 assert(!IsStrict && "Lowering of strict fp16 not yet implemented")(static_cast<void> (0));
3376 SDLoc dl(Op);
3377 return DAG.getNode(
3378 Op.getOpcode(), dl, Op.getValueType(),
3379 DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, SrcVal));
3380 }
3381
3382 if (SrcVal.getValueType() != MVT::f128) {
3383 // It's legal except when f128 is involved
3384 return Op;
3385 }
3386
3387 return SDValue();
3388}
3389
3390SDValue
3391AArch64TargetLowering::LowerVectorFP_TO_INT_SAT(SDValue Op,
3392 SelectionDAG &DAG) const {
3393 // AArch64 FP-to-int conversions saturate to the destination element size, so
3394 // we can lower common saturating conversions to simple instructions.
3395 SDValue SrcVal = Op.getOperand(0);
3396 EVT SrcVT = SrcVal.getValueType();
3397 EVT DstVT = Op.getValueType();
3398 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
3399
3400 uint64_t SrcElementWidth = SrcVT.getScalarSizeInBits();
3401 uint64_t DstElementWidth = DstVT.getScalarSizeInBits();
3402 uint64_t SatWidth = SatVT.getScalarSizeInBits();
3403 assert(SatWidth <= DstElementWidth &&(static_cast<void> (0))
3404 "Saturation width cannot exceed result width")(static_cast<void> (0));
3405
3406 // TODO: Consider lowering to SVE operations, as in LowerVectorFP_TO_INT.
3407 // Currently, the `llvm.fpto[su]i.sat.*` intrinsics don't accept scalable
3408 // types, so this is hard to reach.
3409 if (DstVT.isScalableVector())
3410 return SDValue();
3411
3412 // TODO: Saturate to SatWidth explicitly.
3413 if (SatWidth != DstElementWidth)
3414 return SDValue();
3415
3416 EVT SrcElementVT = SrcVT.getVectorElementType();
3417
3418 // In the absence of FP16 support, promote f16 to f32, like
3419 // LowerVectorFP_TO_INT().
3420 if (SrcElementVT == MVT::f16 && !Subtarget->hasFullFP16()) {
3421 MVT F32VT = MVT::getVectorVT(MVT::f32, SrcVT.getVectorNumElements());
3422 return DAG.getNode(Op.getOpcode(), SDLoc(Op), DstVT,
3423 DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), F32VT, SrcVal),
3424 Op.getOperand(1));
3425 }
3426
3427 // Cases that we can emit directly.
3428 if ((SrcElementWidth == DstElementWidth) &&
3429 (SrcElementVT == MVT::f64 || SrcElementVT == MVT::f32 ||
3430 (SrcElementVT == MVT::f16 && Subtarget->hasFullFP16()))) {
3431 return Op;
3432 }
3433
3434 // For all other cases, fall back on the expanded form.
3435 return SDValue();
3436}
3437
3438SDValue AArch64TargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
3439 SelectionDAG &DAG) const {
3440 // AArch64 FP-to-int conversions saturate to the destination register size, so
3441 // we can lower common saturating conversions to simple instructions.
3442 SDValue SrcVal = Op.getOperand(0);
3443 EVT SrcVT = SrcVal.getValueType();
3444
3445 if (SrcVT.isVector())
3446 return LowerVectorFP_TO_INT_SAT(Op, DAG);
3447
3448 EVT DstVT = Op.getValueType();
3449 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
3450 uint64_t SatWidth = SatVT.getScalarSizeInBits();
3451 uint64_t DstWidth = DstVT.getScalarSizeInBits();
3452 assert(SatWidth <= DstWidth && "Saturation width cannot exceed result width")(static_cast<void> (0));
3453
3454 // TODO: Saturate to SatWidth explicitly.
3455 if (SatWidth != DstWidth)
3456 return SDValue();
3457
3458 // In the absence of FP16 support, promote f16 to f32, like LowerFP_TO_INT().
3459 if (SrcVT == MVT::f16 && !Subtarget->hasFullFP16())
3460 return DAG.getNode(Op.getOpcode(), SDLoc(Op), DstVT,
3461 DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, SrcVal),
3462 Op.getOperand(1));
3463
3464 // Cases that we can emit directly.
3465 if ((SrcVT == MVT::f64 || SrcVT == MVT::f32 ||
3466 (SrcVT == MVT::f16 && Subtarget->hasFullFP16())) &&
3467 (DstVT == MVT::i64 || DstVT == MVT::i32))
3468 return Op;
3469
3470 // For all other cases, fall back on the expanded form.
3471 return SDValue();
3472}
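// A minimal standalone sketch (plain C++, illustrative helper name) of the
// saturating scalar conversion the lowering above maps onto FCVTZS: values
// are truncated toward zero, out-of-range inputs clamp to the integer limits,
// and NaN produces 0, shown here for f64 -> i32.
static int fpToI32Sat(double v) {
  if (v != v)
    return 0;                 // NaN saturates to zero
  if (v <= -2147483648.0)
    return -2147483647 - 1;   // INT32_MIN
  if (v >= 2147483647.0)
    return 2147483647;        // INT32_MAX
  return (int)v;              // in range: truncate toward zero
}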
3473
3474SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
3475 SelectionDAG &DAG) const {
3476 // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
3477 // Any additional optimization in this function should be recorded
3478 // in the cost tables.
3479 EVT VT = Op.getValueType();
3480 SDLoc dl(Op);
3481 SDValue In = Op.getOperand(0);
3482 EVT InVT = In.getValueType();
3483 unsigned Opc = Op.getOpcode();
3484 bool IsSigned = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
3485
3486 if (VT.isScalableVector()) {
3487 if (InVT.getVectorElementType() == MVT::i1) {
3488 // We can't directly extend an SVE predicate; extend it first.
3489 unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
3490 EVT CastVT = getPromotedVTForPredicate(InVT);
3491 In = DAG.getNode(CastOpc, dl, CastVT, In);
3492 return DAG.getNode(Opc, dl, VT, In);
3493 }
3494
3495 unsigned Opcode = IsSigned ? AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU
3496 : AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU;
3497 return LowerToPredicatedOp(Op, DAG, Opcode);
3498 }
3499
3500 if (useSVEForFixedLengthVectorVT(VT) || useSVEForFixedLengthVectorVT(InVT))
3501 return LowerFixedLengthIntToFPToSVE(Op, DAG);
3502
3503 uint64_t VTSize = VT.getFixedSizeInBits();
3504 uint64_t InVTSize = InVT.getFixedSizeInBits();
3505 if (VTSize < InVTSize) {
3506 MVT CastVT =
3507 MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()),
3508 InVT.getVectorNumElements());
3509 In = DAG.getNode(Opc, dl, CastVT, In);
3510 return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0, dl));
3511 }
3512
3513 if (VTSize > InVTSize) {
3514 unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
3515 EVT CastVT = VT.changeVectorElementTypeToInteger();
3516 In = DAG.getNode(CastOpc, dl, CastVT, In);
3517 return DAG.getNode(Opc, dl, VT, In);
3518 }
3519
3520 return Op;
3521}
3522
3523SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
3524 SelectionDAG &DAG) const {
3525 if (Op.getValueType().isVector())
3526 return LowerVectorINT_TO_FP(Op, DAG);
3527
3528 bool IsStrict = Op->isStrictFPOpcode();
3529 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3530
3531 // f16 conversions are promoted to f32 when full fp16 is not supported.
3532 if (Op.getValueType() == MVT::f16 &&
3533 !Subtarget->hasFullFP16()) {
3534 assert(!IsStrict && "Lowering of strict fp16 not yet implemented")(static_cast<void> (0));
3535 SDLoc dl(Op);
3536 return DAG.getNode(
3537 ISD::FP_ROUND, dl, MVT::f16,
3538 DAG.getNode(Op.getOpcode(), dl, MVT::f32, SrcVal),
3539 DAG.getIntPtrConstant(0, dl));
3540 }
3541
3542 // i128 conversions are libcalls.
3543 if (SrcVal.getValueType() == MVT::i128)
3544 return SDValue();
3545
3546 // Other conversions are legal, unless it's to the completely software-based
3547 // fp128.
3548 if (Op.getValueType() != MVT::f128)
3549 return Op;
3550 return SDValue();
3551}
3552
3553SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
3554 SelectionDAG &DAG) const {
3555 // For iOS, we want to call an alternative entry point: __sincos_stret,
3556 // which returns the values in two S / D registers.
3557 SDLoc dl(Op);
3558 SDValue Arg = Op.getOperand(0);
3559 EVT ArgVT = Arg.getValueType();
3560 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
3561
3562 ArgListTy Args;
3563 ArgListEntry Entry;
3564
3565 Entry.Node = Arg;
3566 Entry.Ty = ArgTy;
3567 Entry.IsSExt = false;
3568 Entry.IsZExt = false;
3569 Args.push_back(Entry);
3570
3571 RTLIB::Libcall LC = ArgVT == MVT::f64 ? RTLIB::SINCOS_STRET_F64
3572 : RTLIB::SINCOS_STRET_F32;
3573 const char *LibcallName = getLibcallName(LC);
3574 SDValue Callee =
3575 DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));
3576
3577 StructType *RetTy = StructType::get(ArgTy, ArgTy);
3578 TargetLowering::CallLoweringInfo CLI(DAG);
3579 CLI.setDebugLoc(dl)
3580 .setChain(DAG.getEntryNode())
3581 .setLibCallee(CallingConv::Fast, RetTy, Callee, std::move(Args));
3582
3583 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3584 return CallResult.first;
3585}
3586
3587static MVT getSVEContainerType(EVT ContentTy);
3588
3589SDValue AArch64TargetLowering::LowerBITCAST(SDValue Op,
3590 SelectionDAG &DAG) const {
3591 EVT OpVT = Op.getValueType();
3592 EVT ArgVT = Op.getOperand(0).getValueType();
3593
3594 if (useSVEForFixedLengthVectorVT(OpVT))
3595 return LowerFixedLengthBitcastToSVE(Op, DAG);
3596
3597 if (OpVT.isScalableVector()) {
3598 if (isTypeLegal(OpVT) && !isTypeLegal(ArgVT)) {
3599 assert(OpVT.isFloatingPoint() && !ArgVT.isFloatingPoint() &&(static_cast<void> (0))
3600 "Expected int->fp bitcast!")(static_cast<void> (0));
3601 SDValue ExtResult =
3602 DAG.getNode(ISD::ANY_EXTEND, SDLoc(Op), getSVEContainerType(ArgVT),
3603 Op.getOperand(0));
3604 return getSVESafeBitCast(OpVT, ExtResult, DAG);
3605 }
3606 return getSVESafeBitCast(OpVT, Op.getOperand(0), DAG);
3607 }
3608
3609 if (OpVT != MVT::f16 && OpVT != MVT::bf16)
3610 return SDValue();
3611
3612 assert(ArgVT == MVT::i16)(static_cast<void> (0));
3613 SDLoc DL(Op);
3614
3615 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op.getOperand(0));
3616 Op = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op);
3617 return SDValue(
3618 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, OpVT, Op,
3619 DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
3620 0);
3621}
3622
3623static EVT getExtensionTo64Bits(const EVT &OrigVT) {
3624 if (OrigVT.getSizeInBits() >= 64)
3625 return OrigVT;
3626
3627 assert(OrigVT.isSimple() && "Expecting a simple value type")(static_cast<void> (0));
3628
3629 MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
3630 switch (OrigSimpleTy) {
3631 default: llvm_unreachable("Unexpected Vector Type")__builtin_unreachable();
3632 case MVT::v2i8:
3633 case MVT::v2i16:
3634 return MVT::v2i32;
3635 case MVT::v4i8:
3636 return MVT::v4i16;
3637 }
3638}
3639
3640static SDValue addRequiredExtensionForVectorMULL(SDValue N, SelectionDAG &DAG,
3641 const EVT &OrigTy,
3642 const EVT &ExtTy,
3643 unsigned ExtOpcode) {
3644 // The vector originally had a size of OrigTy. It was then extended to ExtTy.
3645 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
3646 // 64-bits we need to insert a new extension so that it will be 64-bits.
3647 assert(ExtTy.is128BitVector() && "Unexpected extension size")(static_cast<void> (0));
3648 if (OrigTy.getSizeInBits() >= 64)
3649 return N;
3650
3651 // Must extend size to at least 64 bits to be used as an operand for VMULL.
3652 EVT NewVT = getExtensionTo64Bits(OrigTy);
3653
3654 return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
3655}
3656
3657static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
3658 bool isSigned) {
3659 EVT VT = N->getValueType(0);
3660
3661 if (N->getOpcode() != ISD::BUILD_VECTOR)
3662 return false;
3663
3664 for (const SDValue &Elt : N->op_values()) {
3665 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
3666 unsigned EltSize = VT.getScalarSizeInBits();
3667 unsigned HalfSize = EltSize / 2;
3668 if (isSigned) {
3669 if (!isIntN(HalfSize, C->getSExtValue()))
3670 return false;
3671 } else {
3672 if (!isUIntN(HalfSize, C->getZExtValue()))
3673 return false;
3674 }
3675 continue;
3676 }
3677 return false;
3678 }
3679
3680 return true;
3681}
3682
3683static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
3684 if (N->getOpcode() == ISD::SIGN_EXTEND ||
3685 N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND)
3686 return addRequiredExtensionForVectorMULL(N->getOperand(0), DAG,
3687 N->getOperand(0)->getValueType(0),
3688 N->getValueType(0),
3689 N->getOpcode());
3690
3691 assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR")(static_cast<void> (0));
3692 EVT VT = N->getValueType(0);
3693 SDLoc dl(N);
3694 unsigned EltSize = VT.getScalarSizeInBits() / 2;
3695 unsigned NumElts = VT.getVectorNumElements();
3696 MVT TruncVT = MVT::getIntegerVT(EltSize);
3697 SmallVector<SDValue, 8> Ops;
3698 for (unsigned i = 0; i != NumElts; ++i) {
3699 ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
3700 const APInt &CInt = C->getAPIntValue();
3701 // Element types smaller than 32 bits are not legal, so use i32 elements.
3702 // The values are implicitly truncated so sext vs. zext doesn't matter.
3703 Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
3704 }
3705 return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
3706}
3707
3708static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
3709 return N->getOpcode() == ISD::SIGN_EXTEND ||
3710 N->getOpcode() == ISD::ANY_EXTEND ||
3711 isExtendedBUILD_VECTOR(N, DAG, true);
3712}
3713
3714static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
3715 return N->getOpcode() == ISD::ZERO_EXTEND ||
3716 N->getOpcode() == ISD::ANY_EXTEND ||
3717 isExtendedBUILD_VECTOR(N, DAG, false);
3718}
3719
3720static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
3721 unsigned Opcode = N->getOpcode();
3722 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
3723 SDNode *N0 = N->getOperand(0).getNode();
3724 SDNode *N1 = N->getOperand(1).getNode();
3725 return N0->hasOneUse() && N1->hasOneUse() &&
3726 isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
3727 }
3728 return false;
3729}
3730
3731static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
3732 unsigned Opcode = N->getOpcode();
3733 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
3734 SDNode *N0 = N->getOperand(0).getNode();
3735 SDNode *N1 = N->getOperand(1).getNode();
3736 return N0->hasOneUse() && N1->hasOneUse() &&
3737 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
3738 }
3739 return false;
3740}
3741
3742SDValue AArch64TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
3743 SelectionDAG &DAG) const {
3744 // The rounding mode is in bits 23:22 of the FPCR.
3745 // The AArch64 rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
3746 // The formula we use to implement this is (((FPCR + (1 << 22)) >> 22) & 3)
3747 // so that the shift + and get folded into a bitfield extract.
3748 SDLoc dl(Op);
3749
3750 SDValue Chain = Op.getOperand(0);
3751 SDValue FPCR_64 = DAG.getNode(
3752 ISD::INTRINSIC_W_CHAIN, dl, {MVT::i64, MVT::Other},
3753 {Chain, DAG.getConstant(Intrinsic::aarch64_get_fpcr, dl, MVT::i64)});
3754 Chain = FPCR_64.getValue(1);
3755 SDValue FPCR_32 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, FPCR_64);
3756 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPCR_32,
3757 DAG.getConstant(1U << 22, dl, MVT::i32));
3758 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
3759 DAG.getConstant(22, dl, MVT::i32));
3760 SDValue AND = DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
3761 DAG.getConstant(3, dl, MVT::i32));
3762 return DAG.getMergeValues({AND, Chain}, dl);
3763}
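// A minimal standalone sketch (plain C++, illustrative helper name) of the
// mapping computed above once the FPCR rounding field has been isolated:
// adding one modulo four turns the AArch64 encoding (0 = nearest, 1 = +inf,
// 2 = -inf, 3 = toward zero) into the FLT_ROUNDS values 1, 2, 3 and 0.
static int fltRoundsFromFpcrField(unsigned rmode) {  // rmode in 0..3
  return (int)((rmode + 1) & 3);
}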
3764
3765SDValue AArch64TargetLowering::LowerSET_ROUNDING(SDValue Op,
3766 SelectionDAG &DAG) const {
3767 SDLoc DL(Op);
3768 SDValue Chain = Op->getOperand(0);
3769 SDValue RMValue = Op->getOperand(1);
3770
3771 // The rounding mode is in bits 23:22 of the FPCR.
3772 // The llvm.set.rounding argument value to the rounding mode in FPCR mapping
3773 // is 0->3, 1->0, 2->1, 3->2. The formula we use to implement this is
3774 // (((arg - 1) & 3) << 22).
3775 //
3776 // The argument of llvm.set.rounding must be within the segment [0, 3], so
3777 // NearestTiesToAway (4) is not handled here. It is the responsibility of the
3778 // code that generated llvm.set.rounding to ensure this condition.
3779
3780 // Calculate new value of FPCR[23:22].
3781 RMValue = DAG.getNode(ISD::SUB, DL, MVT::i32, RMValue,
3782 DAG.getConstant(1, DL, MVT::i32));
3783 RMValue = DAG.getNode(ISD::AND, DL, MVT::i32, RMValue,
3784 DAG.getConstant(0x3, DL, MVT::i32));
3785 RMValue =
3786 DAG.getNode(ISD::SHL, DL, MVT::i32, RMValue,
3787 DAG.getConstant(AArch64::RoundingBitsPos, DL, MVT::i32));
3788 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, RMValue);
3789
3790 // Get current value of FPCR.
3791 SDValue Ops[] = {
3792 Chain, DAG.getTargetConstant(Intrinsic::aarch64_get_fpcr, DL, MVT::i64)};
3793 SDValue FPCR =
3794 DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i64, MVT::Other}, Ops);
3795 Chain = FPCR.getValue(1);
3796 FPCR = FPCR.getValue(0);
3797
3798 // Put the new rounding mode into FPCR[23:22].
3799 const int RMMask = ~(AArch64::Rounding::rmMask << AArch64::RoundingBitsPos);
3800 FPCR = DAG.getNode(ISD::AND, DL, MVT::i64, FPCR,
3801 DAG.getConstant(RMMask, DL, MVT::i64));
3802 FPCR = DAG.getNode(ISD::OR, DL, MVT::i64, FPCR, RMValue);
3803 SDValue Ops2[] = {
3804 Chain, DAG.getTargetConstant(Intrinsic::aarch64_set_fpcr, DL, MVT::i64),
3805 FPCR};
3806 return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
3807}
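// A minimal standalone sketch (plain C++, illustrative helper name) of the
// inverse mapping used above when writing FPCR: subtracting one modulo four
// takes the llvm.set.rounding argument (0 = toward zero, 1 = nearest,
// 2 = +inf, 3 = -inf) back to the AArch64 field encoding before it is
// shifted into bits 23:22.
static unsigned fpcrFieldFromSetRounding(unsigned arg) {  // arg in 0..3
  return (arg - 1) & 3;
}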
3808
3809SDValue AArch64TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
3810 EVT VT = Op.getValueType();
3811
3812 // If SVE is available then i64 vector multiplications can also be made legal.
3813 bool OverrideNEON = VT == MVT::v2i64 || VT == MVT::v1i64;
3814
3815 if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT, OverrideNEON))
3816 return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED, OverrideNEON);
3817
3818 // Multiplications are only custom-lowered for 128-bit vectors so that
3819 // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
3820 assert(VT.is128BitVector() && VT.isInteger() &&(static_cast<void> (0))
3821 "unexpected type for custom-lowering ISD::MUL")(static_cast<void> (0));
3822 SDNode *N0 = Op.getOperand(0).getNode();
3823 SDNode *N1 = Op.getOperand(1).getNode();
3824 unsigned NewOpc = 0;
3825 bool isMLA = false;
3826 bool isN0SExt = isSignExtended(N0, DAG);
3827 bool isN1SExt = isSignExtended(N1, DAG);
3828 if (isN0SExt && isN1SExt)
3829 NewOpc = AArch64ISD::SMULL;
3830 else {
3831 bool isN0ZExt = isZeroExtended(N0, DAG);
3832 bool isN1ZExt = isZeroExtended(N1, DAG);
3833 if (isN0ZExt && isN1ZExt)
3834 NewOpc = AArch64ISD::UMULL;
3835 else if (isN1SExt || isN1ZExt) {
3836 // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
3837 // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
3838 if (isN1SExt && isAddSubSExt(N0, DAG)) {
3839 NewOpc = AArch64ISD::SMULL;
3840 isMLA = true;
3841 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
3842 NewOpc = AArch64ISD::UMULL;
3843 isMLA = true;
3844 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
3845 std::swap(N0, N1);
3846 NewOpc = AArch64ISD::UMULL;
3847 isMLA = true;
3848 }
3849 }
3850
3851 if (!NewOpc) {
3852 if (VT == MVT::v2i64)
3853 // Fall through to expand this. It is not legal.
3854 return SDValue();
3855 else
3856 // Other vector multiplications are legal.
3857 return Op;
3858 }
3859 }
3860
3861 // Legalize to a S/UMULL instruction
3862 SDLoc DL(Op);
3863 SDValue Op0;
3864 SDValue Op1 = skipExtensionForVectorMULL(N1, DAG);
3865 if (!isMLA) {
3866 Op0 = skipExtensionForVectorMULL(N0, DAG);
3867 assert(Op0.getValueType().is64BitVector() &&(static_cast<void> (0))
3868 Op1.getValueType().is64BitVector() &&(static_cast<void> (0))
3869 "unexpected types for extended operands to VMULL")(static_cast<void> (0));
3870 return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
3871 }
3872 // Optimize (zext A + zext B) * C to (S/UMULL A, C) + (S/UMULL B, C) during
3873 // isel lowering to take advantage of no-stall back-to-back s/umul + s/umla.
3874 // This is beneficial on CPUs with accumulate forwarding such as Cortex-A53/A57.
3875 SDValue N00 = skipExtensionForVectorMULL(N0->getOperand(0).getNode(), DAG);
3876 SDValue N01 = skipExtensionForVectorMULL(N0->getOperand(1).getNode(), DAG);
3877 EVT Op1VT = Op1.getValueType();
3878 return DAG.getNode(N0->getOpcode(), DL, VT,
3879 DAG.getNode(NewOpc, DL, VT,
3880 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
3881 DAG.getNode(NewOpc, DL, VT,
3882 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
3883}
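// A minimal standalone sketch (plain C++, illustrative helper names) of the
// MLA-shaped rewrite above: multiplying a sum of (widened) operands by a
// third operand is distributed into two widening multiplies feeding an add,
// which is the shape that then maps onto back-to-back s/umul + s/umla as the
// comment above notes.
static unsigned long long mulOfSum(unsigned a, unsigned b, unsigned c) {
  return ((unsigned long long)a + b) * c;
}
static unsigned long long sumOfMuls(unsigned a, unsigned b, unsigned c) {
  return (unsigned long long)a * c + (unsigned long long)b * c;
}
// For all a, b and c: mulOfSum(a, b, c) == sumOfMuls(a, b, c) (arithmetic is
// modulo 2^64; in the lowering the operands are half-width, so no wrap
// occurs).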
3884
3885static inline SDValue getPTrue(SelectionDAG &DAG, SDLoc DL, EVT VT,
3886 int Pattern) {
3887 return DAG.getNode(AArch64ISD::PTRUE, DL, VT,
3888 DAG.getTargetConstant(Pattern, DL, MVT::i32));
3889}
3890
3891static SDValue lowerConvertToSVBool(SDValue Op, SelectionDAG &DAG) {
3892 SDLoc DL(Op);
3893 EVT OutVT = Op.getValueType();
3894 SDValue InOp = Op.getOperand(1);
3895 EVT InVT = InOp.getValueType();
3896
3897 // Return the operand if the cast isn't changing type,
3898 // i.e. <n x 16 x i1> -> <n x 16 x i1>
3899 if (InVT == OutVT)
3900 return InOp;
3901
3902 SDValue Reinterpret =
3903 DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, OutVT, InOp);
3904
3905 // If the argument converted to an svbool is a ptrue or a comparison, the
3906 // lanes introduced by the widening are zero by construction.
3907 switch (InOp.getOpcode()) {
3908 case AArch64ISD::SETCC_MERGE_ZERO:
3909 return Reinterpret;
3910 case ISD::INTRINSIC_WO_CHAIN:
3911 if (InOp.getConstantOperandVal(0) == Intrinsic::aarch64_sve_ptrue)
3912 return Reinterpret;
3913 }
3914
3915 // Otherwise, zero the newly introduced lanes.
3916 SDValue Mask = getPTrue(DAG, DL, InVT, AArch64SVEPredPattern::all);
3917 SDValue MaskReinterpret =
3918 DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, OutVT, Mask);
3919 return DAG.getNode(ISD::AND, DL, OutVT, Reinterpret, MaskReinterpret);
3920}
3921
3922SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
3923 SelectionDAG &DAG) const {
3924 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3925 SDLoc dl(Op);
3926 switch (IntNo) {
3927 default: return SDValue(); // Don't custom lower most intrinsics.
3928 case Intrinsic::thread_pointer: {
3929 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3930 return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT);
3931 }
3932 case Intrinsic::aarch64_neon_abs: {
3933 EVT Ty = Op.getValueType();
3934 if (Ty == MVT::i64) {
3935 SDValue Result = DAG.getNode(ISD::BITCAST, dl, MVT::v1i64,
3936 Op.getOperand(1));
3937 Result = DAG.getNode(ISD::ABS, dl, MVT::v1i64, Result);
3938 return DAG.getNode(ISD::BITCAST, dl, MVT::i64, Result);
3939 } else if (Ty.isVector() && Ty.isInteger() && isTypeLegal(Ty)) {
3940 return DAG.getNode(ISD::ABS, dl, Ty, Op.getOperand(1));
3941 } else {
3942 report_fatal_error("Unexpected type for AArch64 NEON intrinsic");
3943 }
3944 }
3945 case Intrinsic::aarch64_neon_smax:
3946 return DAG.getNode(ISD::SMAX, dl, Op.getValueType(),
3947 Op.getOperand(1), Op.getOperand(2));
3948 case Intrinsic::aarch64_neon_umax:
3949 return DAG.getNode(ISD::UMAX, dl, Op.getValueType(),
3950 Op.getOperand(1), Op.getOperand(2));
3951 case Intrinsic::aarch64_neon_smin:
3952 return DAG.getNode(ISD::SMIN, dl, Op.getValueType(),
3953 Op.getOperand(1), Op.getOperand(2));
3954 case Intrinsic::aarch64_neon_umin:
3955 return DAG.getNode(ISD::UMIN, dl, Op.getValueType(),
3956 Op.getOperand(1), Op.getOperand(2));
3957
3958 case Intrinsic::aarch64_sve_sunpkhi:
3959 return DAG.getNode(AArch64ISD::SUNPKHI, dl, Op.getValueType(),
3960 Op.getOperand(1));
3961 case Intrinsic::aarch64_sve_sunpklo:
3962 return DAG.getNode(AArch64ISD::SUNPKLO, dl, Op.getValueType(),
3963 Op.getOperand(1));
3964 case Intrinsic::aarch64_sve_uunpkhi:
3965 return DAG.getNode(AArch64ISD::UUNPKHI, dl, Op.getValueType(),
3966 Op.getOperand(1));
3967 case Intrinsic::aarch64_sve_uunpklo:
3968 return DAG.getNode(AArch64ISD::UUNPKLO, dl, Op.getValueType(),
3969 Op.getOperand(1));
3970 case Intrinsic::aarch64_sve_clasta_n:
3971 return DAG.getNode(AArch64ISD::CLASTA_N, dl, Op.getValueType(),
3972 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3973 case Intrinsic::aarch64_sve_clastb_n:
3974 return DAG.getNode(AArch64ISD::CLASTB_N, dl, Op.getValueType(),
3975 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3976 case Intrinsic::aarch64_sve_lasta:
3977 return DAG.getNode(AArch64ISD::LASTA, dl, Op.getValueType(),
3978 Op.getOperand(1), Op.getOperand(2));
3979 case Intrinsic::aarch64_sve_lastb:
3980 return DAG.getNode(AArch64ISD::LASTB, dl, Op.getValueType(),
3981 Op.getOperand(1), Op.getOperand(2));
3982 case Intrinsic::aarch64_sve_rev:
3983 return DAG.getNode(ISD::VECTOR_REVERSE, dl, Op.getValueType(),
3984 Op.getOperand(1));
3985 case Intrinsic::aarch64_sve_tbl:
3986 return DAG.getNode(AArch64ISD::TBL, dl, Op.getValueType(),
3987 Op.getOperand(1), Op.getOperand(2));
3988 case Intrinsic::aarch64_sve_trn1:
3989 return DAG.getNode(AArch64ISD::TRN1, dl, Op.getValueType(),
3990 Op.getOperand(1), Op.getOperand(2));
3991 case Intrinsic::aarch64_sve_trn2:
3992 return DAG.getNode(AArch64ISD::TRN2, dl, Op.getValueType(),
3993 Op.getOperand(1), Op.getOperand(2));
3994 case Intrinsic::aarch64_sve_uzp1:
3995 return DAG.getNode(AArch64ISD::UZP1, dl, Op.getValueType(),
3996 Op.getOperand(1), Op.getOperand(2));
3997 case Intrinsic::aarch64_sve_uzp2:
3998 return DAG.getNode(AArch64ISD::UZP2, dl, Op.getValueType(),
3999 Op.getOperand(1), Op.getOperand(2));
4000 case Intrinsic::aarch64_sve_zip1:
4001 return DAG.getNode(AArch64ISD::ZIP1, dl, Op.getValueType(),
4002 Op.getOperand(1), Op.getOperand(2));
4003 case Intrinsic::aarch64_sve_zip2:
4004 return DAG.getNode(AArch64ISD::ZIP2, dl, Op.getValueType(),
4005 Op.getOperand(1), Op.getOperand(2));
4006 case Intrinsic::aarch64_sve_splice:
4007 return DAG.getNode(AArch64ISD::SPLICE, dl, Op.getValueType(),
4008 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4009 case Intrinsic::aarch64_sve_ptrue:
4010 return getPTrue(DAG, dl, Op.getValueType(),
4011 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
4012 case Intrinsic::aarch64_sve_clz:
4013 return DAG.getNode(AArch64ISD::CTLZ_MERGE_PASSTHRU, dl, Op.getValueType(),
4014 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4015 case Intrinsic::aarch64_sve_cnt: {
4016 SDValue Data = Op.getOperand(3);
4017 // CTPOP only supports integer operands.
4018 if (Data.getValueType().isFloatingPoint())
4019 Data = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Data);
4020 return DAG.getNode(AArch64ISD::CTPOP_MERGE_PASSTHRU, dl, Op.getValueType(),
4021 Op.getOperand(2), Data, Op.getOperand(1));
4022 }
4023 case Intrinsic::aarch64_sve_dupq_lane:
4024 return LowerDUPQLane(Op, DAG);
4025 case Intrinsic::aarch64_sve_convert_from_svbool:
4026 return DAG.getNode(AArch64ISD::REINTERPRET_CAST, dl, Op.getValueType(),
4027 Op.getOperand(1));
4028 case Intrinsic::aarch64_sve_convert_to_svbool:
4029 return lowerConvertToSVBool(Op, DAG);
4030 case Intrinsic::aarch64_sve_fneg:
4031 return DAG.getNode(AArch64ISD::FNEG_MERGE_PASSTHRU, dl, Op.getValueType(),
4032 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4033 case Intrinsic::aarch64_sve_frintp:
4034 return DAG.getNode(AArch64ISD::FCEIL_MERGE_PASSTHRU, dl, Op.getValueType(),
4035 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4036 case Intrinsic::aarch64_sve_frintm:
4037 return DAG.getNode(AArch64ISD::FFLOOR_MERGE_PASSTHRU, dl, Op.getValueType(),
4038 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4039 case Intrinsic::aarch64_sve_frinti:
4040 return DAG.getNode(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU, dl, Op.getValueType(),
4041 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4042 case Intrinsic::aarch64_sve_frintx:
4043 return DAG.getNode(AArch64ISD::FRINT_MERGE_PASSTHRU, dl, Op.getValueType(),
4044 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4045 case Intrinsic::aarch64_sve_frinta:
4046 return DAG.getNode(AArch64ISD::FROUND_MERGE_PASSTHRU, dl, Op.getValueType(),
4047 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4048 case Intrinsic::aarch64_sve_frintn:
4049 return DAG.getNode(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU, dl, Op.getValueType(),
4050 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4051 case Intrinsic::aarch64_sve_frintz:
4052 return DAG.getNode(AArch64ISD::FTRUNC_MERGE_PASSTHRU, dl, Op.getValueType(),
4053 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4054 case Intrinsic::aarch64_sve_ucvtf:
4055 return DAG.getNode(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU, dl,
4056 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
4057 Op.getOperand(1));
4058 case Intrinsic::aarch64_sve_scvtf:
4059 return DAG.getNode(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU, dl,
4060 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
4061 Op.getOperand(1));
4062 case Intrinsic::aarch64_sve_fcvtzu:
4063 return DAG.getNode(AArch64ISD::FCVTZU_MERGE_PASSTHRU, dl,
4064 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
4065 Op.getOperand(1));
4066 case Intrinsic::aarch64_sve_fcvtzs:
4067 return DAG.getNode(AArch64ISD::FCVTZS_MERGE_PASSTHRU, dl,
4068 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
4069 Op.getOperand(1));
4070 case Intrinsic::aarch64_sve_fsqrt:
4071 return DAG.getNode(AArch64ISD::FSQRT_MERGE_PASSTHRU, dl, Op.getValueType(),
4072 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4073 case Intrinsic::aarch64_sve_frecpx:
4074 return DAG.getNode(AArch64ISD::FRECPX_MERGE_PASSTHRU, dl, Op.getValueType(),
4075 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4076 case Intrinsic::aarch64_sve_fabs:
4077 return DAG.getNode(AArch64ISD::FABS_MERGE_PASSTHRU, dl, Op.getValueType(),
4078 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4079 case Intrinsic::aarch64_sve_abs:
4080 return DAG.getNode(AArch64ISD::ABS_MERGE_PASSTHRU, dl, Op.getValueType(),
4081 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4082 case Intrinsic::aarch64_sve_neg:
4083 return DAG.getNode(AArch64ISD::NEG_MERGE_PASSTHRU, dl, Op.getValueType(),
4084 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4085 case Intrinsic::aarch64_sve_insr: {
4086 SDValue Scalar = Op.getOperand(2);
4087 EVT ScalarTy = Scalar.getValueType();
4088 if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
4089 Scalar = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Scalar);
4090
4091 return DAG.getNode(AArch64ISD::INSR, dl, Op.getValueType(),
4092 Op.getOperand(1), Scalar);
4093 }
4094 case Intrinsic::aarch64_sve_rbit:
4095 return DAG.getNode(AArch64ISD::BITREVERSE_MERGE_PASSTHRU, dl,
4096 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
4097 Op.getOperand(1));
4098 case Intrinsic::aarch64_sve_revb:
4099 return DAG.getNode(AArch64ISD::BSWAP_MERGE_PASSTHRU, dl, Op.getValueType(),
4100 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4101 case Intrinsic::aarch64_sve_sxtb:
4102 return DAG.getNode(
4103 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
4104 Op.getOperand(2), Op.getOperand(3),
4105 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)),
4106 Op.getOperand(1));
4107 case Intrinsic::aarch64_sve_sxth:
4108 return DAG.getNode(
4109 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
4110 Op.getOperand(2), Op.getOperand(3),
4111 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),
4112 Op.getOperand(1));
4113 case Intrinsic::aarch64_sve_sxtw:
4114 return DAG.getNode(
4115 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
4116 Op.getOperand(2), Op.getOperand(3),
4117 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
4118 Op.getOperand(1));
4119 case Intrinsic::aarch64_sve_uxtb:
4120 return DAG.getNode(
4121 AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
4122 Op.getOperand(2), Op.getOperand(3),
4123 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)),
4124 Op.getOperand(1));
4125 case Intrinsic::aarch64_sve_uxth:
4126 return DAG.getNode(
4127 AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
4128 Op.getOperand(2), Op.getOperand(3),
4129 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),
4130 Op.getOperand(1));
4131 case Intrinsic::aarch64_sve_uxtw:
4132 return DAG.getNode(
4133 AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
4134 Op.getOperand(2), Op.getOperand(3),
4135 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
4136 Op.getOperand(1));
4137
4138 case Intrinsic::localaddress: {
4139 const auto &MF = DAG.getMachineFunction();
4140 const auto *RegInfo = Subtarget->getRegisterInfo();
4141 unsigned Reg = RegInfo->getLocalAddressRegister(MF);
4142 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg,
4143 Op.getSimpleValueType());
4144 }
4145
4146 case Intrinsic::eh_recoverfp: {
4147 // FIXME: This needs to be implemented to correctly handle highly aligned
4148 // stack objects. For now we simply return the incoming FP. Refer D53541
4149 // for more details.
4150 SDValue FnOp = Op.getOperand(1);
4151 SDValue IncomingFPOp = Op.getOperand(2);
4152 GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp);
4153 auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr);
4154 if (!Fn)
4155 report_fatal_error(
4156 "llvm.eh.recoverfp must take a function as the first argument");
4157 return IncomingFPOp;
4158 }
4159
4160 case Intrinsic::aarch64_neon_vsri:
4161 case Intrinsic::aarch64_neon_vsli: {
4162 EVT Ty = Op.getValueType();
4163
4164 if (!Ty.isVector())
4165 report_fatal_error("Unexpected type for aarch64_neon_vsli");
4166
4167     assert(Op.getConstantOperandVal(3) <= Ty.getScalarSizeInBits());
4168
4169 bool IsShiftRight = IntNo == Intrinsic::aarch64_neon_vsri;
4170 unsigned Opcode = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
4171 return DAG.getNode(Opcode, dl, Ty, Op.getOperand(1), Op.getOperand(2),
4172 Op.getOperand(3));
4173 }
4174
4175 case Intrinsic::aarch64_neon_srhadd:
4176 case Intrinsic::aarch64_neon_urhadd:
4177 case Intrinsic::aarch64_neon_shadd:
4178 case Intrinsic::aarch64_neon_uhadd: {
4179 bool IsSignedAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
4180 IntNo == Intrinsic::aarch64_neon_shadd);
4181 bool IsRoundingAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
4182 IntNo == Intrinsic::aarch64_neon_urhadd);
4183 unsigned Opcode =
4184 IsSignedAdd ? (IsRoundingAdd ? AArch64ISD::SRHADD : AArch64ISD::SHADD)
4185 : (IsRoundingAdd ? AArch64ISD::URHADD : AArch64ISD::UHADD);
4186 return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
4187 Op.getOperand(2));
4188 }
4189 case Intrinsic::aarch64_neon_sabd:
4190 case Intrinsic::aarch64_neon_uabd: {
4191 unsigned Opcode = IntNo == Intrinsic::aarch64_neon_uabd ? ISD::ABDU
4192 : ISD::ABDS;
4193 return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
4194 Op.getOperand(2));
4195 }
4196 case Intrinsic::aarch64_neon_uaddlp: {
4197 unsigned Opcode = AArch64ISD::UADDLP;
4198 return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1));
4199 }
4200 case Intrinsic::aarch64_neon_sdot:
4201 case Intrinsic::aarch64_neon_udot:
4202 case Intrinsic::aarch64_sve_sdot:
4203 case Intrinsic::aarch64_sve_udot: {
4204 unsigned Opcode = (IntNo == Intrinsic::aarch64_neon_udot ||
4205 IntNo == Intrinsic::aarch64_sve_udot)
4206 ? AArch64ISD::UDOT
4207 : AArch64ISD::SDOT;
4208 return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
4209 Op.getOperand(2), Op.getOperand(3));
4210 }
4211 }
4212}
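
// Illustrative note on the operand shuffling above: for the merging SVE
// intrinsics handled here the intrinsic operands are assumed to be
// (inactive/passthru, governing predicate, data), i.e. operands 1, 2 and 3,
// while the *_MERGE_PASSTHRU nodes take (predicate, data, passthru). That is
// why most cases emit
//   DAG.getNode(..., Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)).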
4213
4214bool AArch64TargetLowering::shouldExtendGSIndex(EVT VT, EVT &EltTy) const {
4215 if (VT.getVectorElementType() == MVT::i8 ||
4216 VT.getVectorElementType() == MVT::i16) {
4217 EltTy = MVT::i32;
4218 return true;
4219 }
4220 return false;
4221}
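
// Rough rationale for shouldExtendGSIndex above: SVE gather/scatter forms
// only take 32-bit or 64-bit offset elements, so i8/i16 index vectors are
// reported as needing widening to i32, and the caller extends the index
// accordingly before the gather/scatter node is formed.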
4222
4223bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const {
4224 if (VT.getVectorElementType() == MVT::i32 &&
4225 VT.getVectorElementCount().getKnownMinValue() >= 4 &&
4226 !VT.isFixedLengthVector())
4227 return true;
4228
4229 return false;
4230}
4231
4232bool AArch64TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
4233 return ExtVal.getValueType().isScalableVector() ||
4234 useSVEForFixedLengthVectorVT(ExtVal.getValueType(),
4235 /*OverrideNEON=*/true);
4236}
4237
4238unsigned getGatherVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
4239 std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = {
4240 {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ false),
4241 AArch64ISD::GLD1_MERGE_ZERO},
4242 {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ true),
4243 AArch64ISD::GLD1_UXTW_MERGE_ZERO},
4244 {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ false),
4245 AArch64ISD::GLD1_MERGE_ZERO},
4246 {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ true),
4247 AArch64ISD::GLD1_SXTW_MERGE_ZERO},
4248 {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ false),
4249 AArch64ISD::GLD1_SCALED_MERGE_ZERO},
4250 {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ true),
4251 AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO},
4252 {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ false),
4253 AArch64ISD::GLD1_SCALED_MERGE_ZERO},
4254 {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ true),
4255 AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO},
4256 };
4257 auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
4258 return AddrModes.find(Key)->second;
4259}
4260
4261unsigned getScatterVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
4262 std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = {
4263 {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ false),
4264 AArch64ISD::SST1_PRED},
4265 {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ true),
4266 AArch64ISD::SST1_UXTW_PRED},
4267 {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ false),
4268 AArch64ISD::SST1_PRED},
4269 {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ true),
4270 AArch64ISD::SST1_SXTW_PRED},
4271 {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ false),
4272 AArch64ISD::SST1_SCALED_PRED},
4273 {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ true),
4274 AArch64ISD::SST1_UXTW_SCALED_PRED},
4275 {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ false),
4276 AArch64ISD::SST1_SCALED_PRED},
4277 {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ true),
4278 AArch64ISD::SST1_SXTW_SCALED_PRED},
4279 };
4280 auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
4281 return AddrModes.find(Key)->second;
4282}
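
// Usage sketch for the two opcode maps above (values taken from the tables):
//   getGatherVecOpcode(/*IsScaled=*/true, /*IsSigned=*/false, /*NeedsExtend=*/true)
//       -> AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO
//   getScatterVecOpcode(/*IsScaled=*/true, /*IsSigned=*/false, /*NeedsExtend=*/true)
//       -> AArch64ISD::SST1_UXTW_SCALED_PRED
// When no extension is needed the signedness of the index does not matter,
// which is why both Signed variants map to the same unextended opcode.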
4283
4284unsigned getSignExtendedGatherOpcode(unsigned Opcode) {
4285 switch (Opcode) {
4286 default:
4287     llvm_unreachable("unimplemented opcode");
4288 return Opcode;
4289 case AArch64ISD::GLD1_MERGE_ZERO:
4290 return AArch64ISD::GLD1S_MERGE_ZERO;
4291 case AArch64ISD::GLD1_IMM_MERGE_ZERO:
4292 return AArch64ISD::GLD1S_IMM_MERGE_ZERO;
4293 case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
4294 return AArch64ISD::GLD1S_UXTW_MERGE_ZERO;
4295 case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
4296 return AArch64ISD::GLD1S_SXTW_MERGE_ZERO;
4297 case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
4298 return AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
4299 case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
4300 return AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO;
4301 case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
4302 return AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO;
4303 }
4304}
4305
4306bool getGatherScatterIndexIsExtended(SDValue Index) {
4307 unsigned Opcode = Index.getOpcode();
4308 if (Opcode == ISD::SIGN_EXTEND_INREG)
4309 return true;
4310
4311 if (Opcode == ISD::AND) {
4312 SDValue Splat = Index.getOperand(1);
4313 if (Splat.getOpcode() != ISD::SPLAT_VECTOR)
4314 return false;
4315 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Splat.getOperand(0));
4316 if (!Mask || Mask->getZExtValue() != 0xFFFFFFFF)
4317 return false;
4318 return true;
4319 }
4320
4321 return false;
4322}
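
// The two index forms recognized above correspond, roughly, to indices that
// were extended from 32 bits:
//   sign_extend_inreg %idx, ...         ; sign-extended 32-bit indices
//   and %idx, splat(0xFFFFFFFF)         ; zero-extended 32-bit indices
// Callers that see either form peel the extension off (Index.getOperand(0))
// and let the SXTW/UXTW addressing-mode variants perform it instead.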
4323
4324// If the base pointer of a masked gather or scatter is null, we
4325// may be able to swap BasePtr & Index and use the vector + register
4326// or vector + immediate addressing mode, e.g.
4327// VECTOR + REGISTER:
4328// getelementptr nullptr, <vscale x N x T> (splat(%offset)) + %indices)
4329// -> getelementptr %offset, <vscale x N x T> %indices
4330// VECTOR + IMMEDIATE:
4331// getelementptr nullptr, <vscale x N x T> (splat(#x)) + %indices)
4332// -> getelementptr #x, <vscale x N x T> %indices
4333void selectGatherScatterAddrMode(SDValue &BasePtr, SDValue &Index, EVT MemVT,
4334 unsigned &Opcode, bool IsGather,
4335 SelectionDAG &DAG) {
4336 if (!isNullConstant(BasePtr))
4337 return;
4338
4339 // FIXME: This will not match for fixed vector type codegen as the nodes in
4340 // question will have fixed<->scalable conversions around them. This should be
4341   // moved to a DAG combine or complex pattern so that it executes after all of
4342   // the fixed vector inserts and extracts have been removed. This deficiency
4343 // will result in a sub-optimal addressing mode being used, i.e. an ADD not
4344 // being folded into the scatter/gather.
4345 ConstantSDNode *Offset = nullptr;
4346 if (Index.getOpcode() == ISD::ADD)
4347 if (auto SplatVal = DAG.getSplatValue(Index.getOperand(1))) {
4348 if (isa<ConstantSDNode>(SplatVal))
4349 Offset = cast<ConstantSDNode>(SplatVal);
4350 else {
4351 BasePtr = SplatVal;
4352 Index = Index->getOperand(0);
4353 return;
4354 }
4355 }
4356
4357 unsigned NewOp =
4358 IsGather ? AArch64ISD::GLD1_IMM_MERGE_ZERO : AArch64ISD::SST1_IMM_PRED;
4359
4360 if (!Offset) {
4361 std::swap(BasePtr, Index);
4362 Opcode = NewOp;
4363 return;
4364 }
4365
4366 uint64_t OffsetVal = Offset->getZExtValue();
4367 unsigned ScalarSizeInBytes = MemVT.getScalarSizeInBits() / 8;
4368 auto ConstOffset = DAG.getConstant(OffsetVal, SDLoc(Index), MVT::i64);
4369
4370 if (OffsetVal % ScalarSizeInBytes || OffsetVal / ScalarSizeInBytes > 31) {
4371 // Index is out of range for the immediate addressing mode
4372 BasePtr = ConstOffset;
4373 Index = Index->getOperand(0);
4374 return;
4375 }
4376
4377 // Immediate is in range
4378 Opcode = NewOp;
4379 BasePtr = Index->getOperand(0);
4380 Index = ConstOffset;
4381}
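
// Worked example for the logic above (gather of i32 elements, null base):
//   Index = add %indices, splat(8)   -> 8 is a multiple of the 4-byte element
//       size and 8/4 = 2 <= 31, so the immediate form is used: the opcode
//       becomes GLD1_IMM_MERGE_ZERO, BasePtr becomes %indices and Index the
//       constant 8.
//   Index = add %indices, splat(200) -> 200/4 = 50 is out of range, so the
//       splatted offset becomes the scalar base instead and %indices stays as
//       the vector index.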
4382
4383SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op,
4384 SelectionDAG &DAG) const {
4385 SDLoc DL(Op);
4386 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(Op);
4387   assert(MGT && "Can only custom lower gather load nodes");
4388
4389 bool IsFixedLength = MGT->getMemoryVT().isFixedLengthVector();
4390
4391 SDValue Index = MGT->getIndex();
4392 SDValue Chain = MGT->getChain();
4393 SDValue PassThru = MGT->getPassThru();
4394 SDValue Mask = MGT->getMask();
4395 SDValue BasePtr = MGT->getBasePtr();
4396 ISD::LoadExtType ExtTy = MGT->getExtensionType();
4397
4398 ISD::MemIndexType IndexType = MGT->getIndexType();
4399 bool IsScaled =
4400 IndexType == ISD::SIGNED_SCALED || IndexType == ISD::UNSIGNED_SCALED;
4401 bool IsSigned =
4402 IndexType == ISD::SIGNED_SCALED || IndexType == ISD::SIGNED_UNSCALED;
4403 bool IdxNeedsExtend =
4404 getGatherScatterIndexIsExtended(Index) ||
4405 Index.getSimpleValueType().getVectorElementType() == MVT::i32;
4406 bool ResNeedsSignExtend = ExtTy == ISD::EXTLOAD || ExtTy == ISD::SEXTLOAD;
4407
4408 EVT VT = PassThru.getSimpleValueType();
4409 EVT IndexVT = Index.getSimpleValueType();
4410 EVT MemVT = MGT->getMemoryVT();
4411 SDValue InputVT = DAG.getValueType(MemVT);
4412
4413 if (VT.getVectorElementType() == MVT::bf16 &&
4414 !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
4415 return SDValue();
4416
4417 if (IsFixedLength) {
4418     assert(Subtarget->useSVEForFixedLengthVectors() &&
4419            "Cannot lower when not using SVE for fixed vectors");
4420 if (MemVT.getScalarSizeInBits() <= IndexVT.getScalarSizeInBits()) {
4421 IndexVT = getContainerForFixedLengthVector(DAG, IndexVT);
4422 MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType());
4423 } else {
4424 MemVT = getContainerForFixedLengthVector(DAG, MemVT);
4425 IndexVT = MemVT.changeTypeToInteger();
4426 }
4427 InputVT = DAG.getValueType(MemVT.changeTypeToInteger());
4428 Mask = DAG.getNode(
4429 ISD::ZERO_EXTEND, DL,
4430 VT.changeVectorElementType(IndexVT.getVectorElementType()), Mask);
4431 }
4432
4433 if (PassThru->isUndef() || isZerosVector(PassThru.getNode()))
4434 PassThru = SDValue();
4435
4436 if (VT.isFloatingPoint() && !IsFixedLength) {
4437 // Handle FP data by using an integer gather and casting the result.
4438 if (PassThru) {
4439 EVT PassThruVT = getPackedSVEVectorVT(VT.getVectorElementCount());
4440 PassThru = getSVESafeBitCast(PassThruVT, PassThru, DAG);
4441 }
4442 InputVT = DAG.getValueType(MemVT.changeVectorElementTypeToInteger());
4443 }
4444
4445 SDVTList VTs = DAG.getVTList(IndexVT, MVT::Other);
4446
4447 if (getGatherScatterIndexIsExtended(Index))
4448 Index = Index.getOperand(0);
4449
4450 unsigned Opcode = getGatherVecOpcode(IsScaled, IsSigned, IdxNeedsExtend);
4451 selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode,
4452 /*isGather=*/true, DAG);
4453
4454 if (ResNeedsSignExtend)
4455 Opcode = getSignExtendedGatherOpcode(Opcode);
4456
4457 if (IsFixedLength) {
4458 if (Index.getSimpleValueType().isFixedLengthVector())
4459 Index = convertToScalableVector(DAG, IndexVT, Index);
4460 if (BasePtr.getSimpleValueType().isFixedLengthVector())
4461 BasePtr = convertToScalableVector(DAG, IndexVT, BasePtr);
4462 Mask = convertFixedMaskToScalableVector(Mask, DAG);
4463 }
4464
4465 SDValue Ops[] = {Chain, Mask, BasePtr, Index, InputVT};
4466 SDValue Result = DAG.getNode(Opcode, DL, VTs, Ops);
4467 Chain = Result.getValue(1);
4468
4469 if (IsFixedLength) {
4470 Result = convertFromScalableVector(
4471 DAG, VT.changeVectorElementType(IndexVT.getVectorElementType()),
4472 Result);
4473 Result = DAG.getNode(ISD::TRUNCATE, DL, VT.changeTypeToInteger(), Result);
4474 Result = DAG.getNode(ISD::BITCAST, DL, VT, Result);
4475
4476 if (PassThru)
4477 Result = DAG.getSelect(DL, VT, MGT->getMask(), Result, PassThru);
4478 } else {
4479 if (PassThru)
4480 Result = DAG.getSelect(DL, IndexVT, Mask, Result, PassThru);
4481
4482 if (VT.isFloatingPoint())
4483 Result = getSVESafeBitCast(VT, Result, DAG);
4484 }
4485
4486 return DAG.getMergeValues({Result, Chain}, DL);
4487}
4488
4489SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op,
4490 SelectionDAG &DAG) const {
4491 SDLoc DL(Op);
4492 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(Op);
4493   assert(MSC && "Can only custom lower scatter store nodes");
4494
4495 bool IsFixedLength = MSC->getMemoryVT().isFixedLengthVector();
4496
4497 SDValue Index = MSC->getIndex();
4498 SDValue Chain = MSC->getChain();
4499 SDValue StoreVal = MSC->getValue();
4500 SDValue Mask = MSC->getMask();
4501 SDValue BasePtr = MSC->getBasePtr();
4502
4503 ISD::MemIndexType IndexType = MSC->getIndexType();
4504 bool IsScaled =
4505 IndexType == ISD::SIGNED_SCALED || IndexType == ISD::UNSIGNED_SCALED;
4506 bool IsSigned =
4507 IndexType == ISD::SIGNED_SCALED || IndexType == ISD::SIGNED_UNSCALED;
4508 bool NeedsExtend =
4509 getGatherScatterIndexIsExtended(Index) ||
4510 Index.getSimpleValueType().getVectorElementType() == MVT::i32;
4511
4512 EVT VT = StoreVal.getSimpleValueType();
4513 EVT IndexVT = Index.getSimpleValueType();
4514 SDVTList VTs = DAG.getVTList(MVT::Other);
4515 EVT MemVT = MSC->getMemoryVT();
4516 SDValue InputVT = DAG.getValueType(MemVT);
4517
4518 if (VT.getVectorElementType() == MVT::bf16 &&
4519 !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
4520 return SDValue();
4521
4522 if (IsFixedLength) {
4523     assert(Subtarget->useSVEForFixedLengthVectors() &&
4524            "Cannot lower when not using SVE for fixed vectors");
4525 if (MemVT.getScalarSizeInBits() <= IndexVT.getScalarSizeInBits()) {
4526 IndexVT = getContainerForFixedLengthVector(DAG, IndexVT);
4527 MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType());
4528 } else {
4529 MemVT = getContainerForFixedLengthVector(DAG, MemVT);
4530 IndexVT = MemVT.changeTypeToInteger();
4531 }
4532 InputVT = DAG.getValueType(MemVT.changeTypeToInteger());
4533
4534 StoreVal =
4535 DAG.getNode(ISD::BITCAST, DL, VT.changeTypeToInteger(), StoreVal);
4536 StoreVal = DAG.getNode(
4537 ISD::ANY_EXTEND, DL,
4538 VT.changeVectorElementType(IndexVT.getVectorElementType()), StoreVal);
4539 StoreVal = convertToScalableVector(DAG, IndexVT, StoreVal);
4540 Mask = DAG.getNode(
4541 ISD::ZERO_EXTEND, DL,
4542 VT.changeVectorElementType(IndexVT.getVectorElementType()), Mask);
4543 } else if (VT.isFloatingPoint()) {
4544 // Handle FP data by casting the data so an integer scatter can be used.
4545 EVT StoreValVT = getPackedSVEVectorVT(VT.getVectorElementCount());
4546 StoreVal = getSVESafeBitCast(StoreValVT, StoreVal, DAG);
4547 InputVT = DAG.getValueType(MemVT.changeVectorElementTypeToInteger());
4548 }
4549
4550 if (getGatherScatterIndexIsExtended(Index))
4551 Index = Index.getOperand(0);
4552
4553 unsigned Opcode = getScatterVecOpcode(IsScaled, IsSigned, NeedsExtend);
4554 selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode,
4555 /*isGather=*/false, DAG);
4556
4557 if (IsFixedLength) {
4558 if (Index.getSimpleValueType().isFixedLengthVector())
4559 Index = convertToScalableVector(DAG, IndexVT, Index);
4560 if (BasePtr.getSimpleValueType().isFixedLengthVector())
4561 BasePtr = convertToScalableVector(DAG, IndexVT, BasePtr);
4562 Mask = convertFixedMaskToScalableVector(Mask, DAG);
4563 }
4564
4565 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, InputVT};
4566 return DAG.getNode(Opcode, DL, VTs, Ops);
4567}
4568
4569SDValue AArch64TargetLowering::LowerMLOAD(SDValue Op, SelectionDAG &DAG) const {
4570 SDLoc DL(Op);
4571 MaskedLoadSDNode *LoadNode = cast<MaskedLoadSDNode>(Op);
4572   assert(LoadNode && "Expected custom lowering of a masked load node");
4573 EVT VT = Op->getValueType(0);
4574
4575 if (useSVEForFixedLengthVectorVT(VT, true))
4576 return LowerFixedLengthVectorMLoadToSVE(Op, DAG);
4577
4578 SDValue PassThru = LoadNode->getPassThru();
4579 SDValue Mask = LoadNode->getMask();
4580
4581 if (PassThru->isUndef() || isZerosVector(PassThru.getNode()))
4582 return Op;
4583
4584 SDValue Load = DAG.getMaskedLoad(
4585 VT, DL, LoadNode->getChain(), LoadNode->getBasePtr(),
4586 LoadNode->getOffset(), Mask, DAG.getUNDEF(VT), LoadNode->getMemoryVT(),
4587 LoadNode->getMemOperand(), LoadNode->getAddressingMode(),
4588 LoadNode->getExtensionType());
4589
4590 SDValue Result = DAG.getSelect(DL, VT, Mask, Load, PassThru);
4591
4592 return DAG.getMergeValues({Result, Load.getValue(1)}, DL);
4593}
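
// Sketch of the rewrite performed above (pseudo-IR, for illustration only):
//   %r = masked.load %ptr, %mask, passthru %pt    ; %pt neither undef nor zero
// becomes
//   %l = masked.load %ptr, %mask, passthru undef
//   %r = select %mask, %l, %pt
// Loads whose passthru is already undef or all zeros are returned unchanged,
// since no merge is required for those.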
4594
4595// Custom lower trunc store for v4i8 vectors, since it is promoted to v4i16.
4596static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST,
4597 EVT VT, EVT MemVT,
4598 SelectionDAG &DAG) {
4599   assert(VT.isVector() && "VT should be a vector type");
4600   assert(MemVT == MVT::v4i8 && VT == MVT::v4i16);
4601
4602 SDValue Value = ST->getValue();
4603
4604   // It first extends the promoted v4i16 to v8i16, truncates it to v8i8, and
4605   // extracts the word lane which represents the v4i8 subvector. This optimizes
4606   // the store to:
4607 //
4608 // xtn v0.8b, v0.8h
4609 // str s0, [x0]
4610
4611 SDValue Undef = DAG.getUNDEF(MVT::i16);
4612 SDValue UndefVec = DAG.getBuildVector(MVT::v4i16, DL,
4613 {Undef, Undef, Undef, Undef});
4614
4615 SDValue TruncExt = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16,
4616 Value, UndefVec);
4617 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i8, TruncExt);
4618
4619 Trunc = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Trunc);
4620 SDValue ExtractTrunc = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
4621 Trunc, DAG.getConstant(0, DL, MVT::i64));
4622
4623 return DAG.getStore(ST->getChain(), DL, ExtractTrunc,
4624 ST->getBasePtr(), ST->getMemOperand());
4625}
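
// Illustrative trigger for LowerTruncateVectorStore (assumed IR):
//   %t = trunc <4 x i16> %v to <4 x i8>
//   store <4 x i8> %t, <4 x i8>* %p
// Because v4i8 is promoted to v4i16, the truncating store reaches this hook
// and is emitted as the xtn + str s0 sequence shown in the comment above,
// instead of storing the lanes individually.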
4626
4627 // Custom lowering for any store, vector or scalar, truncating or not.
4628 // Currently we only custom lower truncating stores from v4i16 to v4i8 and
4629 // volatile stores of i128.
4630SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
4631 SelectionDAG &DAG) const {
4632 SDLoc Dl(Op);
4633 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
4634   assert(StoreNode && "Can only custom lower store nodes");
4635
4636 SDValue Value = StoreNode->getValue();
4637
4638 EVT VT = Value.getValueType();
4639 EVT MemVT = StoreNode->getMemoryVT();
4640
4641 if (VT.isVector()) {
4642 if (useSVEForFixedLengthVectorVT(VT, true))
4643 return LowerFixedLengthVectorStoreToSVE(Op, DAG);
4644
4645 unsigned AS = StoreNode->getAddressSpace();
4646 Align Alignment = StoreNode->getAlign();
4647 if (Alignment < MemVT.getStoreSize() &&
4648 !allowsMisalignedMemoryAccesses(MemVT, AS, Alignment,
4649 StoreNode->getMemOperand()->getFlags(),
4650 nullptr)) {
4651 return scalarizeVectorStore(StoreNode, DAG);
4652 }
4653
4654 if (StoreNode->isTruncatingStore() && VT == MVT::v4i16 &&
4655 MemVT == MVT::v4i8) {
4656 return LowerTruncateVectorStore(Dl, StoreNode, VT, MemVT, DAG);
4657 }
4658 // 256 bit non-temporal stores can be lowered to STNP. Do this as part of
4659 // the custom lowering, as there are no un-paired non-temporal stores and
4660 // legalization will break up 256 bit inputs.
4661 ElementCount EC = MemVT.getVectorElementCount();
4662 if (StoreNode->isNonTemporal() && MemVT.getSizeInBits() == 256u &&
4663 EC.isKnownEven() &&
4664 ((MemVT.getScalarSizeInBits() == 8u ||
4665 MemVT.getScalarSizeInBits() == 16u ||
4666 MemVT.getScalarSizeInBits() == 32u ||
4667 MemVT.getScalarSizeInBits() == 64u))) {
4668 SDValue Lo =
4669 DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl,
4670 MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
4671 StoreNode->getValue(), DAG.getConstant(0, Dl, MVT::i64));
4672 SDValue Hi =
4673 DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl,
4674 MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
4675 StoreNode->getValue(),
4676 DAG.getConstant(EC.getKnownMinValue() / 2, Dl, MVT::i64));
4677 SDValue Result = DAG.getMemIntrinsicNode(
4678 AArch64ISD::STNP, Dl, DAG.getVTList(MVT::Other),
4679 {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
4680 StoreNode->getMemoryVT(), StoreNode->getMemOperand());
4681 return Result;
4682 }
4683 } else if (MemVT == MVT::i128 && StoreNode->isVolatile()) {
4684     assert(StoreNode->getValue()->getValueType(0) == MVT::i128);
4685 SDValue Lo =
4686 DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i64, StoreNode->getValue(),
4687 DAG.getConstant(0, Dl, MVT::i64));
4688 SDValue Hi =
4689 DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i64, StoreNode->getValue(),
4690 DAG.getConstant(1, Dl, MVT::i64));
4691 SDValue Result = DAG.getMemIntrinsicNode(
4692 AArch64ISD::STP, Dl, DAG.getVTList(MVT::Other),
4693 {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
4694 StoreNode->getMemoryVT(), StoreNode->getMemOperand());
4695 return Result;
4696 } else if (MemVT == MVT::i64x8) {
4697 SDValue Value = StoreNode->getValue();
4698     assert(Value->getValueType(0) == MVT::i64x8);
4699 SDValue Chain = StoreNode->getChain();
4700 SDValue Base = StoreNode->getBasePtr();
4701 EVT PtrVT = Base.getValueType();
4702 for (unsigned i = 0; i < 8; i++) {
4703 SDValue Part = DAG.getNode(AArch64ISD::LS64_EXTRACT, Dl, MVT::i64,
4704 Value, DAG.getConstant(i, Dl, MVT::i32));
4705 SDValue Ptr = DAG.getNode(ISD::ADD, Dl, PtrVT, Base,
4706 DAG.getConstant(i * 8, Dl, PtrVT));
4707 Chain = DAG.getStore(Chain, Dl, Part, Ptr, StoreNode->getPointerInfo(),
4708 StoreNode->getOriginalAlign());
4709 }
4710 return Chain;
4711 }
4712
4713 return SDValue();
4714}
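
// For reference, the volatile i128 path above splits the value into its two
// i64 halves with EXTRACT_ELEMENT and emits a single AArch64ISD::STP memory
// intrinsic, so the 128-bit access is kept as one paired store rather than
// being broken up by generic legalization.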
4715
4716SDValue AArch64TargetLowering::LowerLOAD(SDValue Op,
4717 SelectionDAG &DAG) const {
4718 SDLoc DL(Op);
4719 LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
4720   assert(LoadNode && "Expected custom lowering of a load node");
4721
4722 if (LoadNode->getMemoryVT() == MVT::i64x8) {
4723 SmallVector<SDValue, 8> Ops;
4724 SDValue Base = LoadNode->getBasePtr();
4725 SDValue Chain = LoadNode->getChain();
4726 EVT PtrVT = Base.getValueType();
4727 for (unsigned i = 0; i < 8; i++) {
4728 SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Base,
4729 DAG.getConstant(i * 8, DL, PtrVT));
4730 SDValue Part = DAG.getLoad(MVT::i64, DL, Chain, Ptr,
4731 LoadNode->getPointerInfo(),
4732 LoadNode->getOriginalAlign());
4733 Ops.push_back(Part);
4734 Chain = SDValue(Part.getNode(), 1);
4735 }
4736 SDValue Loaded = DAG.getNode(AArch64ISD::LS64_BUILD, DL, MVT::i64x8, Ops);
4737 return DAG.getMergeValues({Loaded, Chain}, DL);
4738 }
4739
4740 // Custom lowering for extending v4i8 vector loads.
4741 EVT VT = Op->getValueType(0);
4742   assert((VT == MVT::v4i16 || VT == MVT::v4i32) && "Expected v4i16 or v4i32");
4743
4744 if (LoadNode->getMemoryVT() != MVT::v4i8)
4745 return SDValue();
4746
4747 unsigned ExtType;
4748 if (LoadNode->getExtensionType() == ISD::SEXTLOAD)
4749 ExtType = ISD::SIGN_EXTEND;
4750 else if (LoadNode->getExtensionType() == ISD::ZEXTLOAD ||
4751 LoadNode->getExtensionType() == ISD::EXTLOAD)
4752 ExtType = ISD::ZERO_EXTEND;
4753 else
4754 return SDValue();
4755
4756 SDValue Load = DAG.getLoad(MVT::f32, DL, LoadNode->getChain(),
4757 LoadNode->getBasePtr(), MachinePointerInfo());
4758 SDValue Chain = Load.getValue(1);
4759 SDValue Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f32, Load);
4760 SDValue BC = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Vec);
4761 SDValue Ext = DAG.getNode(ExtType, DL, MVT::v8i16, BC);
4762 Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Ext,
4763 DAG.getConstant(0, DL, MVT::i64));
4764 if (VT == MVT::v4i32)
4765 Ext = DAG.getNode(ExtType, DL, MVT::v4i32, Ext);
4766 return DAG.getMergeValues({Ext, Chain}, DL);
4767}
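
// Rough shape of the extending v4i8 load lowering above (illustrative only):
//   ldr   s0, [x0]              // load the 4 bytes as a single 32-bit scalar
//   ushll v0.8h, v0.8b, #0      // or sshll for sign-extending loads
//   // ... use the low v4i16 half (extended once more for v4i32 results)
// i.e. one 32-bit load plus a vector extend instead of four byte loads.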
4768
4769// Generate SUBS and CSEL for integer abs.
4770SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
4771 MVT VT = Op.getSimpleValueType();
4772
4773 if (VT.isVector())
4774 return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABS_MERGE_PASSTHRU);
4775
4776 SDLoc DL(Op);
4777 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
4778 Op.getOperand(0));
4779 // Generate SUBS & CSEL.
4780 SDValue Cmp =
4781 DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32),
4782 Op.getOperand(0), DAG.getConstant(0, DL, VT));
4783 return DAG.getNode(AArch64ISD::CSEL, DL, VT, Op.getOperand(0), Neg,
4784 DAG.getConstant(AArch64CC::PL, DL, MVT::i32),
4785 Cmp.getValue(1));
4786}
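
// At the DAG level the scalar path above builds:
//   %neg = sub 0, %x
//   %cmp = SUBS %x, 0                  ; sets NZCV
//   %res = CSEL %x, %neg, PL, %cmp     ; pick %x when non-negative, else -%x
// which is the "SUBS and CSEL" integer abs sequence described in the comment.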
4787
4788SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
4789 SelectionDAG &DAG) const {
4790   LLVM_DEBUG(dbgs() << "Custom lowering: ");
4791   LLVM_DEBUG(Op.dump());
4792
4793 switch (Op.getOpcode()) {
4794 default:
4795     llvm_unreachable("unimplemented operand");
4796 return SDValue();
4797 case ISD::BITCAST:
4798 return LowerBITCAST(Op, DAG);
4799 case ISD::GlobalAddress:
4800 return LowerGlobalAddress(Op, DAG);
4801 case ISD::GlobalTLSAddress:
4802 return LowerGlobalTLSAddress(Op, DAG);
4803 case ISD::SETCC:
4804 case ISD::STRICT_FSETCC:
4805 case ISD::STRICT_FSETCCS:
4806 return LowerSETCC(Op, DAG);
4807 case ISD::BR_CC:
4808 return LowerBR_CC(Op, DAG);
4809 case ISD::SELECT:
4810 return LowerSELECT(Op, DAG);
4811 case ISD::SELECT_CC:
4812 return LowerSELECT_CC(Op, DAG);
4813 case ISD::JumpTable:
4814 return LowerJumpTable(Op, DAG);
4815 case ISD::BR_JT:
4816 return LowerBR_JT(Op, DAG);
4817 case ISD::ConstantPool:
4818 return LowerConstantPool(Op, DAG);
4819 case ISD::BlockAddress:
4820 return LowerBlockAddress(Op, DAG);
4821 case ISD::VASTART:
4822 return LowerVASTART(Op, DAG);
4823 case ISD::VACOPY:
4824 return LowerVACOPY(Op, DAG);
4825 case ISD::VAARG:
4826 return LowerVAARG(Op, DAG);
4827 case ISD::ADDC:
4828 case ISD::ADDE:
4829 case ISD::SUBC:
4830 case ISD::SUBE:
4831 return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
4832 case ISD::SADDO:
4833 case ISD::UADDO:
4834 case ISD::SSUBO:
4835 case ISD::USUBO:
4836 case ISD::SMULO:
4837 case ISD::UMULO:
4838 return LowerXALUO(Op, DAG);
4839 case ISD::FADD:
4840 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FADD_PRED);
4841 case ISD::FSUB:
4842 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSUB_PRED);
4843 case ISD::FMUL:
4844 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMUL_PRED);
4845 case ISD::FMA:
4846 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMA_PRED);
4847 case ISD::FDIV:
4848 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FDIV_PRED);
4849 case ISD::FNEG:
4850 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEG_MERGE_PASSTHRU);
4851 case ISD::FCEIL:
4852 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FCEIL_MERGE_PASSTHRU);
4853 case ISD::FFLOOR:
4854 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FFLOOR_MERGE_PASSTHRU);
4855 case ISD::FNEARBYINT:
4856 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEARBYINT_MERGE_PASSTHRU);
4857 case ISD::FRINT:
4858 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FRINT_MERGE_PASSTHRU);
4859 case ISD::FROUND:
4860 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUND_MERGE_PASSTHRU);
4861 case ISD::FROUNDEVEN:
4862 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU);
4863 case ISD::FTRUNC:
4864 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FTRUNC_MERGE_PASSTHRU);
4865 case ISD::FSQRT:
4866 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSQRT_MERGE_PASSTHRU);
4867 case ISD::FABS:
4868 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FABS_MERGE_PASSTHRU);
4869 case ISD::FP_ROUND:
4870 case ISD::STRICT_FP_ROUND:
4871 return LowerFP_ROUND(Op, DAG);
4872 case ISD::FP_EXTEND:
4873 return LowerFP_EXTEND(Op, DAG);
4874 case ISD::FRAMEADDR:
4875 return LowerFRAMEADDR(Op, DAG);
4876 case ISD::SPONENTRY:
4877 return LowerSPONENTRY(Op, DAG);
4878 case ISD::RETURNADDR:
4879 return LowerRETURNADDR(Op, DAG);
4880 case ISD::ADDROFRETURNADDR:
4881 return LowerADDROFRETURNADDR(Op, DAG);
4882 case ISD::CONCAT_VECTORS:
4883 return LowerCONCAT_VECTORS(Op, DAG);
4884 case ISD::INSERT_VECTOR_ELT:
4885 return LowerINSERT_VECTOR_ELT(Op, DAG);
4886 case ISD::EXTRACT_VECTOR_ELT:
4887 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
4888 case ISD::BUILD_VECTOR:
4889 return LowerBUILD_VECTOR(Op, DAG);
4890 case ISD::VECTOR_SHUFFLE:
4891 return LowerVECTOR_SHUFFLE(Op, DAG);
4892 case ISD::SPLAT_VECTOR:
4893 return LowerSPLAT_VECTOR(Op, DAG);
4894 case ISD::EXTRACT_SUBVECTOR:
4895 return LowerEXTRACT_SUBVECTOR(Op, DAG);
4896 case ISD::INSERT_SUBVECTOR:
4897 return LowerINSERT_SUBVECTOR(Op, DAG);
4898 case ISD::SDIV:
4899 case ISD::UDIV:
4900 return LowerDIV(Op, DAG);
4901 case ISD::SMIN:
4902 case ISD::UMIN:
4903 case ISD::SMAX:
4904 case ISD::UMAX:
4905 return LowerMinMax(Op, DAG);
4906 case ISD::SRA:
4907 case ISD::SRL:
4908 case ISD::SHL:
4909 return LowerVectorSRA_SRL_SHL(Op, DAG);
4910 case ISD::SHL_PARTS:
4911 case ISD::SRL_PARTS:
4912 case ISD::SRA_PARTS:
4913 return LowerShiftParts(Op, DAG);
4914 case ISD::CTPOP:
4915 return LowerCTPOP(Op, DAG);
4916 case ISD::FCOPYSIGN:
4917 return LowerFCOPYSIGN(Op, DAG);
4918 case ISD::OR:
4919 return LowerVectorOR(Op, DAG);
4920 case ISD::XOR:
4921 return LowerXOR(Op, DAG);
4922 case ISD::PREFETCH:
4923 return LowerPREFETCH(Op, DAG);
4924 case ISD::SINT_TO_FP:
4925 case ISD::UINT_TO_FP:
4926 case ISD::STRICT_SINT_TO_FP:
4927 case ISD::STRICT_UINT_TO_FP:
4928 return LowerINT_TO_FP(Op, DAG);
4929 case ISD::FP_TO_SINT:
4930 case ISD::FP_TO_UINT:
4931 case ISD::STRICT_FP_TO_SINT:
4932 case ISD::STRICT_FP_TO_UINT:
4933 return LowerFP_TO_INT(Op, DAG);
4934 case ISD::FP_TO_SINT_SAT:
4935 case ISD::FP_TO_UINT_SAT:
4936 return LowerFP_TO_INT_SAT(Op, DAG);
4937 case ISD::FSINCOS:
4938 return LowerFSINCOS(Op, DAG);
4939 case ISD::FLT_ROUNDS_:
4940 return LowerFLT_ROUNDS_(Op, DAG);
4941 case ISD::SET_ROUNDING:
4942 return LowerSET_ROUNDING(Op, DAG);
4943 case ISD::MUL:
4944 return LowerMUL(Op, DAG);
4945 case ISD::MULHS:
4946 return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHS_PRED,
4947 /*OverrideNEON=*/true);
4948 case ISD::MULHU:
4949 return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHU_PRED,
4950 /*OverrideNEON=*/true);
4951 case ISD::INTRINSIC_WO_CHAIN:
4952 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
4953 case ISD::STORE:
4954 return LowerSTORE(Op, DAG);
4955 case ISD::MSTORE:
4956 return LowerFixedLengthVectorMStoreToSVE(Op, DAG);
4957 case ISD::MGATHER:
4958 return LowerMGATHER(Op, DAG);
4959 case ISD::MSCATTER:
4960 return LowerMSCATTER(Op, DAG);
4961 case ISD::VECREDUCE_SEQ_FADD:
4962 return LowerVECREDUCE_SEQ_FADD(Op, DAG);
4963 case ISD::VECREDUCE_ADD:
4964 case ISD::VECREDUCE_AND:
4965 case ISD::VECREDUCE_OR:
4966 case ISD::VECREDUCE_XOR:
4967 case ISD::VECREDUCE_SMAX:
4968 case ISD::VECREDUCE_SMIN:
4969 case ISD::VECREDUCE_UMAX:
4970 case ISD::VECREDUCE_UMIN:
4971 case ISD::VECREDUCE_FADD:
4972 case ISD::VECREDUCE_FMAX:
4973 case ISD::VECREDUCE_FMIN:
4974 return LowerVECREDUCE(Op, DAG);
4975 case ISD::ATOMIC_LOAD_SUB:
4976 return LowerATOMIC_LOAD_SUB(Op, DAG);
4977 case ISD::ATOMIC_LOAD_AND:
4978 return LowerATOMIC_LOAD_AND(Op, DAG);
4979 case ISD::DYNAMIC_STACKALLOC:
4980 return LowerDYNAMIC_STACKALLOC(Op, DAG);
4981 case ISD::VSCALE:
4982 return LowerVSCALE(Op, DAG);
4983 case ISD::ANY_EXTEND:
4984 case ISD::SIGN_EXTEND:
4985 case ISD::ZERO_EXTEND:
4986 return LowerFixedLengthVectorIntExtendToSVE(Op, DAG);
4987 case ISD::SIGN_EXTEND_INREG: {
4988 // Only custom lower when ExtraVT has a legal byte based element type.
4989 EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
4990 EVT ExtraEltVT = ExtraVT.getVectorElementType();
4991 if ((ExtraEltVT != MVT::i8) && (ExtraEltVT != MVT::i16) &&
4992 (ExtraEltVT != MVT::i32) && (ExtraEltVT != MVT::i64))
4993 return SDValue();
4994
4995 return LowerToPredicatedOp(Op, DAG,
4996 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU);
4997 }
4998 case ISD::TRUNCATE:
4999 return LowerTRUNCATE(Op, DAG);
5000 case ISD::MLOAD:
5001 return LowerMLOAD(Op, DAG);
5002 case ISD::LOAD:
5003 if (useSVEForFixedLengthVectorVT(Op.getValueType()))
5004 return LowerFixedLengthVectorLoadToSVE(Op, DAG);
5005 return LowerLOAD(Op, DAG);
5006 case ISD::ADD:
5007 return LowerToPredicatedOp(Op, DAG, AArch64ISD::ADD_PRED);
5008 case ISD::AND:
5009 return LowerToScalableOp(Op, DAG);
5010 case ISD::SUB:
5011 return LowerToPredicatedOp(Op, DAG, AArch64ISD::SUB_PRED);
5012 case ISD::FMAXIMUM:
5013 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMAX_PRED);
5014 case ISD::FMAXNUM:
5015 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMAXNM_PRED);
5016 case ISD::FMINIMUM:
5017 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMIN_PRED);
5018 case ISD::FMINNUM:
5019 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMINNM_PRED);
5020 case ISD::VSELECT:
5021 return LowerFixedLengthVectorSelectToSVE(Op, DAG);
5022 case ISD::ABS:
5023 return LowerABS(Op, DAG);
5024 case ISD::BITREVERSE:
5025 return LowerBitreverse(Op, DAG);
5026 case ISD::BSWAP:
5027 return LowerToPredicatedOp(Op, DAG, AArch64ISD::BSWAP_MERGE_PASSTHRU);
5028 case ISD::CTLZ:
5029 return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTLZ_MERGE_PASSTHRU,
5030 /*OverrideNEON=*/true);
5031 case ISD::CTTZ:
5032 return LowerCTTZ(Op, DAG);
5033 case ISD::VECTOR_SPLICE:
5034 return LowerVECTOR_SPLICE(Op, DAG);
5035 }
5036}
5037
5038bool AArch64TargetLowering::mergeStoresAfterLegalization(EVT VT) const {
5039 return !Subtarget->useSVEForFixedLengthVectors();
5040}
5041
5042bool AArch64TargetLowering::useSVEForFixedLengthVectorVT(
5043 EVT VT, bool OverrideNEON) const {
5044 if (!Subtarget->useSVEForFixedLengthVectors())
5045 return false;
5046
5047 if (!VT.isFixedLengthVector())
5048 return false;
5049
5050 // Don't use SVE for vectors we cannot scalarize if required.
5051 switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
5052 // Fixed length predicates should be promoted to i8.
5053 // NOTE: This is consistent with how NEON (and thus 64/128bit vectors) work.
5054 case MVT::i1:
5055 default:
5056 return false;
5057 case MVT::i8:
5058 case MVT::i16:
5059 case MVT::i32:
5060 case MVT::i64:
5061 case MVT::f16:
5062 case MVT::f32:
5063 case MVT::f64:
5064 break;
5065 }
5066
5067 // All SVE implementations support NEON sized vectors.
5068 if (OverrideNEON && (VT.is128BitVector() || VT.is64BitVector()))
5069 return true;
5070
5071 // Ensure NEON MVTs only belong to a single register class.
5072 if (VT.getFixedSizeInBits() <= 128)
5073 return false;
5074
5075 // Don't use SVE for types that don't fit.
5076 if (VT.getFixedSizeInBits() > Subtarget->getMinSVEVectorSizeInBits())
5077 return false;
5078
5079 // TODO: Perhaps an artificial restriction, but worth having whilst getting
5080 // the base fixed length SVE support in place.
5081 if (!VT.isPow2VectorType())
5082 return false;
5083
5084 return true;
5085}
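
// Examples for the checks above, assuming the subtarget reports a minimum SVE
// vector width of 512 bits (e.g. -msve-vector-bits=512):
//   v16i8  (128 bits)  -> false unless OverrideNEON; NEON-sized vectors stay
//                         in the NEON register classes
//   v8i64  (512 bits)  -> true, fits within the guaranteed SVE width
//   v32i32 (1024 bits) -> false, larger than the guaranteed SVE width
//   v6i64  (384 bits)  -> false, not a power-of-2 element count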
5086
5087//===----------------------------------------------------------------------===//
5088// Calling Convention Implementation
5089//===----------------------------------------------------------------------===//
5090
5091/// Selects the correct CCAssignFn for a given CallingConvention value.
5092CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
5093 bool IsVarArg) const {
5094 switch (CC) {
5095 default:
5096 report_fatal_error("Unsupported calling convention.");
5097 case CallingConv::WebKit_JS:
5098 return CC_AArch64_WebKit_JS;
5099 case CallingConv::GHC:
5100 return CC_AArch64_GHC;
5101 case CallingConv::C:
5102 case CallingConv::Fast:
5103 case CallingConv::PreserveMost:
5104 case CallingConv::CXX_FAST_TLS:
5105 case CallingConv::Swift:
5106 case CallingConv::SwiftTail:
5107 case CallingConv::Tail:
5108 if (Subtarget->isTargetWindows() && IsVarArg)
5109 return CC_AArch64_Win64_VarArg;
5110 if (!Subtarget->isTargetDarwin())
5111 return CC_AArch64_AAPCS;
5112 if (!IsVarArg)
5113 return CC_AArch64_DarwinPCS;
5114 return Subtarget->isTargetILP32() ? CC_AArch64_DarwinPCS_ILP32_VarArg
5115 : CC_AArch64_DarwinPCS_VarArg;
5116 case CallingConv::Win64:
5117 return IsVarArg ? CC_AArch64_Win64_VarArg : CC_AArch64_AAPCS;
5118 case CallingConv::CFGuard_Check:
5119 return CC_AArch64_Win64_CFGuard_Check;
5120 case CallingConv::AArch64_VectorCall:
5121 case CallingConv::AArch64_SVE_VectorCall:
5122 return CC_AArch64_AAPCS;
5123 }
5124}
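
// Example resolutions of the switch above (illustrative):
//   CallingConv::C, non-variadic, Linux    -> CC_AArch64_AAPCS
//   CallingConv::C, variadic,     Windows  -> CC_AArch64_Win64_VarArg
//   CallingConv::C, variadic,     Darwin   -> CC_AArch64_DarwinPCS_VarArg
//                                             (ILP32 variant on arm64_32)
//   CallingConv::Win64, non-variadic       -> CC_AArch64_AAPCS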
5125
5126CCAssignFn *
5127AArch64TargetLowering::CCAssignFnForReturn(CallingConv::ID CC) const {
5128 return CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
5129 : RetCC_AArch64_AAPCS;
5130}
5131
5132SDValue AArch64TargetLowering::LowerFormalArguments(
5133 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
5134 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
5135 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5136 MachineFunction &MF = DAG.getMachineFunction();
5137 MachineFrameInfo &MFI = MF.getFrameInfo();
5138 bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
5139
5140 // Assign locations to all of the incoming arguments.
5141 SmallVector<CCValAssign, 16> ArgLocs;
5142 DenseMap<unsigned, SDValue> CopiedRegs;
5143 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
5144
5145 // At this point, Ins[].VT may already be promoted to i32. To correctly
5146 // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
5147 // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
5148 // Since AnalyzeFormalArguments uses Ins[].VT for both ValVT and LocVT, here
5149 // we use a special version of AnalyzeFormalArguments to pass in ValVT and
5150 // LocVT.
5151 unsigned NumArgs = Ins.size();
5152 Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
5153 unsigned CurArgIdx = 0;
5154 for (unsigned i = 0; i != NumArgs; ++i) {
5155 MVT ValVT = Ins[i].VT;
5156 if (Ins[i].isOrigArg()) {
5157 std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
5158 CurArgIdx = Ins[i].getOrigArgIndex();
5159
5160 // Get type of the original argument.
5161 EVT ActualVT = getValueType(DAG.getDataLayout(), CurOrigArg->getType(),
5162 /*AllowUnknown*/ true);
5163 MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
5164 // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
5165 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
5166 ValVT = MVT::i8;
5167 else if (ActualMVT == MVT::i16)
5168 ValVT = MVT::i16;
5169 }
5170 bool UseVarArgCC = false;
5171 if (IsWin64)
5172 UseVarArgCC = isVarArg;
5173 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, UseVarArgCC);
5174 bool Res =
5175 AssignFn(i, ValVT, ValVT, CCValAssign::Full, Ins[i].Flags, CCInfo);
5176     assert(!Res && "Call operand has unhandled type");
5177 (void)Res;
5178 }
5179 SmallVector<SDValue, 16> ArgValues;
5180 unsigned ExtraArgLocs = 0;
5181 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
5182 CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
5183
5184 if (Ins[i].Flags.isByVal()) {
5185 // Byval is used for HFAs in the PCS, but the system should work in a
5186 // non-compliant manner for larger structs.
5187 EVT PtrVT = getPointerTy(DAG.getDataLayout());
5188 int Size = Ins[i].Flags.getByValSize();
5189 unsigned NumRegs = (Size + 7) / 8;
5190
5191 // FIXME: This works on big-endian for composite byvals, which are the common
5192 // case. It should also work for fundamental types too.
5193 unsigned FrameIdx =
5194 MFI.CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
5195 SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrVT);
5196 InVals.push_back(FrameIdxN);
5197
5198 continue;
5199 }
5200
5201 if (Ins[i].Flags.isSwiftAsync())
5202 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
5203
5204 SDValue ArgValue;
5205 if (VA.isRegLoc()) {
5206 // Arguments stored in registers.
5207 EVT RegVT = VA.getLocVT();
5208 const TargetRegisterClass *RC;
5209
5210 if (RegVT == MVT::i32)
5211 RC = &AArch64::GPR32RegClass;
5212 else if (RegVT == MVT::i64)
5213 RC = &AArch64::GPR64RegClass;
5214 else if (RegVT == MVT::f16 || RegVT == MVT::bf16)
5215 RC = &AArch64::FPR16RegClass;
5216 else if (RegVT == MVT::f32)
5217 RC = &AArch64::FPR32RegClass;
5218 else if (RegVT == MVT::f64 || RegVT.is64BitVector())
5219 RC = &AArch64::FPR64RegClass;
5220 else if (RegVT == MVT::f128 || RegVT.is128BitVector())
5221 RC = &AArch64::FPR128RegClass;
5222 else if (RegVT.isScalableVector() &&
5223 RegVT.getVectorElementType() == MVT::i1)
5224 RC = &AArch64::PPRRegClass;
5225 else if (RegVT.isScalableVector())
5226 RC = &AArch64::ZPRRegClass;
5227 else
5228         llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
5229
5230 // Transform the arguments in physical registers into virtual ones.
5231 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
5232 ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
5233
5234 // If this is an 8, 16 or 32-bit value, it is really passed promoted
5235 // to 64 bits. Insert an assert[sz]ext to capture this, then
5236 // truncate to the right size.
5237 switch (VA.getLocInfo()) {
5238 default:
5239         llvm_unreachable("Unknown loc info!");
5240 case CCValAssign::Full:
5241 break;
5242 case CCValAssign::Indirect:
5243         assert(VA.getValVT().isScalableVector() &&
5244                "Only scalable vectors can be passed indirectly");
5245 break;
5246 case CCValAssign::BCvt:
5247 ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue);
5248 break;
5249 case CCValAssign::AExt:
5250 case CCValAssign::SExt:
5251 case CCValAssign::ZExt:
5252 break;
5253 case CCValAssign::AExtUpper:
5254 ArgValue = DAG.getNode(ISD::SRL, DL, RegVT, ArgValue,
5255 DAG.getConstant(32, DL, RegVT));
5256 ArgValue = DAG.getZExtOrTrunc(ArgValue, DL, VA.getValVT());
5257 break;
5258 }
5259 } else { // VA.isRegLoc()
5260       assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem");
5261 unsigned ArgOffset = VA.getLocMemOffset();
5262 unsigned ArgSize = (VA.getLocInfo() == CCValAssign::Indirect
5263 ? VA.getLocVT().getSizeInBits()
5264 : VA.getValVT().getSizeInBits()) / 8;
5265
5266 uint32_t BEAlign = 0;
5267 if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
5268 !Ins[i].Flags.isInConsecutiveRegs())
5269 BEAlign = 8 - ArgSize;
5270
5271 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
5272
5273 // Create load nodes to retrieve arguments from the stack.
5274 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
5275
5276       // For NON_EXTLOAD, generic code in getLoad asserts that ValVT == MemVT.
5277 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
5278 MVT MemVT = VA.getValVT();
5279
5280 switch (VA.getLocInfo()) {
5281 default:
5282 break;
5283 case CCValAssign::Trunc:
5284 case CCValAssign::BCvt:
5285 MemVT = VA.getLocVT();
5286 break;
5287 case CCValAssign::Indirect:
5288         assert(VA.getValVT().isScalableVector() &&
5289                "Only scalable vectors can be passed indirectly");
5290 MemVT = VA.getLocVT();
5291 break;
5292 case CCValAssign::SExt:
5293 ExtType = ISD::SEXTLOAD;
5294 break;
5295 case CCValAssign::ZExt:
5296 ExtType = ISD::ZEXTLOAD;
5297 break;
5298 case CCValAssign::AExt:
5299 ExtType = ISD::EXTLOAD;
5300 break;
5301 }
5302
5303 ArgValue =
5304 DAG.getExtLoad(ExtType, DL, VA.getLocVT(), Chain, FIN,
5305 MachinePointerInfo::getFixedStack(MF, FI), MemVT);
5306 }
5307
5308 if (VA.getLocInfo() == CCValAssign::Indirect) {
5309       assert(VA.getValVT().isScalableVector() &&
5310              "Only scalable vectors can be passed indirectly");
5311
5312 uint64_t PartSize = VA.getValVT().getStoreSize().getKnownMinSize();
5313 unsigned NumParts = 1;
5314 if (Ins[i].Flags.isInConsecutiveRegs()) {
5315         assert(!Ins[i].Flags.isInConsecutiveRegsLast());
5316 while (!Ins[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
5317 ++NumParts;
5318 }
5319
5320 MVT PartLoad = VA.getValVT();
5321 SDValue Ptr = ArgValue;
5322
5323 // Ensure we generate all loads for each tuple part, whilst updating the
5324 // pointer after each load correctly using vscale.
5325 while (NumParts > 0) {
5326 ArgValue = DAG.getLoad(PartLoad, DL, Chain, Ptr, MachinePointerInfo());
5327 InVals.push_back(ArgValue);
5328 NumParts--;
5329 if (NumParts > 0) {
5330 SDValue BytesIncrement = DAG.getVScale(
5331 DL, Ptr.getValueType(),
5332 APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize));
5333 SDNodeFlags Flags;
5334 Flags.setNoUnsignedWrap(true);
5335 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
5336 BytesIncrement, Flags);
5337 ExtraArgLocs++;
5338 i++;
5339 }
5340 }
5341 } else {
5342 if (Subtarget->isTargetILP32() && Ins[i].Flags.isPointer())
5343 ArgValue = DAG.getNode(ISD::AssertZext, DL, ArgValue.getValueType(),
5344 ArgValue, DAG.getValueType(MVT::i32));
5345 InVals.push_back(ArgValue);
5346 }
5347 }
5348   assert((ArgLocs.size() + ExtraArgLocs) == Ins.size());
5349
5350 // varargs
5351 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
5352 if (isVarArg) {
5353 if (!Subtarget->isTargetDarwin() || IsWin64) {
5354 // The AAPCS variadic function ABI is identical to the non-variadic
5355 // one. As a result there may be more arguments in registers and we should
5356 // save them for future reference.
5357 // Win64 variadic functions also pass arguments in registers, but all float
5358 // arguments are passed in integer registers.
5359 saveVarArgRegisters(CCInfo, DAG, DL, Chain);
5360 }
5361
5362 // This will point to the next argument passed via stack.
5363 unsigned StackOffset = CCInfo.getNextStackOffset();
5364 // We currently pass all varargs at 8-byte alignment, or 4 for ILP32
5365 StackOffset = alignTo(StackOffset, Subtarget->isTargetILP32() ? 4 : 8);
5366 FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
5367
5368 if (MFI.hasMustTailInVarArgFunc()) {
5369 SmallVector<MVT, 2> RegParmTypes;
5370 RegParmTypes.push_back(MVT::i64);
5371 RegParmTypes.push_back(MVT::f128);
5372 // Compute the set of forwarded registers. The rest are scratch.
5373 SmallVectorImpl<ForwardedRegister> &Forwards =
5374 FuncInfo->getForwardedMustTailRegParms();
5375 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes,
5376 CC_AArch64_AAPCS);
5377
5378 // Conservatively forward X8, since it might be used for aggregate return.
5379 if (!CCInfo.isAllocated(AArch64::X8)) {
5380 unsigned X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
5381 Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
5382 }
5383 }
5384 }
5385
5386 // On Windows, InReg pointers must be returned, so record the pointer in a
5387 // virtual register at the start of the function so it can be returned in the
5388 // epilogue.
5389 if (IsWin64) {
5390 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
5391 if (Ins[I].Flags.isInReg()) {
5392         assert(!FuncInfo->getSRetReturnReg());
5393
5394 MVT PtrTy = getPointerTy(DAG.getDataLayout());
5395 Register Reg =
5396 MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
5397 FuncInfo->setSRetReturnReg(Reg);
5398
5399 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[I]);
5400 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Copy, Chain);
5401 break;
5402 }
5403 }
5404 }
5405
5406 unsigned StackArgSize = CCInfo.getNextStackOffset();
5407 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
5408 if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
5409 // This is a non-standard ABI so by fiat I say we're allowed to make full
5410 // use of the stack area to be popped, which must be aligned to 16 bytes in
5411 // any case:
5412 StackArgSize = alignTo(StackArgSize, 16);
5413
5414 // If we're expected to restore the stack (e.g. fastcc) then we'll be adding
5415 // a multiple of 16.
5416 FuncInfo->setArgumentStackToRestore(StackArgSize);
5417
5418 // This realignment carries over to the available bytes below. Our own
5419 // callers will guarantee the space is free by giving an aligned value to
5420 // CALLSEQ_START.
5421 }
5422 // Even if we're not expected to free up the space, it's useful to know how
5423 // much is there while considering tail calls (because we can reuse it).
5424 FuncInfo->setBytesInStackArgArea(StackArgSize);
5425
5426 if (Subtarget->hasCustomCallingConv())
5427 Subtarget->getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
5428
5429 return Chain;
5430}
5431
5432void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
5433 SelectionDAG &DAG,
5434 const SDLoc &DL,
5435 SDValue &Chain) const {
5436 MachineFunction &MF = DAG.getMachineFunction();
5437 MachineFrameInfo &MFI = MF.getFrameInfo();
5438 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
5439 auto PtrVT = getPointerTy(DAG.getDataLayout());
5440 bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
5441
5442 SmallVector<SDValue, 8> MemOps;
5443
5444 static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2,
5445 AArch64::X3, AArch64::X4, AArch64::X5,
5446 AArch64::X6, AArch64::X7 };
5447 static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs);
5448 unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs);
5449
5450 unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
5451 int GPRIdx = 0;
5452 if (GPRSaveSize != 0) {
5453 if (IsWin64) {
5454 GPRIdx = MFI.CreateFixedObject(GPRSaveSize, -(int)GPRSaveSize, false);
5455 if (GPRSaveSize & 15)
5456 // The extra size here, if triggered, will always be 8.
5457 MFI.CreateFixedObject(16 - (GPRSaveSize & 15), -(int)alignTo(GPRSaveSize, 16), false);
5458 } else
5459 GPRIdx = MFI.CreateStackObject(GPRSaveSize, Align(8), false);
5460
5461 SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT);
5462
5463 for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
5464 unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass);
5465 SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
5466 SDValue Store =
5467 DAG.getStore(Val.getValue(1), DL, Val, FIN,
5468 IsWin64 ? MachinePointerInfo::getFixedStack(
5469 MF, GPRIdx, (i - FirstVariadicGPR) * 8)
5470 : MachinePointerInfo::getStack(MF, i * 8));
5471 MemOps.push_back(Store);
5472 FIN =
5473 DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT));
5474 }
5475 }
5476 FuncInfo->setVarArgsGPRIndex(GPRIdx);
5477 FuncInfo->setVarArgsGPRSize(GPRSaveSize);
5478
5479 if (Subtarget->hasFPARMv8() && !IsWin64) {
5480 static const MCPhysReg FPRArgRegs[] = {
5481 AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
5482 AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};
5483 static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs);
5484 unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs);
5485
5486 unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
5487 int FPRIdx = 0;
5488 if (FPRSaveSize != 0) {
5489 FPRIdx = MFI.CreateStackObject(FPRSaveSize, Align(16), false);
5490
5491 SDValue FIN = DAG.getFrameIndex(FPRIdx, PtrVT);
5492
5493 for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
5494 unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass);
5495 SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
5496
5497 SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN,
5498 MachinePointerInfo::getStack(MF, i * 16));
5499 MemOps.push_back(Store);
5500 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
5501 DAG.getConstant(16, DL, PtrVT));
5502 }
5503 }
5504 FuncInfo->setVarArgsFPRIndex(FPRIdx);
5505 FuncInfo->setVarArgsFPRSize(FPRSaveSize);
5506 }
5507
5508 if (!MemOps.empty()) {
5509 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
5510 }
5511}
5512
5513/// LowerCallResult - Lower the result values of a call into the
5514/// appropriate copies out of appropriate physical registers.
5515SDValue AArch64TargetLowering::LowerCallResult(
5516 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
5517 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
5518 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
5519 SDValue ThisVal) const {
5520 CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
5521 // Assign locations to each value returned by this call.
5522 SmallVector<CCValAssign, 16> RVLocs;
5523 DenseMap<unsigned, SDValue> CopiedRegs;
5524 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5525 *DAG.getContext());
5526 CCInfo.AnalyzeCallResult(Ins, RetCC);
5527
5528 // Copy all of the result registers out of their specified physreg.
5529 for (unsigned i = 0; i != RVLocs.size(); ++i) {
5530 CCValAssign VA = RVLocs[i];
5531
5532 // Pass 'this' value directly from the argument to return value, to avoid
5533 // reg unit interference
5534 if (i == 0 && isThisReturn) {
5535 assert(!VA.needsCustom() && VA.getLocVT() == MVT::i64 &&
5536 "unexpected return calling convention register assignment");
5537 InVals.push_back(ThisVal);
5538 continue;
5539 }
5540
5541 // Avoid copying a physreg twice since RegAllocFast only allows one use of a
5542 // physreg per block.
5543 SDValue Val = CopiedRegs.lookup(VA.getLocReg());
5544 if (!Val) {
5545 Val =
5546 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag);
5547 Chain = Val.getValue(1);
5548 InFlag = Val.getValue(2);
5549 CopiedRegs[VA.getLocReg()] = Val;
5550 }
5551
5552 switch (VA.getLocInfo()) {
5553 default:
5554 llvm_unreachable("Unknown loc info!");
5555 case CCValAssign::Full:
5556 break;
5557 case CCValAssign::BCvt:
5558 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
5559 break;
5560 case CCValAssign::AExtUpper:
5561 Val = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Val,
5562 DAG.getConstant(32, DL, VA.getLocVT()));
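// For example, a 32-bit piece returned in the upper half of an X register:
// the shift moves it into bits [31:0] so the truncation below recovers it.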
5563 LLVM_FALLTHROUGH;
5564 case CCValAssign::AExt:
5565 LLVM_FALLTHROUGH;
5566 case CCValAssign::ZExt:
5567 Val = DAG.getZExtOrTrunc(Val, DL, VA.getValVT());
5568 break;
5569 }
5570
5571 InVals.push_back(Val);
5572 }
5573
5574 return Chain;
5575}
5576
5577/// Return true if the calling convention is one that we can guarantee TCO for.
5578static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
5579 return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
5580 CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
5581}
5582
5583/// Return true if we might ever do TCO for calls with this calling convention.
5584static bool mayTailCallThisCC(CallingConv::ID CC) {
5585 switch (CC) {
5586 case CallingConv::C:
5587 case CallingConv::AArch64_SVE_VectorCall:
5588 case CallingConv::PreserveMost:
5589 case CallingConv::Swift:
5590 case CallingConv::SwiftTail:
5591 case CallingConv::Tail:
5592 case CallingConv::Fast:
5593 return true;
5594 default:
5595 return false;
5596 }
5597}
5598
5599bool AArch64TargetLowering::isEligibleForTailCallOptimization(
5600 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
5601 const SmallVectorImpl<ISD::OutputArg> &Outs,
5602 const SmallVectorImpl<SDValue> &OutVals,
5603 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
5604 if (!mayTailCallThisCC(CalleeCC))
5605 return false;
5606
5607 MachineFunction &MF = DAG.getMachineFunction();
5608 const Function &CallerF = MF.getFunction();
5609 CallingConv::ID CallerCC = CallerF.getCallingConv();
5610
5611 // Functions using the C or Fast calling convention that have an SVE signature
5612 // preserve more registers and should assume the SVE_VectorCall CC.
5613 // The check for matching callee-saved regs will determine whether it is
5614 // eligible for TCO.
5615 if ((CallerCC == CallingConv::C || CallerCC == CallingConv::Fast) &&
5616 AArch64RegisterInfo::hasSVEArgsOrReturn(&MF))
5617 CallerCC = CallingConv::AArch64_SVE_VectorCall;
5618
5619 bool CCMatch = CallerCC == CalleeCC;
5620
5621 // When using the Windows calling convention on a non-windows OS, we want
5622 // to back up and restore X18 in such functions; we can't do a tail call
5623 // from those functions.
5624 if (CallerCC == CallingConv::Win64 && !Subtarget->isTargetWindows() &&
5625 CalleeCC != CallingConv::Win64)
5626 return false;
5627
5628 // Byval parameters hand the function a pointer directly into the stack area
5629 // we want to reuse during a tail call. Working around this *is* possible (see
5630 // X86) but less efficient and uglier in LowerCall.
5631 for (Function::const_arg_iterator i = CallerF.arg_begin(),
5632 e = CallerF.arg_end();
5633 i != e; ++i) {
5634 if (i->hasByValAttr())
5635 return false;
5636
5637 // On Windows, "inreg" attributes signify non-aggregate indirect returns.
5638 // In this case, it is necessary to save/restore X0 in the callee. Tail
5639 // call opt interferes with this. So we disable tail call opt when the
5640 // caller has an argument with the "inreg" attribute.
5641
5642 // FIXME: Check whether the callee also has an "inreg" argument.
5643 if (i->hasInRegAttr())
5644 return false;
5645 }
5646
5647 if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt))
5648 return CCMatch;
5649
5650 // Externally-defined functions with weak linkage should not be
5651 // tail-called on AArch64 when the OS does not support dynamic
5652 // pre-emption of symbols, as the AAELF spec requires normal calls
5653 // to undefined weak functions to be replaced with a NOP or jump to the
5654 // next instruction. The behaviour of branch instructions in this
5655 // situation (as used for tail calls) is implementation-defined, so we
5656 // cannot rely on the linker replacing the tail call with a return.
5657 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
5658 const GlobalValue *GV = G->getGlobal();
5659 const Triple &TT = getTargetMachine().getTargetTriple();
5660 if (GV->hasExternalWeakLinkage() &&
5661 (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
5662 return false;
5663 }
5664
5665 // Now we search for cases where we can use a tail call without changing the
5666 // ABI. Sibcall is used in some places (particularly gcc) to refer to this
5667 // concept.
5668
5669 // I want anyone implementing a new calling convention to think long and hard
5670 // about this assert.
5671 assert((!isVarArg || CalleeCC == CallingConv::C) &&
5672 "Unexpected variadic calling convention");
5673
5674 LLVMContext &C = *DAG.getContext();
5675 if (isVarArg && !Outs.empty()) {
5676 // At least two cases here: if caller is fastcc then we can't have any
5677 // memory arguments (we'd be expected to clean up the stack afterwards). If
5678 // caller is C then we could potentially use its argument area.
5679
5680 // FIXME: for now we take the most conservative of these in both cases:
5681 // disallow all variadic memory operands.
5682 SmallVector<CCValAssign, 16> ArgLocs;
5683 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
5684
5685 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, true));
5686 for (const CCValAssign &ArgLoc : ArgLocs)
5687 if (!ArgLoc.isRegLoc())
5688 return false;
5689 }
5690
5691 // Check that the call results are passed in the same way.
5692 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
5693 CCAssignFnForCall(CalleeCC, isVarArg),
5694 CCAssignFnForCall(CallerCC, isVarArg)))
5695 return false;
5696 // The callee has to preserve all registers the caller needs to preserve.
5697 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
5698 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
5699 if (!CCMatch) {
5700 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
5701 if (Subtarget->hasCustomCallingConv()) {
5702 TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
5703 TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
5704 }
5705 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
5706 return false;
5707 }
5708
5709 // Nothing more to check if the callee is taking no arguments
5710 if (Outs.empty())
5711 return true;
5712
5713 SmallVector<CCValAssign, 16> ArgLocs;
5714 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
5715
5716 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
5717
5718 const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
5719
5720 // If any of the arguments is passed indirectly, it must be SVE, so the
5721 // 'getBytesInStackArgArea' is not sufficient to determine whether we need to
5722 // allocate space on the stack. That is why we check for this explicitly here:
5723 // such a call cannot be a tail call.
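// For example, an SVE vector argument for which no Z registers remain is
// passed as a pointer to a caller-allocated, vscale-sized stack copy, which
// getBytesInStackArgArea() cannot account for.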
5724 if (llvm::any_of(ArgLocs, [](CCValAssign &A) {
5725 assert((A.getLocInfo() != CCValAssign::Indirect ||
5726 A.getValVT().isScalableVector()) &&
5727 "Expected value to be scalable");
5728 return A.getLocInfo() == CCValAssign::Indirect;
5729 }))
5730 return false;
5731
5732 // If the stack arguments for this call do not fit into our own save area then
5733 // the call cannot be made tail.
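// For example, if our caller reserved only 16 bytes of incoming stack
// arguments but this call needs 32 bytes, the outgoing stores would spill
// past the space our caller allocated, so the tail call is rejected.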
5734 if (CCInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea())
5735 return false;
5736
5737 const MachineRegisterInfo &MRI = MF.getRegInfo();
5738 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
5739 return false;
5740
5741 return true;
5742}
5743
5744SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
5745 SelectionDAG &DAG,
5746 MachineFrameInfo &MFI,
5747 int ClobberedFI) const {
5748 SmallVector<SDValue, 8> ArgChains;
5749 int64_t FirstByte = MFI.getObjectOffset(ClobberedFI);
5750 int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1;
5751
5752 // Include the original chain at the beginning of the list. When this is
5753 // used by target LowerCall hooks, this helps legalize find the
5754 // CALLSEQ_BEGIN node.
5755 ArgChains.push_back(Chain);
5756
5757 // Add a chain value for each stack argument load that overlaps ClobberedFI.
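// For example, if ClobberedFI covers bytes [-16, -9] of the incoming argument
// area and an argument load reads bytes [-12, -5], the ranges overlap and the
// load's chain is added so the load is ordered before the clobbering store.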
5758 for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(),
5759 UE = DAG.getEntryNode().getNode()->use_end();
5760 U != UE; ++U)
5761 if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
5762 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
5763 if (FI->getIndex() < 0) {
5764 int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex());
5765 int64_t InLastByte = InFirstByte;
5766 InLastByte += MFI.getObjectSize(FI->getIndex()) - 1;
5767
5768 if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
5769 (FirstByte <= InFirstByte && InFirstByte <= LastByte))
5770 ArgChains.push_back(SDValue(L, 1));
5771 }
5772
5773 // Build a tokenfactor for all the chains.
5774 return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains);
5775}
5776
5777bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
5778 bool TailCallOpt) const {
5779 return (CallCC == CallingConv::Fast && TailCallOpt) ||
5780 CallCC == CallingConv::Tail || CallCC == CallingConv::SwiftTail;
5781}
5782
5783/// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain,
5784/// and add input and output parameter nodes.
5785SDValue
5786AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
5787 SmallVectorImpl<SDValue> &InVals) const {
5788 SelectionDAG &DAG = CLI.DAG;
5789 SDLoc &DL = CLI.DL;
5790 SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
5791 SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
5792 SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
5793 SDValue Chain = CLI.Chain;
5794 SDValue Callee = CLI.Callee;
5795 bool &IsTailCall = CLI.IsTailCall;
5796 CallingConv::ID CallConv = CLI.CallConv;
5797 bool IsVarArg = CLI.IsVarArg;
5798
5799 MachineFunction &MF = DAG.getMachineFunction();
5800 MachineFunction::CallSiteInfo CSInfo;
5801 bool IsThisReturn = false;
5802
5803 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
5804 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
5805 bool IsSibCall = false;
5806 bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CallConv);
5807
5808 // Check callee args/returns for SVE registers and set calling convention
5809 // accordingly.
5810 if (CallConv == CallingConv::C || CallConv == CallingConv::Fast) {
5811 bool CalleeOutSVE = any_of(Outs, [](ISD::OutputArg &Out){
5812 return Out.VT.isScalableVector();
5813 });
5814 bool CalleeInSVE = any_of(Ins, [](ISD::InputArg &In){
5815 return In.VT.isScalableVector();
5816 });
5817
5818 if (CalleeInSVE || CalleeOutSVE)
5819 CallConv = CallingConv::AArch64_SVE_VectorCall;
5820 }
5821
5822 if (IsTailCall) {
5823 // Check if it's really possible to do a tail call.
5824 IsTailCall = isEligibleForTailCallOptimization(
5825 Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG);
5826
5827 // A sibling call is one where we're under the usual C ABI and not planning
5828 // to change that but can still do a tail call:
5829 if (!TailCallOpt && IsTailCall && CallConv != CallingConv::Tail &&
5830 CallConv != CallingConv::SwiftTail)
5831 IsSibCall = true;
5832
5833 if (IsTailCall)
5834 ++NumTailCalls;
5835 }
5836
5837 if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall())
5838 report_fatal_error("failed to perform tail call elimination on a call "
5839 "site marked musttail");
5840
5841 // Analyze operands of the call, assigning locations to each operand.
5842 SmallVector<CCValAssign, 16> ArgLocs;
5843 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5844
5845 if (IsVarArg) {
5846 // Handle fixed and variable vector arguments differently.
5847 // Variable vector arguments always go into memory.
5848 unsigned NumArgs = Outs.size();
5849
5850 for (unsigned i = 0; i != NumArgs; ++i) {
5851 MVT ArgVT = Outs[i].VT;
5852 if (!Outs[i].IsFixed && ArgVT.isScalableVector())
5853 report_fatal_error("Passing SVE types to variadic functions is "
5854 "currently not supported");
5855
5856 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5857 bool UseVarArgCC = !Outs[i].IsFixed;
5858 // On Windows, the fixed arguments in a vararg call are passed in GPRs
5859 // too, so use the vararg CC to force them to integer registers.
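// For example, a fixed double passed to a variadic callee on Win64 travels
// in an X register rather than a Q register, per the Windows variadic rules.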
5860 if (IsCalleeWin64)
5861 UseVarArgCC = true;
5862 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, UseVarArgCC);
5863 bool Res = AssignFn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
5864 assert(!Res && "Call operand has unhandled type");
5865 (void)Res;
5866 }
5867 } else {
5868 // At this point, Outs[].VT may already be promoted to i32. To correctly
5869 // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
5870 // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
5871 // Since AnalyzeCallOperands uses Ins[].VT for both ValVT and LocVT, here
5872 // we use a special version of AnalyzeCallOperands to pass in ValVT and
5873 // LocVT.
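// For example, for an i8 argument Outs[i].VT is already i32, but the original
// IR type gives ActualMVT == i8, so ValVT is reset to i8 and a byte-sized
// stack slot can be used if the argument is passed in memory.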
5874 unsigned NumArgs = Outs.size();
5875 for (unsigned i = 0; i != NumArgs; ++i) {
5876 MVT ValVT = Outs[i].VT;
5877 // Get type of the original argument.
5878 EVT ActualVT = getValueType(DAG.getDataLayout(),
5879 CLI.getArgs()[Outs[i].OrigArgIndex].Ty,
5880 /*AllowUnknown*/ true);
5881 MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ValVT;
5882 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5883 // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
5884 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
5885 ValVT = MVT::i8;
5886 else if (ActualMVT == MVT::i16)
5887 ValVT = MVT::i16;
5888
5889 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
5890 bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full, ArgFlags, CCInfo);
5891 assert(!Res && "Call operand has unhandled type");
5892 (void)Res;
5893 }
5894 }
5895
5896 // Get a count of how many bytes are to be pushed on the stack.
5897 unsigned NumBytes = CCInfo.getNextStackOffset();
5898
5899 if (IsSibCall) {
5900 // Since we're not changing the ABI to make this a tail call, the memory
5901 // operands are already available in the caller's incoming argument space.
5902 NumBytes = 0;
5903 }
5904
5905 // FPDiff is the byte offset of the call's argument area from the callee's.
5906 // Stores to callee stack arguments will be placed in FixedStackSlots offset
5907 // by this amount for a tail call. In a sibling call it must be 0 because the
5908 // caller will deallocate the entire stack and the callee still expects its
5909 // arguments to begin at SP+0. Completely unused for non-tail calls.
5910 int FPDiff = 0;
5911
5912 if (IsTailCall && !IsSibCall) {
5913 unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
5914
5915 // Since the callee will pop the argument stack as a tail call, we must keep
5916 // the popped size 16-byte aligned.
5917 NumBytes = alignTo(NumBytes, 16);
5918
5919 // FPDiff will be negative if this tail call requires more space than we
5920 // would automatically have in our incoming argument space. Positive if we
5921 // can actually shrink the stack.
5922 FPDiff = NumReusableBytes - NumBytes;
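// For example, with NumReusableBytes == 32 and NumBytes == 48, FPDiff == -16:
// the callee's argument area starts 16 bytes below our own, and the reserved
// tail-call stack below must cover at least those 16 bytes.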
5923
5924 // Update the required reserved area if this is the tail call requiring the
5925 // most argument stack space.
5926 if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (unsigned)-FPDiff)
5927 FuncInfo->setTailCallReservedStack(-FPDiff);
5928
5929 // The stack pointer must be 16-byte aligned at all times it's used for a
5930 // memory operation, which in practice means at *all* times and in
5931 // particular across call boundaries. Therefore our own arguments started at
5932 // a 16-byte aligned SP and the delta applied for the tail call should
5933 // satisfy the same constraint.
5934 assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
5935 }
5936
5937 // Adjust the stack pointer for the new arguments...
5938 // These operations are automatically eliminated by the prolog/epilog pass
5939 if (!IsSibCall)
5940 Chain = DAG.getCALLSEQ_START(Chain, IsTailCall ? 0 : NumBytes, 0, DL);
5941
5942 SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP,
5943 getPointerTy(DAG.getDataLayout()));
5944
5945 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5946 SmallSet<unsigned, 8> RegsUsed;
5947 SmallVector<SDValue, 8> MemOpChains;
5948 auto PtrVT = getPointerTy(DAG.getDataLayout());
5949
5950 if (IsVarArg && CLI.CB && CLI.CB->isMustTailCall()) {
5951 const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
5952 for (const auto &F : Forwards) {
5953 SDValue Val = DAG.getCopyFromReg(Chain, DL, F.VReg, F.VT);
5954 RegsToPass.emplace_back(F.PReg, Val);
5955 }
5956 }
5957
5958 // Walk the register/memloc assignments, inserting copies/loads.
5959 unsigned ExtraArgLocs = 0;
5960 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
5961 CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
5962 SDValue Arg = OutVals[i];
5963 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5964
5965 // Promote the value if needed.
5966 switch (VA.getLocInfo()) {
5967 default:
5968 llvm_unreachable("Unknown loc info!");
5969 case CCValAssign::Full:
5970 break;
5971 case CCValAssign::SExt:
5972 Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
5973 break;
5974 case CCValAssign::ZExt:
5975 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
5976 break;
5977 case CCValAssign::AExt:
5978 if (Outs[i].ArgVT == MVT::i1) {
5979 // AAPCS requires i1 to be zero-extended to 8-bits by the caller.
5980 Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
5981 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i8, Arg);
5982 }
5983 Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
5984 break;
5985 case CCValAssign::AExtUpper:
5986 assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
5987 Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
5988 Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg,
5989 DAG.getConstant(32, DL, VA.getLocVT()));
5990 break;
5991 case CCValAssign::BCvt:
5992 Arg = DAG.getBitcast(VA.getLocVT(), Arg);
5993 break;
5994 case CCValAssign::Trunc:
5995 Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
5996 break;
5997 case CCValAssign::FPExt:
5998 Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg);
5999 break;
6000 case CCValAssign::Indirect:
6001 assert(VA.getValVT().isScalableVector() &&
6002 "Only scalable vectors can be passed indirectly");
6003
6004 uint64_t StoreSize = VA.getValVT().getStoreSize().getKnownMinSize();
6005 uint64_t PartSize = StoreSize;
6006 unsigned NumParts = 1;
6007 if (Outs[i].Flags.isInConsecutiveRegs()) {
6008 assert(!Outs[i].Flags.isInConsecutiveRegsLast());
6009 while (!Outs[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
6010 ++NumParts;
6011 StoreSize *= NumParts;
6012 }
6013
6014 MachineFrameInfo &MFI = MF.getFrameInfo();
6015 Type *Ty = EVT(VA.getValVT()).getTypeForEVT(*DAG.getContext());
6016 Align Alignment = DAG.getDataLayout().getPrefTypeAlign(Ty);
6017 int FI = MFI.CreateStackObject(StoreSize, Alignment, false);
6018 MFI.setStackID(FI, TargetStackID::ScalableVector);
6019
6020 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
6021 SDValue Ptr = DAG.getFrameIndex(
6022 FI, DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()));
6023 SDValue SpillSlot = Ptr;
6024
6025 // Ensure we generate all stores for each tuple part, whilst updating the
6026 // pointer after each store correctly using vscale.
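// For example, for a two-part tuple of <vscale x 4 x i32>, PartSize is the
// 16-byte minimum store size, so the second part is stored at
// Ptr + vscale * 16.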
6027 while (NumParts) {
6028 Chain = DAG.getStore(Chain, DL, OutVals[i], Ptr, MPI);
6029 NumParts--;
6030 if (NumParts > 0) {
6031 SDValue BytesIncrement = DAG.getVScale(
6032 DL, Ptr.getValueType(),
6033 APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize));
6034 SDNodeFlags Flags;
6035 Flags.setNoUnsignedWrap(true);
6036
6037 MPI = MachinePointerInfo(MPI.getAddrSpace());
6038 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
6039 BytesIncrement, Flags);
6040 ExtraArgLocs++;
6041 i++;
6042 }
6043 }
6044
6045 Arg = SpillSlot;
6046 break;
6047 }
6048
6049 if (VA.isRegLoc()) {
6050 if (i == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
6051 Outs[0].VT == MVT::i64) {
6052 assert(VA.getLocVT() == MVT::i64 &&
6053 "unexpected calling convention register assignment");
6054 assert(!Ins.empty() && Ins[0].VT == MVT::i64 &&
6055 "unexpected use of 'returned'");
6056 IsThisReturn = true;
6057 }
6058 if (RegsUsed.count(VA.getLocReg())) {
6059 // If this register has already been used then we're trying to pack
6060 // parts of an [N x i32] into an X-register. The extension type will
6061 // take care of putting the two halves in the right place but we have to
6062 // combine them.
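// For example, for [2 x i32] split across one X register, one element sits
// in bits [31:0] and the other was shifted into bits [63:32] (AExtUpper), so
// OR-ing the two copies reassembles the full 64-bit value.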
6063 SDValue &Bits =
6064 llvm::find_if(RegsToPass,
6065 [=](const std::pair<unsigned, SDValue> &Elt) {
6066 return Elt.first == VA.getLocReg();
6067 })
6068 ->second;
6069 Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg);
6070 // Call site info is used for the function's parameter entry value
6071 // tracking. For now we track only the simple cases where the parameter
6072 // is transferred through a whole register.
6073 llvm::erase_if(CSInfo, [&VA](MachineFunction::ArgRegPair ArgReg) {
6074 return ArgReg.Reg == VA.getLocReg();
6075 });
6076 } else {
6077 RegsToPass.emplace_back(VA.getLocReg(), Arg);
6078 RegsUsed.insert(VA.getLocReg());
6079 const TargetOptions &Options = DAG.getTarget().Options;
6080 if (Options.EmitCallSiteInfo)
6081 CSInfo.emplace_back(VA.getLocReg(), i);
6082 }
6083 } else {
6084 assert(VA.isMemLoc());
6085
6086 SDValue DstAddr;
6087 MachinePointerInfo DstInfo;
6088
6089 // FIXME: This works on big-endian for composite byvals, which are the
6090 // common case. It should also work for fundamental types.
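// For example, a 4-byte argument gets OpSize == 4, so on big-endian targets
// BEAlign == 4 and the value is stored 4 bytes into its 8-byte stack slot.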
6091 uint32_t BEAlign = 0;
6092 unsigned OpSize;
6093 if (VA.getLocInfo() == CCValAssign::Indirect ||
6094 VA.getLocInfo() == CCValAssign::Trunc)
6095 OpSize = VA.getLocVT().getFixedSizeInBits();
6096 else
6097 OpSize = Flags.isByVal() ? Flags.getByValSize() * 8
6098 : VA.getValVT().getSizeInBits();
6099 OpSize = (OpSize + 7) / 8;
6100 if (!Subtarget->isLittleEndian() && !Flags.isByVal() &&
6101 !Flags.isInConsecutiveRegs()) {
6102 if (OpSize < 8)
6103 BEAlign = 8 - OpSize;
6104 }
6105 unsigned LocMemOffset = VA.getLocMemOffset();
6106 int32_t Offset = LocMemOffset + BEAlign;
6107 SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
6108 PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
6109
6110 if (IsTailCall) {
6111 Offset = Offset + FPDiff;
6112 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
6113
6114 DstAddr = DAG.getFrameIndex(FI, PtrVT);
6115 DstInfo = MachinePointerInfo::getFixedStack(MF, FI);
6116
6117 // Make sure any stack arguments overlapping with where we're storing
6118 // are loaded before this eventual operation. Otherwise they'll be
6119 // clobbered.
6120 Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
6121 } else {
6122 SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
6123
6124 DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
6125 DstInfo = MachinePointerInfo::getStack(MF, LocMemOffset);
6126 }
6127
6128 if (Outs[i].Flags.isByVal()) {
6129 SDValue SizeNode =
6130 DAG.getConstant(Outs[i].Flags.getByValSize(), DL, MVT::i64);
6131 SDValue Cpy = DAG.getMemcpy(
6132 Chain, DL, DstAddr, Arg, SizeNode,
6133 Outs[i].Flags.getNonZeroByValAlign(),
6134 /*isVol = */ false, /*AlwaysInline = */ false,
6135 /*isTailCall = */ false, DstInfo, MachinePointerInfo());
6136
6137 MemOpChains.push_back(Cpy);
6138 } else {
6139 // Since we pass i1/i8/i16 as i1/i8/i16 on stack and Arg is already
6140 // promoted to a legal register type i32, we should truncate Arg back to
6141 // i1/i8/i16.
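// For example, an i8 argument reaches this point as an i32 node; truncating
// back to i8 makes the store below write a single byte into its stack slot.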
6142 if (VA.getValVT() == MVT::i1 || VA.getValVT() == MVT::i8 ||
6143 VA.getValVT() == MVT::i16)
6144 Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
6145
6146 SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo);
6147 MemOpChains.push_back(Store);
6148 }
6149 }
6150 }
6151
6152 if (!MemOpChains.empty())
6153 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
6154
6155 // Build a sequence of copy-to-reg nodes chained together with token chain
6156 // and flag operands which copy the outgoing args into the appropriate regs.
6157 SDValue InFlag;
6158 for (auto &RegToPass : RegsToPass) {
6159 Chain = DAG.getCopyToReg(Chain, DL, RegToPass.first,
6160 RegToPass.second, InFlag);
6161 InFlag = Chain.getValue(1);
6162 }
6163
6164 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
6165 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
6166 // node so that legalize doesn't hack it.
6167 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
6168 auto GV = G->getGlobal();
6169 unsigned OpFlags =
6170 Subtarget->classifyGlobalFunctionReference(GV, getTargetMachine());
6171 if (OpFlags & AArch64II::MO_GOT) {
6172 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
6173 Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
6174 } else {
6175 const GlobalValue *GV = G->getGlobal();
6176 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
6177 }
6178 } else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
6179 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
6180 Subtarget->isTargetMachO()) {
6181 const char *Sym = S->getSymbol();
6182 Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT);
6183 Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
6184 } else {
6185 const char *Sym = S->getSymbol();
6186 Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0);
6187 }
6188 }
6189
6190 // We don't usually want to end the call-sequence here because we would tidy
6191 // the frame up *after* the call; however, in the ABI-changing tail-call case
6192 // we've carefully laid out the parameters so that when sp is reset they'll be
6193 // in the correct location.
6194 if (IsTailCall && !IsSibCall) {
6195 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, DL, true),
6196 DAG.getIntPtrConstant(0, DL, true), InFlag, DL);
6197 InFlag = Chain.getValue(1);
6198 }
6199
6200 std::vector<SDValue> Ops;
6201 Ops.push_back(Chain);
6202 Ops.push_back(Callee);
6203
6204 if (IsTailCall) {
6205 // Each tail call may have to adjust the stack by a different amount, so
6206 // this information must travel along with the operation for eventual
6207 // consumption by emitEpilogue.
6208 Ops.push_back(DAG.getTargetConstant(FPDiff, DL, MVT::i32));
6209 }